Using the Java SDK

This page gives a walk-through of performing the task introduced in the Quick Start Introduction page using the aiaengine SDK that aids API access to the AI & Analytics Engine. Re-usable snippets of code are provided.

SDK & System Requirements

The AI & Analytics Engine Java SDK requires Java 8 or later (JDK 8+).
Download AI & Analytics Engine Java SDK.
Put the SDK .jar file in the lib folder of your project, or add it to the classpath in your build file.

Setting up your environment for API access

You will first need to set up an environment variable called AIA_ENGINE_CONFIG_FILE, which stores the path of a configuration file. A good practice is to make a .aiaengine folder in your home directory, and put a command to define the environment variable in your .bashrc file:

$ grep -n -H "AIA_ENGINE_CONFIG_FILE" ~/.bashrc
/home/new-user/.bashrc:122:export AIA_ENGINE_CONFIG_FILE="/home/new-user/.aiaengine/config.json"

Here is the template you need to follow for the config.json file. Simply fill in the email address you used for registration and your AI & Analytics Engine password:

{
  "target": "grpc.aiaengine.com:443",
  "secure": true,
  "auth": {
    "provider": "email_password",
    "data": {
        "email": "abcd.tuvw@example.com",
        "password": "qwerty123"
    }
  }
}

Using the SDK

The following example code shows you how to use the Java SDK to import a dataset from a local CSV file, create a binary classification app, train a model and then run prediction using the trained model.

package com.aiaengine.examples;

import com.aiaengine.*;
import com.aiaengine.app.ClassificationConfig;
import com.aiaengine.app.request.CreateModelRequest;
import com.aiaengine.dataset.request.DownloadRequest;
import com.aiaengine.datasource.DataSource;
import com.aiaengine.datasource.Schema;
import com.aiaengine.datasource.file.CSVFileSettings;
import com.aiaengine.datasource.file.FileSourceRequest;
import com.aiaengine.datasource.file.FileType;
import com.aiaengine.featureset.RecommendedModel;
import com.aiaengine.model.request.EvaluationRequest;
import com.aiaengine.model.request.RunBatchPredictionRequest;
import com.aiaengine.org.request.CreateProjectRequest;
import com.aiaengine.project.request.CreateAppRequest;
import com.aiaengine.project.request.CreateDatasetRequest;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public class QuickStartApp {
    public static void main(String[] args) throws IOException {
        Engine engine = new Engine();
        //TODO: replace with your org ID
        Org org = engine.getOrg("");

        // Create a new project.
        // Within an organisation are various projects.
        // To continue, the Engine requires project with a given `name`, `description`.
        // Once the project is created, a unique id `project_id` is generated and needs to be used in associated tasks.
        Project project = org.createProject(CreateProjectRequest.builder()
                .name("Demo project using Java SDK")
                .description("Your demo project")
                .build());

        // Import a new dataset
        // Now it is time to upload your data. We use the German Credit dataset for a simple illustration.
        String dataFilePath = "examples/datasets/german-credit.csv";
        List<Schema.Column> columns = new ArrayList<>();
        columns.add(new Schema.Column("checking_status", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("duration", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("credit_history", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("purpose", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("credit_amount", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("savings_status", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("employment", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("installment_commitment", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("personal_status", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("other_parties", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("residence_since", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("property_magnitude", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("age", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("other_payment_plans", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("housing", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("existing_credits", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("job", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("num_dependents", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("own_telephone", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("foreign_worker", Schema.SemanticType.TEXT));
        columns.add(new Schema.Column("class", Schema.SemanticType.TEXT));
        DataSource localDataSource = engine.buildFileSource(FileSourceRequest.builder()
                .fileType(FileType.CSV)
                .url(dataFilePath)
                .fileSettings(new CSVFileSettings())
                .schema(new Schema(columns))
                .build());

        Dataset dataset = project.createDataset(CreateDatasetRequest.builder()
                .name("German Credit Data")
                .dataSource(localDataSource)
                .timeout(900)
                .build());

        // Creating a new app
        // To move on, you will next need to create an "App" from this dataset.
        // An app is a special container on the Engine that holds multiple models
        // trained and evaluated on the same train set and test set. An app enables you
        // to train, evaluate, and compare models using different machine learning
        // algorithms (called "templates" on the Engine). To create an app you need to
        // specify the dataset id, problem type ("classification" for the German Credit Data),
        // target columns and the proportion of data you assign for training.
        App app = project.createApp(CreateAppRequest.builder()
                .name("German Credit Risk Prediction Task")
                .datasetId(dataset.getId())
                .config(new ClassificationConfig("class",
                        ClassificationConfig.ClassificationSubType.BINARY,
                        "good", "bad"))
                .build());

        // The processing that occurs during app creation involves:
        // 1. Splitting your data into train and test,
        // 2. Computing additional stats on your data,
        // 3. Using the model recommender to predict beforehand how fast models can be
        // trained from available templates as well as their estimated quality (in terms
        // of how accurate its predictions are).

        // Feature sets
        // The `Recommended features` set is created by default and contains recommended features in the data.
        FeatureSet featureSet = app.getRecommendedFeatureSet(600);
        System.out.println(featureSet.getFeatureNames());

        // Or you can create a new feature set
        /*
        Set<String> selectedFeatures = new HashSet<>();
        selectedFeatures.add("credit_amount");
        selectedFeatures.add("installment_commitment");
        selectedFeatures.add("residence_since");
        selectedFeatures.add("age");
        selectedFeatures.add("existing_credits");
        selectedFeatures.add("num_dependents");
        app.createFeatureSet(CreateFeatureSetRequest.builder()
                .name("Selected features")
                .featureNames(selectedFeatures)
                .build());
         */

        // Selecting recommended models
        // Once the app is processed successfully, model recommendations are provided
        // with predicted performance over a range of metrics such as accuracy and
        // F1-macro score (for classification), as well as estimated time cost in
        // training and prediction. In this example, we select the top 5 models based on
        // F1-macro score.
        List<RecommendedModel> recommendedModels = featureSet.selectRecommendedModels(5, "f1_macro");
        System.out.println(recommendedModels);

        // Training models
        // Once the decision on which models to train is made, you can start to train the selected models.

        Model model = app.createModel(CreateModelRequest.builder()
                .name("XGBoost Classifier")
                .templateId("xgboosting_clf")
                .featureSetId(featureSet.getId())
                .build());

        // Evaluating your model performance
        // You can get the evaluation of the trained model to see how it performs on the test portion of the input dataset.
        Evaluation evaluation = model.evaluate(EvaluationRequest.builder().testDatasetVersion(0).build());
        System.out.println(String.format("Evaluation summary: %s", evaluation.getResult().getSummary()));
        System.out.println(String.format("Evaluation metrics: %s", ((Map<String, Object>) evaluation.getResult().getDetails().get("threshold_independent")).get("metrics")));

        // Using your model to predict on new data
        // run a batch prediction
        String predictionDataFile = "examples/datasets/german-credit-predict.csv";
        Prediction prediction = model.runBatchPrediction(RunBatchPredictionRequest.builder()
                .dataSource(engine.buildFileSource(FileSourceRequest.builder()
                        .urls(Collections.singletonList(predictionDataFile))
                        .fileType(FileType.CSV)
                        .build()))
                .build());

        // can download the predicted data into current folder
        prediction.getResult().download(DownloadRequest.builder().outputFolder("./").build());
    }
}