Continuous learning

This section describes how to automatically retrain a model when new data is added to its dataset in the AI & Analytics Engine.

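When continuous learning is enabled for a model, the model is retrained after its dataset is updated with new data. You can then evaluate the retrained model against the new dataset version, as in the following example: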

import os

from aiaengine import (
    SemanticType,
    Column,
    Project,
    FileSource,
    Regressors,
    RegressionConfig
)

# Resolve the project to work in. The PROJECT_ID environment variable, when
# set, overrides the sample ID below.
project_id = os.environ.get("PROJECT_ID", "4c098a3f-29ba-456a-bec3-3a3ad41aa943")  # set your own project ID
project = Project(id=project_id)

# Describe the initial training data: a local CSV file whose two columns are
# both declared as numeric.
initial_source = FileSource(
    file_urls=["examples/datasets/penguins_regression_initial_data.csv"],  # local file path
    schema=[
        Column("Flipper Length (mm)", SemanticType.Numeric),
        Column("Body Mass (g)", SemanticType.Numeric),
    ],
)

# Create the dataset from that source.
dataset = project.create_dataset(
    name="Penguins Regression - Continuous Learning Y",
    data_source=initial_source,
)

# Next, create a new regression application that predicts the body mass column.
app = project.create_app(
    name="Predict penguin body mass (Continuous Learning)",
    dataset_id=dataset.id,
    config=RegressionConfig(target_column="Body Mass (g)"),
)

# Get the recommended feature set and show which features it contains.
feature_set = app.get_recommended_feature_set()
print('Recommended features')
print(feature_set.feature_names)

# Train a new model with continuous learning enabled.
model = app.create_model(
    name="XGBoost Regressor",
    template_id=Regressors.XGBoost,
    feature_set_id=feature_set.id,
    continuous_learning=True,  # enable continuous learning
)

# Note: continuous learning can also be enabled at any time for an already
# trained model that does not have it enabled yet:
# model.enable_continous_learning()

# Evaluate the model trained on the initial data.
evaluation = model.evaluate()
print("Evaluation metrics")
print(evaluation.result.details['metrics'])


# Update the dataset with new data; the returned value identifies the new
# dataset version and is used below to select the retrained model.
additional_source = FileSource(
    file_urls=["examples/datasets/penguins_regression_additional_data.csv"]
)
latest_dataset_version = dataset.update_data(additional_source)

# Wait for the model to be retrained with the new training data. A longer
# timeout is used because the new data has to be processed before training.
model.wait_for_trained(dataset_version=latest_dataset_version, timeout=1200)

# Evaluate the newly retrained model against the latest dataset version.
evaluation = model.evaluate(
    train_dataset_version=latest_dataset_version,
    test_dataset_version=latest_dataset_version,
)
print("New evaluation metrics")
print(evaluation.result.details['metrics'])
Coming soon
package com.aiaengine.examples.model;

import com.aiaengine.*;
import com.aiaengine.app.RegressionConfig;
import com.aiaengine.app.request.CreateModelRequest;
import com.aiaengine.dataset.request.UpdateRequest;
import com.aiaengine.datasource.DataSource;
import com.aiaengine.datasource.Schema;
import com.aiaengine.datasource.file.CSVFileSettings;
import com.aiaengine.datasource.file.FileSourceRequest;
import com.aiaengine.datasource.file.FileType;
import com.aiaengine.model.request.EvaluationRequest;
import com.aiaengine.org.request.CreateProjectRequest;
import com.aiaengine.project.request.CreateAppRequest;
import com.aiaengine.project.request.CreateDatasetRequest;

import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;

/**
 * End-to-end continuous learning example for the Java SDK.
 *
 * <p>The program creates a project, imports the initial penguins regression data, creates a
 * regression app, trains an XGBoost model with continuous learning enabled, and evaluates it.
 * It then updates the dataset with additional data, waits for the model to be retrained on the
 * new dataset version, and evaluates the retrained model.
 */
public class ContinuousLearningApp {
    // Timeout values passed to the SDK calls below. The unit is not visible in this example
    // (presumably seconds -- confirm against the SDK documentation).
    private static final int DATASET_IMPORT_TIMEOUT = 900;
    private static final int FEATURE_SET_TIMEOUT = 600;
    // Longer than the others: the new data has to be processed before retraining starts.
    private static final int RETRAIN_TIMEOUT = 1200;

    /**
     * Runs the continuous learning example from start to finish.
     *
     * @param args command-line arguments (unused)
     * @throws FileNotFoundException declared for the SDK calls that read the local CSV files
     */
    public static void main(String[] args) throws FileNotFoundException {
        Engine engine = new Engine();
        // create a new demo project in the org
        Org org = engine.getOrg("cae24b10-e6b0-4d61-8cef-a9f4b8f6133d"); // replace with your org ID
        Project project = org.createProject(CreateProjectRequest.builder()
                .name("Demo project using Java SDK")
                .description("Your demo project")
                .build());
        // or you can get an existing project that you want to work on
        // Project project = engine.getProject("ID_of_your_project"); // replace with your own project ID

        // import the initial penguins regression data from a local CSV file,
        // declaring both columns as numeric
        String dataFilePath = "examples/datasets/penguins_regression_initial_data.csv";
        List<Schema.Column> columns = new ArrayList<>();
        columns.add(new Schema.Column("Flipper Length (mm)", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("Body Mass (g)", Schema.SemanticType.NUMERIC));
        DataSource localDataSource = engine.buildFileSource(FileSourceRequest.builder()
                .fileType(FileType.CSV)
                .url(dataFilePath)
                .fileSettings(new CSVFileSettings())
                .schema(new Schema(columns))
                .build());

        Dataset dataset = project.createDataset(CreateDatasetRequest.builder()
                .name("Penguins Regression - Continuous Learning")
                .dataSource(localDataSource)
                .timeout(DATASET_IMPORT_TIMEOUT)
                .build());

        // create a regression app that predicts the body mass column
        App app = project.createApp(CreateAppRequest.builder()
                .name("Predict penguin body mass (Continuous Learning)")
                .datasetId(dataset.getId())
                .config(new RegressionConfig("Body Mass (g)"))
                .build());

        // use the recommended feature set
        FeatureSet featureSet = app.getRecommendedFeatureSet(FEATURE_SET_TIMEOUT);
        // train a model with continuous learning enabled
        Model model = app.createModel(CreateModelRequest.builder()
                .name("XGBoost Regressor")
                .featureSetId(featureSet.getId())
                .templateId("xgboosting_regression")
                .continuousLearning(true)
                .build());

        // evaluate the model trained on the initial data
        Evaluation evaluation = model.evaluate(EvaluationRequest.builder().build());
        printEvaluation(evaluation);

        // update the dataset with new data; the returned value identifies the new dataset version
        int latestDatasetVersion = dataset.updateData(UpdateRequest.builder()
                .dataSource(engine.buildFileSource(FileSourceRequest.builder()
                        .url("examples/datasets/penguins_regression_additional_data.csv")
                        .fileType(FileType.CSV)
                        .build()))
                .build());

        // wait for the model to be retrained with the new training data
        model.waitForTrained(latestDatasetVersion, RETRAIN_TIMEOUT);

        // evaluate the newly retrained model against the latest dataset version
        Evaluation evaluation2 = model.evaluate(EvaluationRequest.builder()
                .trainDatasetVersion(latestDatasetVersion)
                .testDatasetVersion(latestDatasetVersion)
                .build());
        printEvaluation(evaluation2);
    }

    /**
     * Prints the summary and the {@code "metrics"} detail entry of an evaluation result to
     * standard output, one per line.
     *
     * @param evaluation the evaluation whose result is printed
     */
    private static void printEvaluation(Evaluation evaluation) {
        System.out.printf("Evaluation summary: %s%n", evaluation.getResult().getSummary());
        System.out.printf("Evaluation metrics: %s%n", evaluation.getResult().getDetails().get("metrics"));
    }
}