Continuous learning
This section describes how to automatically retrain a model with new data in AI & Analytics Engine.
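The following example trains a model with continuous learning enabled, updates the dataset with new data, waits for the model to be retrained on the new dataset version, and then evaluates the retrained model: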
import os
from aiaengine import (
DataType,
Column,
Project,
FileSource,
Regressors,
RegressionConfig
)
# Continuous-learning walkthrough: create a dataset and a regression app,
# train a model with continuous learning enabled, then push new data and
# evaluate the model again once it has been retrained.

# The PROJECT_ID environment variable, when set, overrides the sample ID below.
project_id = os.environ.get("PROJECT_ID", "4c098a3f-29ba-456a-bec3-3a3ad41aa943")  # set your own project ID
project = Project(id=project_id)

# create the dataset from the initial CSV file, declaring both columns as numeric
dataset = project.create_dataset(
    name="Penguins Regression - Continuous Learning Y",
    data_source=FileSource(
        file_urls=["examples/datasets/penguins_regression_initial_data.csv"],  # local file path
        schema=[
            Column("Flipper Length (mm)", DataType.Numeric),
            Column("Body Mass (g)", DataType.Numeric),
        ],
    ),
)

# next, we will create a new regression application that predicts "Body Mass (g)"
app = project.create_app(
    name="Predict penguin body mass (Continuous Learning)",
    dataset_id=dataset.id,
    config=RegressionConfig(
        target_column="Body Mass (g)",
    ),
)

# get the recommended feature set
feature_set = app.get_recommended_feature_set()
print('Recommended features')
print(feature_set.feature_names)

# train a new model with continuous learning enabled
model = app.create_model(
    name="XGBoost Regressor",
    template_id=Regressors.XGBoost,
    feature_set_id=feature_set.id,
    continuous_learning=True,  # enable continuous learning
)

# note: you can enable continuous learning for an already trained model at any
# time if it is not enabled for that model yet
# model.enable_continous_learning()

# evaluate the model
evaluation = model.evaluate()
print("Evaluation metrics")
print(evaluation.result.details['metrics'])

# update dataset with new data; the returned dataset version is used below to
# wait for, and then evaluate, the model retrained on that version
latest_dataset_version = dataset.update_data(
    FileSource(file_urls=["examples/datasets/penguins_regression_additional_data.csv"])
)

# waiting for the model to be retrained with new train data
# (wait a little bit longer for the data to be processed before training)
model.wait_for_trained(dataset_version=latest_dataset_version, timeout=1200)

# evaluate the newly retrained model against the latest dataset version
evaluation = model.evaluate(
    train_dataset_version=latest_dataset_version,
    test_dataset_version=latest_dataset_version,
)
print("New evaluation metrics")
print(evaluation.result.details['metrics'])
Coming soon
package com.aiaengine.examples.model;
import com.aiaengine.*;
import com.aiaengine.app.RegressionConfig;
import com.aiaengine.app.request.CreateModelRequest;
import com.aiaengine.dataset.request.UpdateRequest;
import com.aiaengine.datasource.DataSource;
import com.aiaengine.datasource.Schema;
import com.aiaengine.datasource.file.CSVFileSettings;
import com.aiaengine.datasource.file.FileSourceRequest;
import com.aiaengine.datasource.file.FileType;
import com.aiaengine.model.request.EvaluationRequest;
import com.aiaengine.org.request.CreateProjectRequest;
import com.aiaengine.project.request.CreateAppRequest;
import com.aiaengine.project.request.CreateDatasetRequest;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
/**
 * Continuous-learning example for the Java SDK.
 *
 * <p>Creates a demo project, imports the initial penguins regression data,
 * trains an XGBoost regressor with continuous learning enabled, then updates
 * the dataset with additional data and evaluates the model again once it has
 * been retrained on the new dataset version.
 */
public class ContinuousLearningApp {
    /**
     * Runs the end-to-end example.
     *
     * @param args command-line arguments (not used)
     * @throws FileNotFoundException declared by the original example; presumably raised by the
     *                               SDK when a local data file is missing (confirm against the SDK)
     */
    public static void main(String[] args) throws FileNotFoundException {
        Engine engine = new Engine();

        // create a new demo project in the org
        Org org = engine.getOrg("cae24b10-e6b0-4d61-8cef-a9f4b8f6133d"); // replace with your org ID
        Project project = org.createProject(CreateProjectRequest.builder()
                .name("Demo project using Java SDK")
                .description("Your demo project")
                .build());
        // or you can get an existing project that you want to work on
        // Project project = engine.getProject("ID_of_your_project") // replace with your own project ID

        // import the initial penguins regression dataset from a local CSV file
        String dataFilePath = "examples/datasets/penguins_regression_initial_data.csv";
        List<Schema.Column> columns = new ArrayList<>();
        columns.add(new Schema.Column("Flipper Length (mm)", Schema.SemanticType.NUMERIC));
        columns.add(new Schema.Column("Body Mass (g)", Schema.SemanticType.NUMERIC));
        DataSource localDataSource = engine.buildFileSource(FileSourceRequest.builder()
                .fileType(FileType.CSV)
                .url(dataFilePath)
                .fileSettings(new CSVFileSettings())
                .schema(new Schema(columns))
                .build());
        Dataset dataset = project.createDataset(CreateDatasetRequest.builder()
                .name("Penguins Regression - Continuous Learning")
                .dataSource(localDataSource)
                .timeout(900)
                .build());

        // create a regression app that predicts the "Body Mass (g)" column
        App app = project.createApp(CreateAppRequest.builder()
                .name("Predict penguin body mass (Continuous Learning)")
                .datasetId(dataset.getId())
                .config(new RegressionConfig("Body Mass (g)"))
                .build());

        // use recommended featureset
        // NOTE(review): 600 is presumably a timeout - confirm against the SDK
        FeatureSet featureSet = app.getRecommendedFeatureSet(600);

        // train model with default hyperparameters and continuous learning enabled
        Model model = app.createModel(CreateModelRequest.builder()
                .name("XGBoost Regressor")
                .featureSetId(featureSet.getId())
                .templateId("xgboosting_regression")
                .continuousLearning(true)
                .build());

        // evaluate the model
        Evaluation evaluation = model.evaluate(EvaluationRequest.builder().build());
        printEvaluation(evaluation);

        // update dataset with new data; the returned version identifies the updated dataset
        int latestDatasetVersion = dataset.updateData(UpdateRequest.builder()
                .dataSource(engine.buildFileSource(FileSourceRequest.builder()
                        .url("examples/datasets/penguins_regression_additional_data.csv")
                        .fileType(FileType.CSV)
                        .build()))
                .build());

        // waiting for the model to be retrained with new train data
        model.waitForTrained(latestDatasetVersion, 1200);

        // evaluate the newly retrained model against the latest dataset version
        Evaluation evaluation2 = model.evaluate(EvaluationRequest.builder()
                .trainDatasetVersion(latestDatasetVersion)
                .testDatasetVersion(latestDatasetVersion)
                .build());
        printEvaluation(evaluation2);
    }

    /** Prints the summary and the "metrics" detail entry of an evaluation result to stdout. */
    private static void printEvaluation(Evaluation evaluation) {
        // %n emits the platform line separator, matching println's line ending
        System.out.printf("Evaluation summary: %s%n", evaluation.getResult().getSummary());
        System.out.printf("Evaluation metrics: %s%n", evaluation.getResult().getDetails().get("metrics"));
    }
}