Running batch prediction
This section shows you how to use a deployed model to make predictions on new data with the AI & Analytics Engine.
Run batch prediction using local files as input
from aiaengine import Model, FileSource
# Look up the model to predict with by its ID (replace with your own model ID)
model = Model(id='c8dbd4fd-4a33-4923-ab17-75e0a5139eb7')

# Describe the local file(s) that hold the rows to run the prediction on
local_source = FileSource(
    file_urls=['examples/datasets/german-credit-predict.csv'],
)

# Run the batch prediction with the local files as input
prediction = model.run_batch_prediction(local_source)

# Print up to the first 100 rows of the result via its pandas representation
preview = prediction.result.to_pandas().head(100)
print(preview)
package com.aiaengine.examples.model;
import com.aiaengine.Engine;
import com.aiaengine.Model;
import com.aiaengine.Prediction;
import com.aiaengine.dataset.request.DownloadRequest;
import com.aiaengine.datasource.file.FileSourceRequest;
import com.aiaengine.datasource.file.FileType;
import com.aiaengine.model.request.RunBatchPredictionRequest;
import java.io.IOException;
/**
 * Example application: runs a batch prediction with a local CSV file as input
 * and downloads the prediction result into the current directory.
 */
public class BatchPredictionLocalFilesApp {
    // replace with your model id
    private static final String MODEL_ID = "9310c78a-3970-457f-afea-6e2e2545eb7b";
    // local CSV file containing the rows to predict on
    private static final String PREDICTION_DATA_FILE = "examples/datasets/german-credit-predict.csv";
    // folder the prediction result is downloaded to
    private static final String OUTPUT_FOLDER = "./";

    public static void main(String[] args) throws IOException {
        Engine engine = new Engine();
        Model deployedModel = engine.getModel(MODEL_ID);

        Prediction batchPrediction = predictFromLocalCsv(engine, deployedModel, PREDICTION_DATA_FILE);

        // download result to local
        batchPrediction.getResult().download(DownloadRequest.builder().outputFolder(OUTPUT_FOLDER).build());
    }

    /**
     * Builds a CSV file source for {@code csvPath} and runs a batch prediction on it.
     *
     * @param engine  engine used to build the file data source
     * @param model   model that runs the batch prediction
     * @param csvPath path of the local CSV file used as prediction input
     * @return the prediction returned by {@code runBatchPrediction}
     * @throws IOException propagated from the underlying SDK calls
     */
    private static Prediction predictFromLocalCsv(Engine engine, Model model, String csvPath)
            throws IOException {
        return model.runBatchPrediction(RunBatchPredictionRequest.builder()
                .dataSource(engine.buildFileSource(FileSourceRequest.builder()
                        .url(csvPath)
                        .fileType(FileType.CSV)
                        .build()))
                .build());
    }
}
Run batch prediction using a database as input
from aiaengine import Model, DatabaseSource
# Look up the model to predict with by its ID (replace with your own model ID)
model = Model(id='c8dbd4fd-4a33-4923-ab17-75e0a5139eb7')

# Describe the database table that holds the rows to run the prediction on.
# The connection values below are example placeholders -- substitute your own.
database_source = DatabaseSource(
    host='postgres.example.com',
    port='5432',
    user='postgres',
    password='postgres',
    database='postgres',
    table='german_credit',
)

# Run the batch prediction with data from the database
prediction = model.run_batch_prediction(database_source)

# Print up to the first 100 rows of the result via its pandas representation
preview = prediction.result.to_pandas().head(100)
print(preview)
Run batch prediction using an existing dataset as input
from aiaengine import Project, Column, DataType, FileSource
from aiaengine import Model, DatasetSource
# Import the `German Credit Data Prediction` dataset.
# If you already have a dataset, you can skip this step and just use the ID of
# that dataset to run the prediction in the next step.
project = Project(id='2ad4c25a-996a-4753-a9fe-eb4328eec9f2')
data_file = 'examples/datasets/german-credit-predict.csv'
dataset = project.create_dataset(
    name="German Credit Data for Prediction",
    data_source=FileSource(
        file_urls=[data_file],
        # Explicit schema: one Column per field, each declared as Text or Numeric
        schema=[
            Column('checking_status', DataType.Text),
            Column('duration', DataType.Numeric),
            Column('credit_history', DataType.Text),
            Column('purpose', DataType.Text),
            Column('credit_amount', DataType.Numeric),
            Column('savings_status', DataType.Text),
            Column('employment', DataType.Text),
            Column('installment_commitment', DataType.Numeric),
            Column('personal_status', DataType.Text),
            Column('other_parties', DataType.Text),
            Column('residence_since', DataType.Numeric),
            Column('property_magnitude', DataType.Text),
            Column('age', DataType.Numeric),
            Column('other_payment_plans', DataType.Text),
            Column('housing', DataType.Text),
            Column('existing_credits', DataType.Numeric),
            Column('job', DataType.Text),
            Column('num_dependents', DataType.Numeric),
            Column('own_telephone', DataType.Text),
            Column('foreign_worker', DataType.Text)
        ]
    )
)

# Get the model to predict with (replace with your own model ID)
model = Model(id='c8dbd4fd-4a33-4923-ab17-75e0a5139eb7')

# Run batch prediction with the dataset we just imported
dataset_id = dataset.id
prediction = model.run_batch_prediction(DatasetSource(dataset_id=dataset_id))

# Print up to the first 100 rows of the result via its pandas representation
print(prediction.result.to_pandas().head(100))
package com.aiaengine.examples.model;
import com.aiaengine.*;
import com.aiaengine.dataset.request.DownloadRequest;
import com.aiaengine.datasource.DataSource;
import com.aiaengine.datasource.Schema;
import com.aiaengine.datasource.file.CSVFileSettings;
import com.aiaengine.datasource.file.FileSourceRequest;
import com.aiaengine.datasource.file.FileType;
import com.aiaengine.model.request.RunBatchPredictionRequest;
import com.aiaengine.project.request.CreateDatasetRequest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Example application: imports a local CSV file as a dataset, runs a batch
 * prediction with that dataset as input, and downloads the prediction result
 * into the current directory.
 */
public class BatchPredictionDatasetApp {
    // project the prediction dataset is created in
    private static final String PROJECT_ID = "c6e4589e-9cf4-4191-a85b-6eaf5fa80bf5";
    // replace with your model id
    private static final String MODEL_ID = "9310c78a-3970-457f-afea-6e2e2545eb7b";
    // local CSV file containing the rows to predict on
    private static final String DATA_FILE_PATH = "examples/datasets/german-credit-predict.csv";
    // folder the prediction result is downloaded to
    private static final String OUTPUT_FOLDER = "./";

    public static void main(String[] args) throws IOException {
        Engine engine = new Engine();

        // import the `German Credit Data Prediction` dataset
        // If you have an existing dataset, you can skip this step and just use the ID of
        // that dataset to run the prediction in the next step
        Dataset importedDataset = importPredictionDataset(engine);

        Model deployedModel = engine.getModel(MODEL_ID);

        // run batch prediction with dataset we just imported
        Prediction batchPrediction = deployedModel.runBatchPrediction(RunBatchPredictionRequest.builder()
                .dataSource(new DatasetSource(importedDataset.getId()))
                .build());

        // download result to local
        batchPrediction.getResult().download(DownloadRequest.builder().outputFolder(OUTPUT_FOLDER).build());
    }

    /**
     * Creates the "German Credit Data for Prediction" dataset in the example
     * project from the local CSV file, using an explicit column schema.
     *
     * @param engine engine used to look up the project and build the file source
     * @return the dataset returned by {@code createDataset}
     * @throws IOException propagated from the underlying SDK calls
     */
    private static Dataset importPredictionDataset(Engine engine) throws IOException {
        Project project = engine.getProject(PROJECT_ID);
        List<Schema.Column> columns = germanCreditColumns();

        DataSource localDataSource = engine.buildFileSource(FileSourceRequest.builder()
                .fileType(FileType.CSV)
                .url(DATA_FILE_PATH)
                .fileSettings(new CSVFileSettings())
                .schema(new Schema(columns))
                .build());

        return project.createDataset(CreateDatasetRequest.builder()
                .name("German Credit Data for Prediction")
                .dataSource(localDataSource)
                .timeout(900)
                .build());
    }

    /** Returns the column definitions of the prediction CSV, in file order. */
    private static List<Schema.Column> germanCreditColumns() {
        List<Schema.Column> columns = new ArrayList<>();
        columns.add(text("checking_status"));
        columns.add(numeric("duration"));
        columns.add(text("credit_history"));
        columns.add(text("purpose"));
        columns.add(numeric("credit_amount"));
        columns.add(text("savings_status"));
        columns.add(text("employment"));
        columns.add(numeric("installment_commitment"));
        columns.add(text("personal_status"));
        columns.add(text("other_parties"));
        columns.add(numeric("residence_since"));
        columns.add(text("property_magnitude"));
        columns.add(numeric("age"));
        columns.add(text("other_payment_plans"));
        columns.add(text("housing"));
        columns.add(numeric("existing_credits"));
        columns.add(text("job"));
        columns.add(numeric("num_dependents"));
        columns.add(text("own_telephone"));
        columns.add(text("foreign_worker"));
        return columns;
    }

    /** Creates a column with semantic type {@code TEXT}. */
    private static Schema.Column text(String name) {
        return new Schema.Column(name, Schema.SemanticType.TEXT);
    }

    /** Creates a column with semantic type {@code NUMERIC}. */
    private static Schema.Column numeric(String name) {
        return new Schema.Column(name, Schema.SemanticType.NUMERIC);
    }
}