Skip to content

Running batch prediction

This section shows you how to use a deployed model to make predictions on new data with the AI & Analytics Engine.

Run batch prediction using local files as input

from aiaengine import Model, FileSource

# reference the model to predict with by its ID
model = Model(id='c8dbd4fd-4a33-4923-ab17-75e0a5139eb7')

# list the local file(s) to predict on, then run batch prediction using FileSource
prediction_files = ['examples/datasets/german-credit-predict.csv']
prediction = model.run_batch_prediction(FileSource(file_urls=prediction_files))

# convert the result to pandas and print at most the first 100 rows
print(prediction.result.to_pandas().head(100))
package com.aiaengine.examples.model;

import com.aiaengine.Engine;
import com.aiaengine.Model;
import com.aiaengine.Prediction;
import com.aiaengine.dataset.request.DownloadRequest;
import com.aiaengine.datasource.file.FileSourceRequest;
import com.aiaengine.datasource.file.FileType;
import com.aiaengine.model.request.RunBatchPredictionRequest;

import java.io.IOException;

/**
 * Example application: runs a batch prediction with a model, using a local CSV
 * file as the input data, and downloads the prediction result to the current
 * directory.
 */
public class BatchPredictionLocalFilesApp {
    //replace with your model id
    private static final String MODEL_ID = "9310c78a-3970-457f-afea-6e2e2545eb7b";
    // local CSV file holding the rows to predict on
    private static final String PREDICTION_DATA_FILE = "examples/datasets/german-credit-predict.csv";
    // folder the prediction result is downloaded into
    private static final String OUTPUT_FOLDER = "./";

    public static void main(String[] args) throws IOException {
        Engine engine = new Engine();
        Model model = engine.getModel(MODEL_ID);

        // describe the input file, then wrap it in a batch prediction request
        FileSourceRequest fileRequest = FileSourceRequest.builder()
                .url(PREDICTION_DATA_FILE)
                .fileType(FileType.CSV)
                .build();
        RunBatchPredictionRequest predictionRequest = RunBatchPredictionRequest.builder()
                .dataSource(engine.buildFileSource(fileRequest))
                .build();
        Prediction prediction = model.runBatchPrediction(predictionRequest);

        //download result to local
        DownloadRequest downloadRequest = DownloadRequest.builder()
                .outputFolder(OUTPUT_FOLDER)
                .build();
        prediction.getResult().download(downloadRequest);
    }
}

Run batch prediction using a database as input

from aiaengine import Model, DatabaseSource

# reference the model to predict with by its ID
model = Model(id='c8dbd4fd-4a33-4923-ab17-75e0a5139eb7')

# connection details of the database table that holds the rows to predict on
# (example values -- replace with your own)
database_source = DatabaseSource(
    host='postgres.example.com',
    port='5432',
    user='postgres',
    password='postgres',
    database='postgres',
    table='german_credit',
)

# run batch prediction with data from a database
prediction = model.run_batch_prediction(database_source)

# convert the result to pandas and print at most the first 100 rows
print(prediction.result.to_pandas().head(100))
Java example: coming soon

Run batch prediction using an existing dataset as input

from aiaengine import Project, Column, SemanticType, FileSource
from aiaengine import Model, DatasetSource

# import the `German Credit Data Prediction` dataset
# If you have an existing dataset, you can skip this step and just use the ID of
# that dataset to run the prediction in the next step
project = Project(id='2ad4c25a-996a-4753-a9fe-eb4328eec9f2')
data_file = 'examples/datasets/german-credit-predict.csv'
dataset = project.create_dataset(
    # plain string literal: the name has no placeholders, so no f-string is needed
    name="German Credit Data for Prediction",
    data_source=FileSource(
        file_urls=[data_file],
        # explicit schema: one Column per field, each tagged with its semantic type
        schema=[
            Column('checking_status', SemanticType.Text),
            Column('duration', SemanticType.Numeric),
            Column('credit_history', SemanticType.Text),
            Column('purpose', SemanticType.Text),
            Column('credit_amount', SemanticType.Numeric),
            Column('savings_status', SemanticType.Text),
            Column('employment', SemanticType.Text),
            Column('installment_commitment', SemanticType.Numeric),
            Column('personal_status', SemanticType.Text),
            Column('other_parties', SemanticType.Text),
            Column('residence_since', SemanticType.Numeric),
            Column('property_magnitude', SemanticType.Text),
            Column('age', SemanticType.Numeric),
            Column('other_payment_plans', SemanticType.Text),
            Column('housing', SemanticType.Text),
            Column('existing_credits', SemanticType.Numeric),
            Column('job', SemanticType.Text),
            Column('num_dependents', SemanticType.Numeric),
            Column('own_telephone', SemanticType.Text),
            Column('foreign_worker', SemanticType.Text)
        ]
    )
)

# get the model
model = Model(id='c8dbd4fd-4a33-4923-ab17-75e0a5139eb7')

# run batch prediction with dataset we just imported
dataset_id = dataset.id
prediction = model.run_batch_prediction(DatasetSource(dataset_id=dataset_id))

# convert the result to pandas and print at most the first 100 rows
print(prediction.result.to_pandas().head(100))
package com.aiaengine.examples.model;

import com.aiaengine.*;
import com.aiaengine.dataset.request.DownloadRequest;
import com.aiaengine.datasource.DataSource;
import com.aiaengine.datasource.Schema;
import com.aiaengine.datasource.file.CSVFileSettings;
import com.aiaengine.datasource.file.FileSourceRequest;
import com.aiaengine.datasource.file.FileType;
import com.aiaengine.model.request.RunBatchPredictionRequest;
import com.aiaengine.project.request.CreateDatasetRequest;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Example application: creates a dataset in a project from a local CSV file
 * with an explicit column schema, runs a batch prediction on that dataset with
 * a model, and downloads the prediction result to the current directory.
 */
public class BatchPredictionDatasetApp {
    private static final String PROJECT_ID = "c6e4589e-9cf4-4191-a85b-6eaf5fa80bf5";
    //replace with your model id
    private static final String MODEL_ID = "9310c78a-3970-457f-afea-6e2e2545eb7b";
    private static final String DATA_FILE_PATH = "examples/datasets/german-credit-predict.csv";
    private static final String DATASET_NAME = "German Credit Data for Prediction";
    // timeout passed to the create-dataset request (units not shown here -- TODO confirm)
    private static final int CREATE_DATASET_TIMEOUT = 900;

    public static void main(String[] args) throws IOException {
        Engine engine = new Engine();
        // import the `German Credit Data Prediction` dataset
        // If you have an existing dataset, you can skip this step and just use the ID of
        // that dataset to run the prediction in the next step
        Project project = engine.getProject(PROJECT_ID);
        List<Schema.Column> columns = germanCreditColumns();
        DataSource localDataSource = engine.buildFileSource(FileSourceRequest.builder()
                .fileType(FileType.CSV)
                .url(DATA_FILE_PATH)
                .fileSettings(new CSVFileSettings())
                .schema(new Schema(columns))
                .build());

        Dataset dataset = project.createDataset(CreateDatasetRequest.builder()
                .name(DATASET_NAME)
                .dataSource(localDataSource)
                .timeout(CREATE_DATASET_TIMEOUT)
                .build());

        Model model = engine.getModel(MODEL_ID);
        //run batch prediction with dataset we just imported
        Prediction prediction = model.runBatchPrediction(RunBatchPredictionRequest.builder()
                .dataSource(new DatasetSource(dataset.getId()))
                .build());
        //download result to local
        prediction.getResult().download(DownloadRequest.builder().outputFolder("./").build());
    }

    // Builds the column schema of the prediction data file; entries are added in a fixed order.
    private static List<Schema.Column> germanCreditColumns() {
        List<Schema.Column> columns = new ArrayList<>();
        columns.add(text("checking_status"));
        columns.add(numeric("duration"));
        columns.add(text("credit_history"));
        columns.add(text("purpose"));
        columns.add(numeric("credit_amount"));
        columns.add(text("savings_status"));
        columns.add(text("employment"));
        columns.add(numeric("installment_commitment"));
        columns.add(text("personal_status"));
        columns.add(text("other_parties"));
        columns.add(numeric("residence_since"));
        columns.add(text("property_magnitude"));
        columns.add(numeric("age"));
        columns.add(text("other_payment_plans"));
        columns.add(text("housing"));
        columns.add(numeric("existing_credits"));
        columns.add(text("job"));
        columns.add(numeric("num_dependents"));
        columns.add(text("own_telephone"));
        columns.add(text("foreign_worker"));
        return columns;
    }

    // Creates a schema column with the TEXT semantic type.
    private static Schema.Column text(String name) {
        return new Schema.Column(name, Schema.SemanticType.TEXT);
    }

    // Creates a schema column with the NUMERIC semantic type.
    private static Schema.Column numeric(String name) {
        return new Schema.Column(name, Schema.SemanticType.NUMERIC);
    }
}