Dataset
This section shows how to create, retrieve, update, list, and delete datasets in the AI & Analytics Engine using the SDK.
Creating a dataset
Here we show an example of creating a dataset by uploading a CSV file from the local file system.
from aiaengine import Org, Project, FileSource, Column, DataType
# Create a new demo project in the org.
org = Org(id='b6240512-cd17-43a0-8297-84c51c1bc5a0')  # replace with your org ID
project = org.create_project(name="Demo project using Python SDK", description="Your demo project")
# Alternatively, get an existing project that you want to work on:
# project = Project(id='ID_of_your_project')  # replace with your own project ID

# Import the `German Credit Data` dataset from a local CSV file.
data_file = 'examples/datasets/german-credit.csv'
# You can use the `print_schema` utility function to print the auto-inferred schema.
# Note: the line below needs pandas imported as `pd` and `print_schema` in scope;
# neither is part of the import statement of this example.
# print_schema(pd.read_csv(data_file, header=0))
dataset = project.create_dataset(
    name="German Credit Data",  # plain string: there is nothing to interpolate
    data_source=FileSource(
        file_urls=[data_file],
        # One Column entry per CSV column, each with its declared data type.
        schema=[
            Column('checking_status', DataType.Text),
            Column('duration', DataType.Numeric),
            Column('credit_history', DataType.Text),
            Column('purpose', DataType.Text),
            Column('credit_amount', DataType.Numeric),
            Column('savings_status', DataType.Text),
            Column('employment', DataType.Text),
            Column('installment_commitment', DataType.Numeric),
            Column('personal_status', DataType.Text),
            Column('other_parties', DataType.Text),
            Column('residence_since', DataType.Numeric),
            Column('property_magnitude', DataType.Text),
            Column('age', DataType.Numeric),
            Column('other_payment_plans', DataType.Text),
            Column('housing', DataType.Text),
            Column('existing_credits', DataType.Numeric),
            Column('job', DataType.Text),
            Column('num_dependents', DataType.Numeric),
            Column('own_telephone', DataType.Text),
            Column('foreign_worker', DataType.Text),
            Column('class', DataType.Text),
        ],
    ),
)
# Print the ID of the newly created dataset.
print(dataset.id)
package com.aiaengine.examples.dataset;
import com.aiaengine.Dataset;
import com.aiaengine.Engine;
import com.aiaengine.Org;
import com.aiaengine.Project;
import com.aiaengine.datasource.DataSource;
import com.aiaengine.datasource.Schema;
import com.aiaengine.datasource.file.CSVFileSettings;
import com.aiaengine.datasource.file.FileSourceRequest;
import com.aiaengine.datasource.file.FileType;
import com.aiaengine.org.request.CreateProjectRequest;
import com.aiaengine.project.request.CreateDatasetRequest;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
/**
 * Example application: creates a demo project and imports the German Credit
 * CSV file from the local file system as a new dataset, then prints the
 * dataset ID.
 */
public class ImportCsvApp {

    /** Location of the CSV file that is uploaded by this example. */
    private static final String DATA_FILE_PATH = "examples/datasets/german-credit.csv";

    public static void main(String[] args) throws FileNotFoundException {
        // The engine object is the entry point for every SDK call below.
        Engine engine = new Engine();

        // Create a new demo project in the org (replace with your org ID).
        Org org = engine.getOrg("cae24b10-e6b0-4d61-8cef-a9f4b8f6133d");
        Project project = org.createProject(
                CreateProjectRequest.builder()
                        .name("Demo project using Java SDK")
                        .description("Your demo project")
                        .build());
        // Alternatively, get an existing project that you want to work on:
        // Project project = engine.getProject("ID_of_your_project"); // replace with your own project ID

        // Describe the local CSV file together with its explicit column schema.
        DataSource csvSource = engine.buildFileSource(
                FileSourceRequest.builder()
                        .fileType(FileType.CSV)
                        .url(DATA_FILE_PATH)
                        .fileSettings(new CSVFileSettings())
                        .schema(new Schema(germanCreditColumns()))
                        .build());

        // Create the dataset from that source.
        Dataset created = project.createDataset(
                CreateDatasetRequest.builder()
                        .name("German Credit Data")
                        .dataSource(csvSource)
                        .timeout(900) // NOTE(review): unit is presumably seconds - confirm against the SDK
                        .build());

        System.out.println(created.getId());
    }

    /** Builds the column schema of the German Credit dataset, in file order. */
    private static List<Schema.Column> germanCreditColumns() {
        List<Schema.Column> schemaColumns = new ArrayList<>();
        schemaColumns.add(text("checking_status"));
        schemaColumns.add(numeric("duration"));
        schemaColumns.add(text("credit_history"));
        schemaColumns.add(text("purpose"));
        schemaColumns.add(numeric("credit_amount"));
        schemaColumns.add(text("savings_status"));
        schemaColumns.add(text("employment"));
        schemaColumns.add(numeric("installment_commitment"));
        schemaColumns.add(text("personal_status"));
        schemaColumns.add(text("other_parties"));
        schemaColumns.add(numeric("residence_since"));
        schemaColumns.add(text("property_magnitude"));
        schemaColumns.add(numeric("age"));
        schemaColumns.add(text("other_payment_plans"));
        schemaColumns.add(text("housing"));
        schemaColumns.add(numeric("existing_credits"));
        schemaColumns.add(text("job"));
        schemaColumns.add(numeric("num_dependents"));
        schemaColumns.add(text("own_telephone"));
        schemaColumns.add(text("foreign_worker"));
        schemaColumns.add(text("class"));
        return schemaColumns;
    }

    /** Creates a column with the TEXT semantic type. */
    private static Schema.Column text(String columnName) {
        return new Schema.Column(columnName, Schema.SemanticType.TEXT);
    }

    /** Creates a column with the NUMERIC semantic type. */
    private static Schema.Column numeric(String columnName) {
        return new Schema.Column(columnName, Schema.SemanticType.NUMERIC);
    }
}
Refer to Data Import for more examples.
Retrieving information of a dataset
Once a dataset has been imported into the platform, you can retrieve information about it by providing its dataset ID.
package com.aiaengine.examples.dataset;
import com.aiaengine.Dataset;
import com.aiaengine.Engine;
import java.io.FileNotFoundException;
/**
 * Example application: looks up an existing dataset by its ID and prints the
 * dataset name.
 */
public class GetDatasetApp {

    public static void main(String[] args) throws FileNotFoundException {
        Engine engine = new Engine();

        // Replace the ID below with the ID of your own dataset.
        String datasetName = engine.getDataset("f55717be-14dd-40ea-8cad-f71370d5b961").getName();
        System.out.println(datasetName);
    }
}
Updating a dataset
You can also modify the name and description of an existing dataset in the platform.
package com.aiaengine.examples.dataset;
import com.aiaengine.Dataset;
import com.aiaengine.Engine;
import com.aiaengine.org.request.UpdateOrgRequest;
import java.io.FileNotFoundException;
/**
 * Example application: changes the name and description of an existing
 * dataset identified by its ID.
 */
public class UpdateDatasetApp {

    public static void main(String[] args) throws FileNotFoundException {
        Engine engine = new Engine();

        // Replace the ID below with the ID of the dataset you want to modify.
        Dataset target = engine.getDataset("93955467-8139-4423-9a02-d6cb4c422927");

        // NOTE(review): the request is built with UpdateOrgRequest (from the org.request
        // package) although it is applied to a dataset - possibly a copy-paste slip;
        // confirm the intended request class against the SDK.
        target.update(
                UpdateOrgRequest.builder()
                        .name("Updated Dataset")
                        .description("This dataset has been updated")
                        .build());
    }
}
Listing datasets in a project
For a particular project, you can list all of its datasets by providing the project ID.
package com.aiaengine.examples.dataset;
import com.aiaengine.Dataset;
import com.aiaengine.Engine;
import com.aiaengine.Project;
import java.io.FileNotFoundException;
import java.util.List;
/**
 * Example application: prints the name of every dataset that belongs to a
 * given project.
 */
public class ListDatasetApp {

    public static void main(String[] args) throws FileNotFoundException {
        Engine engine = new Engine();

        // Get the existing project whose datasets should be listed
        // (replace the ID below with your own project ID).
        Project project = engine.getProject("403a448d-9d86-497f-a9f6-414afa72a415");

        // Print one dataset name per line, in the order returned by the project.
        for (Dataset dataset : project.listDatasets()) {
            System.out.println(dataset.getName());
        }
    }
}
Deleting a dataset
If a particular dataset is no longer in use, you can remove it by specifying the dataset ID.
package com.aiaengine.examples.dataset;
import com.aiaengine.Dataset;
import com.aiaengine.Engine;
import java.io.FileNotFoundException;
/**
 * Example application: deletes an existing dataset identified by its ID.
 */
public class DeleteDatasetApp {

    public static void main(String[] args) throws FileNotFoundException {
        Engine engine = new Engine();

        // Replace the ID below with the ID of the dataset you want to remove.
        engine.getDataset("f55717be-14dd-40ea-8cad-f71370d5b961").delete();
    }
}