I'm trying to deploy a locally trained RandomForest model into Azure Machine Learning Studio.
training code (whentrain.ipynb) :
#import libs and packages
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import r2_score
from math import sqrt
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
import xgboost as xgb
from sklearn.metrics import accuracy_score
from azureml.core import Workspace, Dataset
# get existing workspace
workspace = Workspace.from_config(path="config.json")
# get the datastore to upload prepared data
datastore = workspace.get_default_datastore()
# load the dataset which is placed in the data folder
dataset = Dataset.Tabular.from_delimited_files(datastore.path('UI/12-23-2021_023530_UTC/prepped_data101121.csv'))
dataset = dataset.to_pandas_dataframe()
# Create the outputs directories to save the model and images
os.makedirs('outputs/model', exist_ok=True)
os.makedirs('outputs/output', exist_ok=True)
dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset = dataset.set_index('Date')
###
scaler = MinMaxScaler()
#inputs
X = dataset.iloc[:, 1:]
#output
y = dataset.iloc[:, :1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state= 42, shuffle=True)
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)
###
model1 = RandomForestRegressor(n_estimators = 6,
max_depth = 10,
min_samples_leaf= 1,
oob_score = 'True',
random_state=42)
model1.fit(X_train, y_train.values.ravel())
y_pred2 = model1.predict(X_test)
And here is the code on the estimator part (estimator.ipynb):
from azureml.core import Experiment
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.dnn import TensorFlow
from azureml.widgets import RunDetails
import os
workspace = Workspace.from_config(path="config.json")
exp = Experiment(workspace=workspace, name='azure-exp')
cluster_name = "gpucluster"
try:
compute_target = ComputeTarget(workspace=workspace, name=cluster_name)
print('Found existing compute target')
except ComputeTargetException:
print('Creating a new compute target...')
compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_DS3_v2',
max_nodes=1)
compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)
compute_target.wait_for_completion(show_output=True) # , min_node_count=None, timeout_in_minutes=20)
# For a more detailed view of current AmlCompute status, use get_status()
print(compute_target.get_status().serialize())
from azureml.core import ScriptRunConfig
source_directory = os.getcwd()
from azureml.core import Environment
myenv = Environment("user-managed-env")
myenv.python.user_managed_dependencies =True
from azureml.core import Dataset
test_data_ds = Dataset.get_by_name(workspace, name='prepped_data101121')
src = ScriptRunConfig(source_directory=source_directory,
script='whentrain.ipynb',
arguments=['--input-data', test_data_ds.as_named_input('prepped_data101121')],
compute_target=compute_target,
environment=myenv)
run = exp.submit(src)
RunDetails(run).show()
run.wait_for_completion(show_output=True)
The error that happens in run.wait_for_completion states :
[stderr]Traceback (most recent call last):
[stderr] File "whentrain.ipynb", line 107, in <module>
[stderr] "notebookHasBeenCompleted": true
[stderr]NameError: name 'true' is not defined
[stderr]
As you can see in my whentrain.ipynb, it does not even reach line 107, and I could not find where this error come from. So how do I fix it?
I'm running the Notebook on Python 3.
UPDATE:
Okay, after a little adjustment that should not affect the whole code (I just removed some extra columns, added model save code in whentrain.ipynb making use of import os) it's now giving me somewhat the same error.
[stderr]Traceback (most recent call last):
[stderr] File "whentrain.ipynb", line 115, in <module>
[stderr] "source_hidden": false,
[stderr]NameError: name 'false' is not defined
[stderr]