Hi,
I am trying to run a script using a command job from an Azure Machine Learning workspace. Everything worked perfectly until I added MLflow logging calls (mlflow.log_param() / mlflow.log_metric()) to my code. Now I get an exception:
mlflow.tracking.registry.UnsupportedModelRegistryStoreURIException: Model registry functionality is unavailable; got unsupported URI 'azureml://canadacentral.api.azureml.ms/mlflow/v1.0/subscriptions/CONFIDENTIAL_INFO_HERE/Microsoft.MachineLearningServices/workspaces/' for model registry data storage.
Here is my script:
%%writefile {preprocessing_src_dir}/preprocess.py
"""
Preprocess and save the data.
"""
import pandas as pd
from preprocessor import Preprocessor
import yaml
import argparse
import mlflow
def parse_args(argv=None):
    """Parse the command-line arguments of the preprocessing script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Returns:
        argparse.Namespace with the string attributes ``input_data`` and
        ``output_data``.

    Raises:
        SystemExit: Raised by argparse when a required argument is missing
            or an unknown argument is supplied.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them the values would be None and the
    # failure would only surface later, inside the pandas read/write calls.
    parser.add_argument(
        "--input_data", type=str, required=True, help="path to input data"
    )
    parser.add_argument(
        "--output_data", type=str, required=True, help="path to output data"
    )
    return parser.parse_args(argv)
def main(args):
    """Read the raw CSV, write it out as parquet, and log the run to MLflow.

    Args:
        args: Parsed arguments exposing ``input_data`` (path of the CSV to
            read) and ``output_data`` (path the parquet file is written to).
    """
    # Some code

    # Load the data
    df = pd.read_csv(args.input_data, skiprows=2, encoding='latin-1')

    # -------------------
    # Preprocess the data
    # -------------------

    # Save the preprocessed data
    df.to_parquet(args.output_data, index=False)

    # Log the preprocessing steps and the data shape
    # NOTE(review): the values below are not defined in the visible code;
    # presumably the elided sections above create them - confirm.
    preprocessing_params = {
        "features_to_drop": features_to_drop,
        "correlated_features_to_drop": corr_features,
        "dates_features": dates_features,
        "missing_features": missing_features,
        "values_to_drop": values_to_drop,
        "categories_to_merge": categories_to_merge,
        "features_to_rename": features_to_rename,
    }
    for param_name, param_value in preprocessing_params.items():
        mlflow.log_param(param_name, param_value)

    row_count, column_count = df.shape[0], df.shape[1]
    mlflow.log_metric("n_samples", row_count)
    mlflow.log_metric("n_features", column_count)

    print("Preprocessing done!")
if __name__ == "__main__":
    print("Running script to preprocess the data...")
    # Parse the arguments before opening the run so that a bad command line
    # exits without leaving a started MLflow run behind.
    args = parse_args()
    # Using the run as a context manager guarantees it is ended even when
    # main() raises; MLflow then records the run as failed instead of
    # skipping the end_run() call entirely.
    with mlflow.start_run():
        main(args)
Any help would be much appreciated.