Hyperparameter afstemmen upgraden naar SDK v2

In SDK v2 worden het afstemmen van hyperparameters geconsolideerd in taken.

Een taak heeft een type. De meeste taken zijn opdrachttaken die een commanduitvoeren, zoals python main.py. Wat in een taak wordt uitgevoerd, is agnostisch voor elke programmeertaal, dus u kunt scripts uitvoeren bash , interpreters aanroepen python , een aantal curl opdrachten uitvoeren of iets anders.

Een sweep-taak is een ander type taak, dat instellingen voor opruimen definieert en kan worden gestart door de methode van de opdracht opruimen aan te roepen.

Als u wilt upgraden, moet u uw code wijzigen voor het definiƫren en verzenden van uw hyperparameter-afstemmingsexperiment naar SDK v2. Wat u in de taak uitvoert, hoeft niet te worden bijgewerkt naar SDK v2. Het is echter raadzaam om code die specifiek is voor Azure Machine Learning te verwijderen uit uw modeltrainingsscripts. Deze scheiding zorgt voor een eenvoudigere overgang tussen lokale en cloud en wordt beschouwd als best practice voor volwassen MLOps. In de praktijk betekent dit het verwijderen azureml.* van coderegels. Modelregistratie- en traceringscode moet worden vervangen door MLflow. Zie MLflow gebruiken in v2 voor meer informatie.

Dit artikel bevat een vergelijking van scenario('s) in SDK v1 en SDK v2.

Hyperparameterafstemming uitvoeren in een experiment

  • SDK v1

    from azureml.core import ScriptRunConfig, Experiment, Workspace
    from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
    from azureml.train.hyperdrive import choice, loguniform
    
    dataset = Dataset.get_by_name(ws, 'mnist-dataset')
    
    # list the files referenced by mnist dataset
    dataset.to_path()
    
    #define the search space for your hyperparameters
    param_sampling = RandomParameterSampling(
        {
            '--batch-size': choice(25, 50, 100),
            '--first-layer-neurons': choice(10, 50, 200, 300, 500),
            '--second-layer-neurons': choice(10, 50, 200, 500),
            '--learning-rate': loguniform(-6, -1)
        }
    )
    
    args = ['--data-folder', dataset.as_named_input('mnist').as_mount()]
    
    #Set up your script run
    src = ScriptRunConfig(source_directory=script_folder,
                          script='keras_mnist.py',
                          arguments=args,
                          compute_target=compute_target,
                          environment=keras_env)
    
    # Set early stopping on this one
    early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)
    
    # Define the configurations for your hyperparameter tuning experiment
    hyperdrive_config = HyperDriveConfig(run_config=src,
                                         hyperparameter_sampling=param_sampling,
                                         policy=early_termination_policy,
                                         primary_metric_name='Accuracy',
                                         primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                         max_total_runs=20,
                                         max_concurrent_runs=4)
    # Specify your experiment details                                     
    experiment = Experiment(workspace, experiment_name)
    
    hyperdrive_run = experiment.submit(hyperdrive_config)
    
    #Find the best model
    best_run = hyperdrive_run.get_best_run_by_primary_metric()
    
  • SDK v2

    from azure.ai.ml import MLClient
    from azure.ai.ml import command, Input
    from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy
    from azure.identity import DefaultAzureCredential
    
    # Create your command
    command_job_for_sweep = command(
        code="./src",
        command="python main.py --iris-csv ${{inputs.iris_csv}} --learning-rate ${{inputs.learning_rate}} --boosting ${{inputs.boosting}}",
        environment="AzureML-lightgbm-3.2-ubuntu18.04-py37-cpu@latest",
        inputs={
            "iris_csv": Input(
                type="uri_file",
                path="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv",
            ),
            #define the search space for your hyperparameters
            "learning_rate": Uniform(min_value=0.01, max_value=0.9),
            "boosting": Choice(values=["gbdt", "dart"]),
        },
        compute="cpu-cluster",
    )
    
    # Call sweep() on your command job to sweep over your parameter expressions
    sweep_job = command_job_for_sweep.sweep(
        compute="cpu-cluster", 
        sampling_algorithm="random",
        primary_metric="test-multi_logloss",
        goal="Minimize",
    )
    
    # Define the limits for this sweep
    sweep_job.set_limits(max_total_trials=20, max_concurrent_trials=10, timeout=7200)
    
    # Set early stopping on this one
    sweep_job.early_termination = MedianStoppingPolicy(delay_evaluation=5, evaluation_interval=2)
    
    # Specify your experiment details
    sweep_job.display_name = "lightgbm-iris-sweep-example"
    sweep_job.experiment_name = "lightgbm-iris-sweep-example"
    sweep_job.description = "Run a hyperparameter sweep job for LightGBM on Iris dataset."
    
    # submit the sweep
    returned_sweep_job = ml_client.create_or_update(sweep_job)
    
    # get a URL for the status of the job
    returned_sweep_job.services["Studio"].endpoint
    
    # Download best trial model output
    ml_client.jobs.download(returned_sweep_job.name, output_name="model")
    

Hyperparameter afstemmen in een pijplijn uitvoeren

  • SDK v1

    
    tf_env = Environment.get(ws, name='AzureML-TensorFlow-2.0-GPU')
    data_folder = dataset.as_mount()
    src = ScriptRunConfig(source_directory=script_folder,
                          script='tf_mnist.py',
                          arguments=['--data-folder', data_folder],
                          compute_target=compute_target,
                          environment=tf_env)
    
    #Define HyperDrive configs
    ps = RandomParameterSampling(
        {
            '--batch-size': choice(25, 50, 100),
            '--first-layer-neurons': choice(10, 50, 200, 300, 500),
            '--second-layer-neurons': choice(10, 50, 200, 500),
            '--learning-rate': loguniform(-6, -1)
        }
    )
    
    early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)
    
    hd_config = HyperDriveConfig(run_config=src, 
                                 hyperparameter_sampling=ps,
                                 policy=early_termination_policy,
                                 primary_metric_name='validation_acc', 
                                 primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, 
                                 max_total_runs=4,
                                 max_concurrent_runs=4)
    
    metrics_output_name = 'metrics_output'
    metrics_data = PipelineData(name='metrics_data',
                                datastore=datastore,
                                pipeline_output_name=metrics_output_name,
                                training_output=TrainingOutput("Metrics"))
    
    model_output_name = 'model_output'
    saved_model = PipelineData(name='saved_model',
                                datastore=datastore,
                                pipeline_output_name=model_output_name,
                                training_output=TrainingOutput("Model",
                                                               model_file="outputs/model/saved_model.pb"))
    #Create HyperDriveStep
    hd_step_name='hd_step01'
    hd_step = HyperDriveStep(
        name=hd_step_name,
        hyperdrive_config=hd_config,
        inputs=[data_folder],
        outputs=[metrics_data, saved_model])                             
    
    #Find and register best model
    conda_dep = CondaDependencies()
    conda_dep.add_pip_package("azureml-sdk")
    
    rcfg = RunConfiguration(conda_dependencies=conda_dep)
    
    register_model_step = PythonScriptStep(script_name='register_model.py',
                                           name="register_model_step01",
                                           inputs=[saved_model],
                                           compute_target=cpu_cluster,
                                           arguments=["--saved-model", saved_model],
                                           allow_reuse=True,
                                           runconfig=rcfg)
    
    register_model_step.run_after(hd_step)
    
    #Run the pipeline
    pipeline = Pipeline(workspace=ws, steps=[hd_step, register_model_step])
    pipeline_run = exp.submit(pipeline)
    
    
  • SDK v2

    train_component_func = load_component(path="./train.yml")
    score_component_func = load_component(path="./predict.yml")
    
    # define a pipeline
    @pipeline()
    def pipeline_with_hyperparameter_sweep():
        """Tune hyperparameters using sample components."""
        train_model = train_component_func(
            data=Input(
                type="uri_file",
                path="wasbs://datasets@azuremlexamples.blob.core.windows.net/iris.csv",
            ),
            c_value=Uniform(min_value=0.5, max_value=0.9),
            kernel=Choice(["rbf", "linear", "poly"]),
            coef0=Uniform(min_value=0.1, max_value=1),
            degree=3,
            gamma="scale",
            shrinking=False,
            probability=False,
            tol=0.001,
            cache_size=1024,
            verbose=False,
            max_iter=-1,
            decision_function_shape="ovr",
            break_ties=False,
            random_state=42,
        )
        sweep_step = train_model.sweep(
            primary_metric="training_f1_score",
            goal="minimize",
            sampling_algorithm="random",
            compute="cpu-cluster",
        )
        sweep_step.set_limits(max_total_trials=20, max_concurrent_trials=10, timeout=7200)
    
        score_data = score_component_func(
            model=sweep_step.outputs.model_output, test_data=sweep_step.outputs.test_data
        )
    
    
    pipeline_job = pipeline_with_hyperparameter_sweep()
    
    # set pipeline level compute
    pipeline_job.settings.default_compute = "cpu-cluster"
    
    # submit job to workspace
    pipeline_job = ml_client.jobs.create_or_update(
        pipeline_job, experiment_name="pipeline_samples"
    )
    pipeline_job
    

Toewijzing van belangrijke functionaliteit in SDK v1 en SDK v2

Functionaliteit in SDK v1 Ruwe toewijzing in SDK v2
HyperDriveRunConfig() SweepJob()
hyperdrive-pakket sweep-pakket

Volgende stappen

Zie voor meer informatie: