Basic classification: Classify data with the QDK

In this guide, you will learn how to run a quantum sequential classifier written in Q# using the Quantum Machine Learning library of the QDK. To do that, we will train a simple sequential model using a classifier structure defined in Q#. The model is trained on a half-moon dataset with training and validation data that you can find in the code samples. We will create our Q# project using either a Python or a C# program to load data and call Q# operations from.

Prerequisites

dotnet add package Microsoft.Quantum.MachineLearning

Q# classifier code

We start by creating a file called Training.qs and adding the following code to it:

namespace Microsoft.Quantum.Samples {
    open Microsoft.Quantum.Convert;
    open Microsoft.Quantum.Intrinsic;
    open Microsoft.Quantum.Canon;
    open Microsoft.Quantum.Arrays;
    open Microsoft.Quantum.MachineLearning;
    open Microsoft.Quantum.Math;

    /// # Summary
    /// Appends one engineered feature — `scale` times the product of all
    /// features in `sample` — to the end of the sample vector.
    function WithProductKernel(scale : Double, sample : Double[]) : Double[] {
        return sample + [scale * Fold(TimesD, 1.0, sample)];
    }

    /// # Summary
    /// Preprocesses every sample vector by appending the product-kernel
    /// feature (with unit scale) to each.
    function Preprocessed(samples : Double[][]) : Double[][] {
        let scale = 1.0;

        return Mapped(
            WithProductKernel(scale, _),
            samples
        );
    }

    /// # Summary
    /// Returns a sampling schedule that visits every sample exactly once,
    /// in order.
    function DefaultSchedule(samples : Double[][]) : SamplingSchedule {
        return SamplingSchedule([
            0..Length(samples) - 1
        ]);
    }

    /// # Summary
    /// Defines the structure of the circuit model: the layers of controlled
    /// rotations applied by the classifier. Each entry specifies a target
    /// qubit, an array of control qubits, a Pauli axis, and the index of the
    /// rotation-angle parameter it consumes. This step is analogous to
    /// declaring the layers of a sequential deep learning model.
    function ClassifierStructure() : ControlledRotation[] {
        // `EmptyArray<Int>()` marks an uncontrolled (single-qubit) rotation;
        // the deprecated `new Int[0]` syntax is avoided here.
        return [
            ControlledRotation((0, EmptyArray<Int>()), PauliX, 4),
            ControlledRotation((0, EmptyArray<Int>()), PauliZ, 5),
            ControlledRotation((1, EmptyArray<Int>()), PauliX, 6),
            ControlledRotation((1, EmptyArray<Int>()), PauliZ, 7),
            ControlledRotation((0, [1]), PauliX, 0),
            ControlledRotation((1, [0]), PauliX, 1),
            ControlledRotation((1, EmptyArray<Int>()), PauliZ, 2),
            ControlledRotation((1, EmptyArray<Int>()), PauliX, 3)
        ];
    }

    /// # Summary
    /// Trains the sequential classifier on the given labeled vectors,
    /// starting from each of the given initial parameter vectors and keeping
    /// the best resulting model.
    ///
    /// # Input
    /// ## trainingVectors
    /// The feature vectors of the training set.
    /// ## trainingLabels
    /// The class label (0 or 1) for each training vector.
    /// ## initialParameters
    /// One or more starting points for the rotation-angle parameters.
    ///
    /// # Output
    /// The optimized parameters and bias of the trained model.
    operation TrainHalfMoonModel(
        trainingVectors : Double[][],
        trainingLabels : Int[],
        initialParameters : Double[][]
    ) : (Double[], Double) {
        let samples = Mapped(
            LabeledSample,
            Zipped(Preprocessed(trainingVectors), trainingLabels)
        );
        Message("Ready to train.");
        let (optimizedModel, nMisses) = TrainSequentialClassifier(
            Mapped(
                SequentialModel(ClassifierStructure(), _, 0.0),
                initialParameters
            ),
            samples,
            // Hyperparameters controlling the gradient-descent training loop.
            DefaultTrainingOptions()
                w/ LearningRate <- 0.1
                w/ MinibatchSize <- 15
                w/ Tolerance <- 0.005
                w/ NMeasurements <- 10000
                w/ MaxEpochs <- 16
                w/ VerboseMessage <- Message,
            DefaultSchedule(trainingVectors),
            DefaultSchedule(trainingVectors)
        );
        Message($"Training complete, found optimal parameters: {optimizedModel::Parameters}");
        return (optimizedModel::Parameters, optimizedModel::Bias);
    }

    /// # Summary
    /// Validates a trained model against labeled validation vectors.
    ///
    /// # Output
    /// The proportion of validation samples that were misclassified
    /// (misclassification rate in [0.0, 1.0]).
    operation ValidateHalfMoonModel(
        validationVectors : Double[][],
        validationLabels : Int[],
        parameters : Double[],
        bias : Double
    ) : Double {
        let samples = Mapped(
            LabeledSample,
            Zipped(Preprocessed(validationVectors), validationLabels)
        );
        let tolerance = 0.005;
        let nMeasurements = 10000;
        let results = ValidateSequentialClassifier(
            SequentialModel(ClassifierStructure(), parameters, bias),
            samples,
            tolerance,
            nMeasurements,
            DefaultSchedule(validationVectors)
        );
        return IntAsDouble(results::NMisclassifications) / IntAsDouble(Length(samples));
    }

    /// # Summary
    /// Classifies unlabeled samples using a trained model.
    ///
    /// # Output
    /// The inferred class label (0 or 1) for each input sample.
    operation ClassifyHalfMoonModel(
        samples : Double[][],
        parameters : Double[],
        bias : Double,
        tolerance  : Double,
        nMeasurements : Int
    )
    : Int[] {
        let model = Default<SequentialModel>()
            w/ Structure <- ClassifierStructure()
            w/ Parameters <- parameters
            w/ Bias <- bias;
        let features = Preprocessed(samples);
        let probabilities = EstimateClassificationProbabilities(
            tolerance, model,
            features, nMeasurements
        );
        return InferredLabels(model::Bias, probabilities);
    }

}

The most important functions and operations defined in the code above are:

  • ClassifierStructure() : ControlledRotation[] : in this function we set the structure of our circuit model by adding the layers of the controlled gates we consider. This step is analogous to the declaration of layers of neurons in a sequential deep learning model.
  • TrainHalfMoonModel() : (Double[], Double) : this operation is the core part of the code and defines the training. Here we load the samples from the dataset included in the library, we set the hyperparameters and the initial parameters for the training and we start the training by calling the operation TrainSequentialClassifier included in the library. It outputs the parameters and the bias that determine the classifier.
  • ValidateHalfMoonModel(validationVectors : Double[][], validationLabels : Int[], parameters : Double[], bias : Double) : Double : this operation defines the validation process to evaluate the model. Here we load the samples for validation and set the number of measurements per sample and the tolerance. It outputs the proportion of misclassifications on the chosen batch of samples for validation.

Host program

Next, in the same folder we create a host program. Your host program consists of three parts:

  • Load the dataset data.json and choose a set of classifier parameters where you want to start your training iterations for your model.

  • Run training to determine the parameters and bias of the model.

  • After training, validate the model to determine its accuracy.

    To run your Q# classifier from Python, save the following code as host.py. Remember that you also need the Q# file Training.qs that is explained above in this tutorial.

    
    import json

    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.colors as colors
    import matplotlib.cm as cmx
    plt.style.use('ggplot')

    import qsharp

    from Microsoft.Quantum.Samples import (
        TrainHalfMoonModel, ValidateHalfMoonModel, ClassifyHalfMoonModel
    )

    if __name__ == "__main__":
        # Load the half-moon dataset (training and validation splits).
        with open('data.json') as f:
            data = json.load(f)

        # Several starting points for the eight rotation-angle parameters;
        # the training operation keeps the best resulting model.
        parameter_starting_points = [
            [0.060057, 3.00522,  2.03083,  0.63527,  1.03771, 1.27881, 4.10186,  5.34396],
            [0.586514, 3.371623, 0.860791, 2.92517,  1.14616, 2.99776, 2.26505,  5.62137],
            [1.69704,  1.13912,  2.3595,   4.037552, 1.63698, 1.27549, 0.328671, 0.302282],
            [5.21662,  6.04363,  0.224184, 1.53913,  1.64524, 4.79508, 1.49742,  1.545]
        ]

        # Train the model, obtaining the optimized parameters and bias.
        (parameters, bias) = TrainHalfMoonModel.simulate(
            trainingVectors=data['TrainingData']['Features'],
            trainingLabels=data['TrainingData']['Labels'],
            initialParameters=parameter_starting_points
        )

        # Validate: the Q# operation returns the misclassification rate.
        miss_rate = ValidateHalfMoonModel.simulate(
            validationVectors=data['ValidationData']['Features'],
            validationLabels=data['ValidationData']['Labels'],
            parameters=parameters, bias=bias
        )

        print(f"Miss rate: {miss_rate:0.2%}")

        # Classify the validation so that we can plot it.
        actual_labels = data['ValidationData']['Labels']
        classified_labels = ClassifyHalfMoonModel.simulate(
            samples=data['ValidationData']['Features'],
            parameters=parameters, bias=bias,
            tolerance=0.005, nMeasurements=10_000
        )

        # To plot samples, it's helpful to have colors for each.
        # We'll plot four cases:
        # - actually 0, classified as 0
        # - actually 0, classified as 1
        # - actually 1, classified as 1
        # - actually 1, classified as 0
        cases = [
            (0, 0),
            (0, 1),
            (1, 1),
            (1, 0)
        ]
        # We can use these cases to define markers and colormaps for plotting:
        # correctly classified points get dots, misclassified points get crosses.
        markers = [
            '.' if actual == classified else 'x'
            for (actual, classified) in cases
        ]
        colormap = cmx.ScalarMappable(colors.Normalize(vmin=0, vmax=len(cases) - 1))
        # Name this `case_colors` rather than `colors` so that it does not
        # shadow the matplotlib.colors module imported above.
        case_colors = [colormap.to_rgba(idx_case) for idx_case in range(len(cases))]

        # It's also really helpful to have the samples as a NumPy array so that we
        # can find masks for each of the four cases.
        samples = np.array(data['ValidationData']['Features'])

        # Finally, we loop over the cases above and plot the samples that match
        # each.
        for (idx_case, ((actual, classified), marker, color)) in enumerate(zip(cases, markers, case_colors)):
            mask = np.logical_and(
                np.equal(actual_labels, actual),
                np.equal(classified_labels, classified)
            )
            # Skip cases with no matching samples; scatter() with an empty
            # set would still add a legend entry.
            if not np.any(mask):
                continue
            plt.scatter(
                samples[mask, 0],
                samples[mask, 1],
                c=[color],
                label=f"Was {actual}, classified {classified}",
                marker=marker
            )
        plt.legend()
        plt.show()
    

    You can then run your Python host program from the command line:

    $ python host.py
    
    Preparing Q# environment...
    [...]
    Miss rate: X.XX%
    

Next steps

First, you can play with the code and try to change some parameters to see how it affects the training. Then, in the next tutorial, Design your own classifier, you will learn how to define the structure of the classifier.