Dela via


FactorizationMachineExtensions.FieldAwareFactorizationMachine Method

Definition

Overloads

FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, FieldAwareFactorizationMachineTrainer+Options)

Create FieldAwareFactorizationMachineTrainer using advanced options, which predicts a target using a field-aware factorization machine trained over boolean label data.

FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, String, String, String)

Create FieldAwareFactorizationMachineTrainer, which predicts a target using a field-aware factorization machine trained over boolean label data.

FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, String[], String, String)

Create FieldAwareFactorizationMachineTrainer, which predicts a target using a field-aware factorization machine trained over boolean label data.

FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, FieldAwareFactorizationMachineTrainer+Options)

Create FieldAwareFactorizationMachineTrainer using advanced options, which predicts a target using a field-aware factorization machine trained over boolean label data.

public static Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer.Options options);
static member FieldAwareFactorizationMachine : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer.Options -> Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer
<Extension()>
Public Function FieldAwareFactorizationMachine (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, options As FieldAwareFactorizationMachineTrainer.Options) As FieldAwareFactorizationMachineTrainer

Parameters

catalog
BinaryClassificationCatalog.BinaryClassificationTrainers

The binary classification catalog trainer object.

Returns

Examples

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

namespace Samples.Dynamic.Trainers.BinaryClassification
{
    public static class FieldAwareFactorizationMachineWithOptions
    {
        // This example first train a field-aware factorization to binary
        // classification, measure the trained model's quality, and finally
        // use the trained model to make prediction.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness. Setting the seed to a fixed number
            // in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            IEnumerable<DataPoint> data = GenerateRandomDataPoints(500);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(data);

            // Define trainer options.
            var options = new FieldAwareFactorizationMachineTrainer.Options
            {
                FeatureColumnName = nameof(DataPoint.Field0),
                ExtraFeatureColumns =
                new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) },

                LabelColumnName = nameof(DataPoint.Label),
                LambdaLatent = 0.01f,
                LambdaLinear = 0.001f,
                LatentDimension = 16,
                NumberOfIterations = 50,
                LearningRate = 0.5f
            };

            // Define the trainer.
            // This trainer trains field-aware factorization (FFM)
            // for binary classification.
            // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory
            // behind and
            // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the
            // training algorithm implemented in ML.NET.
            var pipeline = mlContext.BinaryClassification.Trainers
                .FieldAwareFactorizationMachine(options);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Run the model on training data set.
            var transformedTrainingData = model.Transform(trainingData);

            // Measure the quality of the trained model.
            var metrics = mlContext.BinaryClassification
                .Evaluate(transformedTrainingData);

            // Show the quality metrics.
            PrintMetrics(metrics);

            // Expected output:
            //   Accuracy: 0.99
            //   AUC: 1.00
            //   F1 Score: 0.99
            //   Negative Precision: 1.00
            //   Negative Recall: 0.98
            //   Positive Precision: 0.98
            //   Positive Recall: 1.00
            //   Log Loss: 0.17
            //   Log Loss Reduction: 0.83
            //   Entropy: 1.00
            //
            //  TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
            //  Confusion table
            //            ||======================
            //  PREDICTED || positive | negative | Recall
            //  TRUTH     ||======================
            //   positive ||      199 |       39 | 0.8361
            //   negative ||       69 |      193 | 0.7366
            //            ||======================
            //  Precision ||   0.7425 |   0.8319 |

            // Create prediction function from the trained model.
            var engine = mlContext.Model
                .CreatePredictionEngine<DataPoint, Result>(model);

            // Make some predictions.
            foreach (var dataPoint in data.Take(5))
            {
                var result = engine.Predict(dataPoint);
                Console.WriteLine($"Actual label: {dataPoint.Label}, "
                    + $"predicted label: {result.PredictedLabel}, "
                    + $"score of being positive class: {result.Score}, "
                    + $"and probability of beling positive class: "
                    + $"{result.Probability}.");

            }

            // Expected output:
            //   Actual label: True, predicted label: True, score of being positive class: 1.115094, and probability of being positive class: 0.7530775.
            //   Actual label: False, predicted label: False, score of being positive class: -3.478797, and probability of being positive class: 0.02992158.
            //   Actual label: True, predicted label: True, score of being positive class: 3.191896, and probability of being positive class: 0.9605282.
            //   Actual label: False, predicted label: False, score of being positive class: -3.400863, and probability of being positive class: 0.03226851.
            //   Actual label: True, predicted label: True, score of being positive class: 4.06056, and probability of being positive class: 0.9830528.
        }

        // Number of features per field.
        const int featureLength = 5;

        // This class defines objects fed to the trained model.
        private class DataPoint
        {
            // Label.
            public bool Label { get; set; }

            // Features from the first field. Note that different fields can have
            // different numbers of features.
            [VectorType(featureLength)]
            public float[] Field0 { get; set; }

            // Features from the second field. 
            [VectorType(featureLength)]
            public float[] Field1 { get; set; }

            // Features from the thrid field. 
            [VectorType(featureLength)]
            public float[] Field2 { get; set; }
        }

        // This class defines objects produced by trained model. The trained model
        // maps a DataPoint to a Result.
        public class Result
        {
            // Label.
            public bool Label { get; set; }
            // Predicted label.
            public bool PredictedLabel { get; set; }
            // Predicted score.
            public float Score { get; set; }
            // Probability of belonging to positive class.
            public float Probability { get; set; }
        }

        // Function used to create toy data sets.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(
            int exampleCount, int seed = 0)

        {
            var rnd = new Random(seed);
            var data = new List<DataPoint>();
            for (int i = 0; i < exampleCount; ++i)
            {
                // Initialize an example with a random label and an empty feature
                // vector.
                var sample = new DataPoint()
                {
                    Label = rnd.Next() % 2 == 0,
                    Field0 = new float[featureLength],
                    Field1 = new float[featureLength],
                    Field2 = new float[featureLength]
                };

                // Fill feature vectors according the assigned label.
                // Notice that features from different fields have different biases
                // and therefore different distributions. In practices such as game
                // recommendation, one may use one field to store features from user
                // profile and another field to store features from game profile.
                for (int j = 0; j < featureLength; ++j)
                {
                    var value0 = (float)rnd.NextDouble();
                    // Positive class gets larger feature value.
                    if (sample.Label)
                        value0 += 0.2f;
                    sample.Field0[j] = value0;

                    var value1 = (float)rnd.NextDouble();
                    // Positive class gets smaller feature value.
                    if (sample.Label)
                        value1 -= 0.2f;
                    sample.Field1[j] = value1;

                    var value2 = (float)rnd.NextDouble();
                    // Positive class gets larger feature value.
                    if (sample.Label)
                        value2 += 0.8f;
                    sample.Field2[j] = value2;
                }

                data.Add(sample);
            }
            return data;
        }

        // Function used to show evaluation metrics such as accuracy of predictions.
        private static void PrintMetrics(
            CalibratedBinaryClassificationMetrics metrics)

        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: " +
                $"{metrics.NegativePrecision:F2}");

            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: " +
                $"{metrics.PositivePrecision:F2}");

            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
            Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
            Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}");
            Console.WriteLine($"Entropy: {metrics.Entropy:F2}");
        }
    }
}

Applies to

FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, String, String, String)

Create FieldAwareFactorizationMachineTrainer, which predicts a target using a field-aware factorization machine trained over boolean label data.

public static Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string featureColumnName = "Features", string labelColumnName = "Label", string exampleWeightColumnName = default);
static member FieldAwareFactorizationMachine : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * string * string * string -> Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer
<Extension()>
Public Function FieldAwareFactorizationMachine (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, Optional featureColumnName As String = "Features", Optional labelColumnName As String = "Label", Optional exampleWeightColumnName As String = Nothing) As FieldAwareFactorizationMachineTrainer

Parameters

catalog
BinaryClassificationCatalog.BinaryClassificationTrainers

The binary classification catalog trainer object.

featureColumnName
String

The name of the feature column. The column data must be a known-sized vector of Single.

labelColumnName
String

The name of the label column. The column data must be Boolean.

exampleWeightColumnName
String

The name of the example weight column (optional).

Returns

Examples

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic.Trainers.BinaryClassification
{
    public static class FactorizationMachine
    {
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness. Setting the seed to a fixed number
            // in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // ML.NET doesn't cache data set by default. Therefore, if one reads a
            // data set from a file and accesses it many times, it can be slow due
            // to expensive featurization and disk operations. When the considered
            // data can fit into memory, a solution is to cache the data in memory.
            // Caching is especially helpful when working with iterative algorithms 
            // which needs many data passes.
            trainingData = mlContext.Data.Cache(trainingData);

            // Define the trainer.
            var pipeline = mlContext.BinaryClassification.Trainers
                .FieldAwareFactorizationMachine();

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it different
            // from training data.
            var testData = mlContext.Data
                .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data
                .CreateEnumerable<Prediction>(transformedTestData,
                reuseRowObject: false).ToList();

            // Print 5 predictions.
            foreach (var p in predictions.Take(5))
                Console.WriteLine($"Label: {p.Label}, "
                    + $"Prediction: {p.PredictedLabel}");

            // Expected output:
            //   Label: True, Prediction: False
            //   Label: False, Prediction: False
            //   Label: True, Prediction: False
            //   Label: True, Prediction: False
            //   Label: False, Prediction: False

            // Evaluate the overall metrics.
            var metrics = mlContext.BinaryClassification
                .Evaluate(transformedTestData);

            PrintMetrics(metrics);

            // Expected output:
            //   Accuracy: 0.55
            //   AUC: 0.54
            //   F1 Score: 0.23
            //   Negative Precision: 0.54
            //   Negative Recall: 0.92
            //   Positive Precision: 0.62
            //   Positive Recall: 0.14
            //
            //   TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
            //   Confusion table
            //             ||======================
            //   PREDICTED || positive | negative | Recall
            //   TRUTH     ||======================
            //    positive ||      203 |       35 | 0.8529
            //    negative ||       21 |      241 | 0.9198
            //             ||======================
            //   Precision ||   0.9063 |   0.8732 |
        }

        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
            int seed = 0)

        {
            var random = new Random(seed);
            float randomFloat() => (float)random.NextDouble();
            for (int i = 0; i < count; i++)
            {
                var label = randomFloat() > 0.5f;
                yield return new DataPoint
                {
                    Label = label,
                    // Create random features that are correlated with the label.
                    // For data points with false label, the feature values are
                    // slightly increased by adding a constant.
                    Features = Enumerable.Repeat(label, 50)
                        .Select(x => x ? randomFloat() : randomFloat() +
                        0.1f).ToArray()

                };
            }
        }

        // Example with label and 50 feature values. A data set is a collection of
        // such examples.
        private class DataPoint
        {
            public bool Label { get; set; }
            [VectorType(50)]
            public float[] Features { get; set; }
        }

        // Class used to capture predictions.
        private class Prediction
        {
            // Original label.
            public bool Label { get; set; }
            // Predicted label from the trainer.
            public bool PredictedLabel { get; set; }
        }

        // Pretty-print BinaryClassificationMetrics objects.
        private static void PrintMetrics(BinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: " +
                $"{metrics.NegativePrecision:F2}");

            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: " +
                $"{metrics.PositivePrecision:F2}");

            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
            Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
        }
    }
}

Remarks

Note that because there is only one feature column, the underlying model is equivalent to standard factorization machine.

Applies to

FieldAwareFactorizationMachine(BinaryClassificationCatalog+BinaryClassificationTrainers, String[], String, String)

Create FieldAwareFactorizationMachineTrainer, which predicts a target using a field-aware factorization machine trained over boolean label data.

public static Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine (this Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string[] featureColumnNames, string labelColumnName = "Label", string exampleWeightColumnName = default);
static member FieldAwareFactorizationMachine : Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers * string[] * string * string -> Microsoft.ML.Trainers.FieldAwareFactorizationMachineTrainer
<Extension()>
Public Function FieldAwareFactorizationMachine (catalog As BinaryClassificationCatalog.BinaryClassificationTrainers, featureColumnNames As String(), Optional labelColumnName As String = "Label", Optional exampleWeightColumnName As String = Nothing) As FieldAwareFactorizationMachineTrainer

Parameters

catalog
BinaryClassificationCatalog.BinaryClassificationTrainers

The binary classification catalog trainer object.

featureColumnNames
String[]

The names of the feature columns. The column data must be a known-sized vector of Single.

labelColumnName
String

The name of the label column. The column data must be Boolean.

exampleWeightColumnName
String

The name of the example weight column (optional).

Returns

Examples

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic.Trainers.BinaryClassification
{
    public static class FieldAwareFactorizationMachine
    {
        // This example first train a field-aware factorization to binary
        // classification, measure the trained model's quality, and finally
        // use the trained model to make prediction.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness. Setting the seed to a fixed number
            // in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            IEnumerable<DataPoint> data = GenerateRandomDataPoints(500);

            // Convert the list of data points to an IDataView object, which is
            // consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(data);

            // Define the trainer.
            // This trainer trains field-aware factorization (FFM)
            // for binary classification.
            // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory
            // behind and 
            // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the
            // training algorithm implemented in ML.NET.
            var pipeline = mlContext.BinaryClassification.Trainers
                .FieldAwareFactorizationMachine(
                // Specify three feature columns!
                new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1),
                nameof(DataPoint.Field2) },
                // Specify binary label's column name.
                nameof(DataPoint.Label));

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Run the model on training data set.
            var transformedTrainingData = model.Transform(trainingData);

            // Measure the quality of the trained model.
            var metrics = mlContext.BinaryClassification
                .Evaluate(transformedTrainingData);

            // Show the quality metrics.
            PrintMetrics(metrics);

            // Expected output:
            //   Accuracy: 0.99
            //   AUC: 1.00
            //   F1 Score: 0.99
            //   Negative Precision: 1.00
            //   Negative Recall: 0.98
            //   Positive Precision: 0.98
            //   Positive Recall: 1.00
            //   Log Loss: 0.17
            //   Log Loss Reduction: 0.83
            //   Entropy: 1.00
            //
            //   TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
            //   Confusion table
            //             ||======================
            //   PREDICTED || positive | negative | Recall
            //   TRUTH     ||======================
            //    positive ||      193 |       45 | 0.8109
            //    negative ||       52 |      210 | 0.8015
            //             ||======================
            //   Precision ||   0.7878 |   0.8235 |

            // Create prediction function from the trained model.
            var engine = mlContext.Model
                .CreatePredictionEngine<DataPoint, Result>(model);

            // Make some predictions.
            foreach (var dataPoint in data.Take(5))
            {
                var result = engine.Predict(dataPoint);
                Console.WriteLine($"Actual label: {dataPoint.Label}, "
                    + $"predicted label: {result.PredictedLabel}, "
                    + $"score of being positive class: {result.Score}, "
                    + $"and probability of beling positive class: "
                    + $"{result.Probability}.");

            }

            // Expected output:
            //   Actual label: True, predicted label: True, score of being positive class: 1.115094, and probability of being positive class: 0.7530775.
            //   Actual label: False, predicted label: False, score of being positive class: -3.478797, and probability of being positive class: 0.02992158.
            //   Actual label: True, predicted label: True, score of being positive class: 3.191896, and probability of being positive class: 0.9605282.
            //   Actual label: False, predicted label: False, score of being positive class: -3.400863, and probability of being positive class: 0.03226851.
            //   Actual label: True, predicted label: True, score of being positive class: 4.06056, and probability of being positive class: 0.9830528.
        }

        // Number of features per field.
        const int featureLength = 5;

        // This class defines objects fed to the trained model.
        private class DataPoint
        {
            // Label.
            public bool Label { get; set; }

            // Features from the first field. Note that different fields can have
            // different numbers of features.
            [VectorType(featureLength)]
            public float[] Field0 { get; set; }

            // Features from the second field. 
            [VectorType(featureLength)]
            public float[] Field1 { get; set; }

            // Features from the thrid field. 
            [VectorType(featureLength)]
            public float[] Field2 { get; set; }
        }

        // This class defines objects produced by trained model. The trained model
        // maps a DataPoint to a Result.
        public class Result
        {
            // Label.
            public bool Label { get; set; }
            // Predicted label.
            public bool PredictedLabel { get; set; }
            // Predicted score.
            public float Score { get; set; }
            // Probability of belonging to positive class.
            public float Probability { get; set; }
        }

        // Function used to create toy data sets.
        private static IEnumerable<DataPoint> GenerateRandomDataPoints(
            int exampleCount, int seed = 0)

        {
            var rnd = new Random(seed);
            var data = new List<DataPoint>();
            for (int i = 0; i < exampleCount; ++i)
            {
                // Initialize an example with a random label and an empty feature
                // vector.
                var sample = new DataPoint()
                {
                    Label = rnd.Next() % 2 == 0,
                    Field0 = new float[featureLength],
                    Field1 = new float[featureLength],
                    Field2 = new float[featureLength]
                };

                // Fill feature vectors according the assigned label.
                // Notice that features from different fields have different biases
                // and therefore different distributions. In practices such as game
                // recommendation, one may use one field to store features from user
                // profile and another field to store features from game profile.
                for (int j = 0; j < featureLength; ++j)
                {
                    var value0 = (float)rnd.NextDouble();
                    // Positive class gets larger feature value.
                    if (sample.Label)
                        value0 += 0.2f;
                    sample.Field0[j] = value0;

                    var value1 = (float)rnd.NextDouble();
                    // Positive class gets smaller feature value.
                    if (sample.Label)
                        value1 -= 0.2f;
                    sample.Field1[j] = value1;

                    var value2 = (float)rnd.NextDouble();
                    // Positive class gets larger feature value.
                    if (sample.Label)
                        value2 += 0.8f;
                    sample.Field2[j] = value2;
                }

                data.Add(sample);
            }
            return data;
        }

        // Function used to show evaluation metrics such as accuracy of predictions.
        private static void PrintMetrics(
            CalibratedBinaryClassificationMetrics metrics)

        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: " +
                $"{metrics.NegativePrecision:F2}");

            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: " +
                $"{metrics.PositivePrecision:F2}");

            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
            Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
            Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}");
            Console.WriteLine($"Entropy: {metrics.Entropy:F2}");
        }
    }
}

Applies to