FeatureSelectionCatalog.SelectFeaturesBasedOnCount 方法

定义

重载

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], Int64)

创建一个 CountFeatureSelectingEstimator,用于选择非默认值计数大于或等于阈值的槽。

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, String, String, Int64)

创建一个 CountFeatureSelectingEstimator,用于选择非默认值计数大于或等于阈值的槽。

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], Int64)

创建一个 CountFeatureSelectingEstimator,用于选择非默认值计数大于或等于阈值的槽。

public static Microsoft.ML.Transforms.CountFeatureSelectingEstimator SelectFeaturesBasedOnCount (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, Microsoft.ML.InputOutputColumnPair[] columns, long count = 1);
static member SelectFeaturesBasedOnCount : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * Microsoft.ML.InputOutputColumnPair[] * int64 -> Microsoft.ML.Transforms.CountFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnCount (catalog As TransformsCatalog.FeatureSelectionTransforms, columns As InputOutputColumnPair(), Optional count As Long = 1) As CountFeatureSelectingEstimator

参数

columns
InputOutputColumnPair[]

指定要对其应用转换的列的名称。 此估算器对数值、文本或键数据类型的向量或标量进行操作。 输出列的数据类型将与输入列的数据类型相同。

count
Int64

如果槽的非默认值计数大于或等于训练数据中的此阈值,则保留该槽。

返回

示例

using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic
{
    public static class SelectFeaturesBasedOnCountMultiColumn
    {
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for
            // exception tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            var rawData = GetData();

            // Printing the columns of the input data. 
            Console.WriteLine($"NumericVector             StringVector");
            foreach (var item in rawData)
                Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.
                    NumericVector), string.Join(",", item.StringVector));

            // NumericVector             StringVector
            // 4,NaN,6                   A,WA,Male
            // 4,5,6                     A,,Female
            // 4,5,6                     A,NY,
            // 4,NaN,NaN                 A,,Male

            var data = mlContext.Data.LoadFromEnumerable(rawData);

            // We will use the SelectFeaturesBasedOnCount transform estimator, to
            // retain only those slots which have at least 'count' non-default
            // values per slot.

            // Multi column example. This pipeline transform two columns using the
            // provided parameters.
            var pipeline = mlContext.Transforms.FeatureSelection
                .SelectFeaturesBasedOnCount(new InputOutputColumnPair[] { new
                InputOutputColumnPair("NumericVector"), new InputOutputColumnPair(
                "StringVector") }, count: 3);

            var transformedData = pipeline.Fit(data).Transform(data);

            var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
                transformedData, true);

            // Printing the columns of the transformed data. 
            Console.WriteLine($"NumericVector             StringVector");
            foreach (var item in convertedData)
                Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item
                    .NumericVector), string.Join(",", item.StringVector));

            // NumericVector             StringVector
            // 4,6                       A,Male
            // 4,6                       A,Female
            // 4,6                       A,
            // 4,NaN                     A,Male
        }

        private class TransformedData
        {
            public float[] NumericVector { get; set; }

            public string[] StringVector { get; set; }
        }

        public class InputData
        {
            [VectorType(3)]
            public float[] NumericVector { get; set; }

            [VectorType(3)]
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Returns a few rows of data.
        /// </summary>
        public static IEnumerable<InputData> GetData()
        {
            var data = new List<InputData>
            {
                new InputData
                {
                    NumericVector = new float[] { 4, float.NaN, 6 },
                    StringVector = new string[] { "A", "WA", "Male"}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 5, 6 },
                    StringVector = new string[] { "A", "", "Female"}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 5, 6 },
                    StringVector = new string[] { "A", "NY", null}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, float.NaN, float.NaN },
                    StringVector = new string[] { "A", null, "Male"}
                }
            };
            return data;
        }
    }
}

适用于

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, String, String, Int64)

创建一个 CountFeatureSelectingEstimator,用于选择非默认值计数大于或等于阈值的槽。

public static Microsoft.ML.Transforms.CountFeatureSelectingEstimator SelectFeaturesBasedOnCount (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, string outputColumnName, string inputColumnName = default, long count = 1);
static member SelectFeaturesBasedOnCount : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * string * string * int64 -> Microsoft.ML.Transforms.CountFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnCount (catalog As TransformsCatalog.FeatureSelectionTransforms, outputColumnName As String, Optional inputColumnName As String = Nothing, Optional count As Long = 1) As CountFeatureSelectingEstimator

参数

outputColumnName
String

由转换 inputColumnName生成的列的名称。 此列的数据类型将与输入列的数据类型相同。

inputColumnName
String

要转换的列的名称。 If set to null, the value of the outputColumnName will be used as source. 此估算器对数值、文本或键数据类型的向量或标量进行操作。

count
Int64

如果槽的非默认值计数大于或等于训练数据中的此阈值,则保留该槽。

返回

示例

using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic
{
    public static class SelectFeaturesBasedOnCount
    {
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for
            // exception tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            var rawData = GetData();

            // Printing the columns of the input data. 
            Console.WriteLine($"NumericVector             StringVector");
            foreach (var item in rawData)
                Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item
                    .NumericVector), string.Join(",", item.StringVector));

            // NumericVector             StringVector
            // 4,NaN,6                   A,WA,Male
            // 4,5,6                     A,,Female
            // 4,5,6                     A,NY,
            // 4,0,NaN                   A,,Male

            var data = mlContext.Data.LoadFromEnumerable(rawData);

            // We will use the SelectFeaturesBasedOnCount to retain only those slots
            // which have at least 'count' non-default and non-missing values per
            // slot.
            var pipeline =
                mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(
                    outputColumnName: "NumericVector", count: 3) // Usage on numeric 
                                                                 // column.
                .Append(mlContext.Transforms.FeatureSelection
                .SelectFeaturesBasedOnCount(outputColumnName: "StringVector",
                count: 3)); // Usage on text column.

            var transformedData = pipeline.Fit(data).Transform(data);

            var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
                transformedData, true);

            // Printing the columns of the transformed data. 
            Console.WriteLine($"NumericVector             StringVector");
            foreach (var item in convertedData)
                Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.
                    NumericVector), string.Join(",", item.StringVector));

            // NumericVector             StringVector
            // 4,6                       A,Male
            // 4,6                       A,Female
            // 4,6                       A,
            // 4,NaN                     A,Male
        }

        public class TransformedData
        {
            public float[] NumericVector { get; set; }

            public string[] StringVector { get; set; }
        }

        public class InputData
        {
            [VectorType(3)]
            public float[] NumericVector { get; set; }

            [VectorType(3)]
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Return a few rows of data.
        /// </summary>
        public static IEnumerable<InputData> GetData()
        {
            var data = new List<InputData>
            {
                new InputData
                {
                    NumericVector = new float[] { 4, float.NaN, 6 },
                    StringVector = new string[] { "A", "WA", "Male"}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 5, 6 },
                    StringVector = new string[] { "A", string.Empty, "Female"}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 5, 6 },
                    StringVector = new string[] { "A", "NY", null}
                },
                new InputData
                {
                    NumericVector = new float[] { 4, 0, float.NaN },
                    StringVector = new string[] { "A", null, "Male"}
                }
            };
            return data;
        }
    }
}

适用于