Share via


FeatureSelectionCatalog.SelectFeaturesBasedOnCount 方法

定義

多載

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], Int64)

建立 CountFeatureSelectingEstimator,用來選取非預設值計數大於或等於臨界值的位置。

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, String, String, Int64)

建立 CountFeatureSelectingEstimator,用來選取非預設值計數大於或等於臨界值的位置。

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], Int64)

建立 CountFeatureSelectingEstimator,用來選取非預設值計數大於或等於臨界值的位置。

public static Microsoft.ML.Transforms.CountFeatureSelectingEstimator SelectFeaturesBasedOnCount (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, Microsoft.ML.InputOutputColumnPair[] columns, long count = 1);
static member SelectFeaturesBasedOnCount : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * Microsoft.ML.InputOutputColumnPair[] * int64 -> Microsoft.ML.Transforms.CountFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnCount (catalog As TransformsCatalog.FeatureSelectionTransforms, columns As InputOutputColumnPair(), Optional count As Long = 1) As CountFeatureSelectingEstimator

參數

columns
InputOutputColumnPair[]

指定要套用轉換的資料行名稱。此估算器可處理數值、文字或索引鍵資料類型的向量或純量。輸出資料行的資料類型會與輸入資料行的資料類型相同。

count
Int64

如果某個位置在定型資料中的非預設值計數大於或等於這個臨界值,則會保留該位置。

傳回

範例

using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic
{
    /// <summary>
    /// Sample for the multi-column overload of
    /// <c>SelectFeaturesBasedOnCount</c>: one estimator is configured for both
    /// a numeric vector column and a text vector column.
    /// </summary>
    public static class SelectFeaturesBasedOnCountMultiColumn
    {
        // Header line and row layout shared by the "before" and "after" dumps.
        private const string Header = "NumericVector             StringVector";
        private const string RowFormat = "{0,-25} {1,-25}";

        /// <summary>
        /// Prints the sample rows, fits and applies the feature-selection
        /// estimator with <c>count: 3</c>, then prints the transformed rows.
        /// </summary>
        public static void Example()
        {
            // MLContext is the entry point for ML.NET operations; it also
            // covers exception tracking, logging and the source of randomness.
            var context = new MLContext();

            // Small in-memory dataset used throughout the sample.
            var inputRows = GetData();

            // Show the columns as they look before the transform.
            Console.WriteLine(Header);
            foreach (var row in inputRows)
            {
                PrintRow(row.NumericVector, row.StringVector);
            }

            // NumericVector             StringVector
            // 4,NaN,6                   A,WA,Male
            // 4,5,6                     A,,Female
            // 4,5,6                     A,NY,
            // 4,NaN,NaN                 A,,Male

            // Expose the rows as an IDataView so the estimator can consume them.
            var dataView = context.Data.LoadFromEnumerable(inputRows);

            // Multi-column usage: both columns are handled by one estimator.
            // Each pair names a single column, and 'count' is the minimum
            // number of non-default values a slot needs in order to be kept.
            var columnPairs = new[]
            {
                new InputOutputColumnPair("NumericVector"),
                new InputOutputColumnPair("StringVector")
            };
            var estimator = context.Transforms.FeatureSelection
                .SelectFeaturesBasedOnCount(columnPairs, count: 3);

            // Fit on the data, then apply the fitted transformer to that same
            // data.
            var transformer = estimator.Fit(dataView);
            var outputView = transformer.Transform(dataView);

            var outputRows = context.Data.CreateEnumerable<TransformedData>(
                outputView, true);

            // Show the columns as they look after the transform.
            Console.WriteLine(Header);
            foreach (var row in outputRows)
            {
                PrintRow(row.NumericVector, row.StringVector);
            }

            // NumericVector             StringVector
            // 4,6                       A,Male
            // 4,6                       A,Female
            // 4,6                       A,
            // 4,NaN                     A,Male
        }

        /// <summary>
        /// Writes one row to the console: both vectors comma-joined and
        /// left-aligned in 25-character columns.
        /// </summary>
        private static void PrintRow(float[] numericVector,
            string[] stringVector)
        {
            var numericText = string.Join(",", numericVector);
            var stringText = string.Join(",", stringVector);
            Console.WriteLine(RowFormat, numericText, stringText);
        }

        /// <summary>
        /// Row shape used to read the transformed data back. No vector size
        /// is declared on these properties.
        /// </summary>
        private class TransformedData
        {
            public float[] NumericVector { get; set; }

            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Row shape of the input data; both columns are annotated as
        /// vectors of size 3.
        /// </summary>
        public class InputData
        {
            [VectorType(3)]
            public float[] NumericVector { get; set; }

            [VectorType(3)]
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Builds the four sample rows. In both columns the middle slot holds
        /// only two values that are not NaN, empty or null.
        /// </summary>
        /// <returns>The sample rows, backed by a list.</returns>
        public static IEnumerable<InputData> GetData()
        {
            return new List<InputData>
            {
                new InputData
                {
                    NumericVector = new[] { 4f, float.NaN, 6f },
                    StringVector = new[] { "A", "WA", "Male" }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 5f, 6f },
                    StringVector = new[] { "A", "", "Female" }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 5f, 6f },
                    StringVector = new[] { "A", "NY", null }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, float.NaN, float.NaN },
                    StringVector = new[] { "A", null, "Male" }
                }
            };
        }
    }
}

適用於

SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, String, String, Int64)

建立 CountFeatureSelectingEstimator,用來選取非預設值計數大於或等於臨界值的位置。

public static Microsoft.ML.Transforms.CountFeatureSelectingEstimator SelectFeaturesBasedOnCount (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, string outputColumnName, string inputColumnName = default, long count = 1);
static member SelectFeaturesBasedOnCount : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * string * string * int64 -> Microsoft.ML.Transforms.CountFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnCount (catalog As TransformsCatalog.FeatureSelectionTransforms, outputColumnName As String, Optional inputColumnName As String = Nothing, Optional count As Long = 1) As CountFeatureSelectingEstimator

參數

outputColumnName
String

轉換 inputColumnName 後所產生的資料行名稱。此資料行的資料類型會與輸入資料行的資料類型相同。

inputColumnName
String

要轉換的資料行名稱。如果設定為 null,則會將 outputColumnName 的值當做來源使用。此估算器可處理數值、文字或索引鍵資料類型的向量或純量。

count
Int64

如果某個位置在定型資料中的非預設值計數大於或等於這個臨界值,則會保留該位置。

傳回

範例

using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic
{
    /// <summary>
    /// Sample for the single-column overload of
    /// <c>SelectFeaturesBasedOnCount</c>: one estimator per column, chained
    /// with <c>Append</c>.
    /// </summary>
    public static class SelectFeaturesBasedOnCount
    {
        // Header line and row layout shared by the "before" and "after" dumps.
        private const string Header = "NumericVector             StringVector";
        private const string RowFormat = "{0,-25} {1,-25}";

        /// <summary>
        /// Prints the sample rows, fits and applies the two chained
        /// feature-selection estimators with <c>count: 3</c>, then prints the
        /// transformed rows.
        /// </summary>
        public static void Example()
        {
            // MLContext is the entry point for ML.NET operations; it also
            // covers exception tracking, logging and the source of randomness.
            var context = new MLContext();

            // Small in-memory dataset used throughout the sample.
            var inputRows = GetData();

            // Show the columns as they look before the transform.
            Console.WriteLine(Header);
            foreach (var row in inputRows)
            {
                PrintRow(row.NumericVector, row.StringVector);
            }

            // NumericVector             StringVector
            // 4,NaN,6                   A,WA,Male
            // 4,5,6                     A,,Female
            // 4,5,6                     A,NY,
            // 4,0,NaN                   A,,Male

            // Expose the rows as an IDataView so the estimators can consume
            // them.
            var dataView = context.Data.LoadFromEnumerable(inputRows);

            // Keep only the slots that have at least 'count' non-default and
            // non-missing values. Only the output name is passed, so each
            // estimator reads from and writes to the same column.
            var featureSelection = context.Transforms.FeatureSelection;

            // Usage on a numeric column.
            var numericSelector = featureSelection.SelectFeaturesBasedOnCount(
                outputColumnName: "NumericVector", count: 3);

            // Usage on a text column.
            var textSelector = featureSelection.SelectFeaturesBasedOnCount(
                outputColumnName: "StringVector", count: 3);

            var pipeline = numericSelector.Append(textSelector);

            // Fit on the data, then apply the fitted chain to that same data.
            var model = pipeline.Fit(dataView);
            var outputView = model.Transform(dataView);

            var outputRows = context.Data.CreateEnumerable<TransformedData>(
                outputView, true);

            // Show the columns as they look after the transform.
            Console.WriteLine(Header);
            foreach (var row in outputRows)
            {
                PrintRow(row.NumericVector, row.StringVector);
            }

            // NumericVector             StringVector
            // 4,6                       A,Male
            // 4,6                       A,Female
            // 4,6                       A,
            // 4,NaN                     A,Male
        }

        /// <summary>
        /// Writes one row to the console: both vectors comma-joined and
        /// left-aligned in 25-character columns.
        /// </summary>
        private static void PrintRow(float[] numericVector,
            string[] stringVector)
        {
            var numericText = string.Join(",", numericVector);
            var stringText = string.Join(",", stringVector);
            Console.WriteLine(RowFormat, numericText, stringText);
        }

        /// <summary>
        /// Row shape used to read the transformed data back. No vector size
        /// is declared on these properties.
        /// </summary>
        public class TransformedData
        {
            public float[] NumericVector { get; set; }

            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Row shape of the input data; both columns are annotated as
        /// vectors of size 3.
        /// </summary>
        public class InputData
        {
            [VectorType(3)]
            public float[] NumericVector { get; set; }

            [VectorType(3)]
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Builds the four sample rows. The middle numeric slot holds only
        /// two values other than NaN or 0, and the middle text slot holds
        /// only two values that are not empty or null.
        /// </summary>
        /// <returns>The sample rows, backed by a list.</returns>
        public static IEnumerable<InputData> GetData()
        {
            return new List<InputData>
            {
                new InputData
                {
                    NumericVector = new[] { 4f, float.NaN, 6f },
                    StringVector = new[] { "A", "WA", "Male" }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 5f, 6f },
                    StringVector = new[] { "A", string.Empty, "Female" }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 5f, 6f },
                    StringVector = new[] { "A", "NY", null }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 0f, float.NaN },
                    StringVector = new[] { "A", null, "Male" }
                }
            };
        }
    }
}

適用於