FeatureSelectionCatalog.SelectFeaturesBasedOnCount Method
Definition
Important
Some information relates to prerelease product that may be substantially modified before it’s released. Microsoft makes no warranties, express or implied, with respect to the information provided here.
Overloads
SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], Int64)
Create a CountFeatureSelectingEstimator, which selects the slots for which the count of non-default values is greater than or equal to a threshold.
SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, String, String, Int64)
Create a CountFeatureSelectingEstimator, which selects the slots for which the count of non-default values is greater than or equal to a threshold.
SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, InputOutputColumnPair[], Int64)
Create a CountFeatureSelectingEstimator, which selects the slots for which the count of non-default values is greater than or equal to a threshold.
public static Microsoft.ML.Transforms.CountFeatureSelectingEstimator SelectFeaturesBasedOnCount (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, Microsoft.ML.InputOutputColumnPair[] columns, long count = 1);
static member SelectFeaturesBasedOnCount : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * Microsoft.ML.InputOutputColumnPair[] * int64 -> Microsoft.ML.Transforms.CountFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnCount (catalog As TransformsCatalog.FeatureSelectionTransforms, columns As InputOutputColumnPair(), Optional count As Long = 1) As CountFeatureSelectingEstimator
Parameters
- catalog
- TransformsCatalog.FeatureSelectionTransforms
The transform's catalog.
- columns
- InputOutputColumnPair[]
Specifies the names of the columns on which to apply the transformation. This estimator operates over vectors or scalars of numeric, text, or key data types. The data type of each output column will be the same as the data type of its input column.
- count
- Int64
If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.
Returns
CountFeatureSelectingEstimator
Examples
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample that applies SelectFeaturesBasedOnCount to several columns at
    /// once by passing an array of InputOutputColumnPair.
    /// </summary>
    public static class SelectFeaturesBasedOnCountMultiColumn
    {
        /// <summary>
        /// Prints the raw rows, runs the count-based feature selection over
        /// both vector columns, and prints the transformed rows.
        /// </summary>
        public static void Example()
        {
            // The ML context is the entry point for ML.NET operations. It can
            // be used for exception tracking and logging, as well as the
            // source of randomness.
            var mlContext = new MLContext();

            // Build the small in-memory dataset (converted to an IDataView
            // further below).
            var rawData = GetData();

            // Show the input columns.
            Console.WriteLine("NumericVector StringVector");
            foreach (var row in rawData)
            {
                PrintRow(row.NumericVector, row.StringVector);
            }

            // NumericVector StringVector
            // 4,NaN,6 A,WA,Male
            // 4,5,6 A,,Female
            // 4,5,6 A,NY,
            // 4,NaN,NaN A,,Male

            var data = mlContext.Data.LoadFromEnumerable(rawData);

            // The SelectFeaturesBasedOnCount transform estimator retains only
            // those slots which have at least 'count' non-default values per
            // slot.

            // Multi column example. This pipeline transforms two columns
            // using the provided parameters.
            var columnPairs = new InputOutputColumnPair[]
            {
                new InputOutputColumnPair("NumericVector"),
                new InputOutputColumnPair("StringVector")
            };
            var pipeline = mlContext.Transforms.FeatureSelection
                .SelectFeaturesBasedOnCount(columnPairs, count: 3);

            var model = pipeline.Fit(data);
            var transformedData = model.Transform(data);

            var convertedData = mlContext.Data
                .CreateEnumerable<TransformedData>(transformedData, true);

            // Show the columns of the transformed data.
            Console.WriteLine("NumericVector StringVector");
            foreach (var row in convertedData)
            {
                PrintRow(row.NumericVector, row.StringVector);
            }

            // NumericVector StringVector
            // 4,6 A,Male
            // 4,6 A,Female
            // 4,6 A,
            // 4,NaN A,Male
        }

        /// <summary>
        /// Writes one row to the console: both vectors are comma-joined and
        /// each is left-aligned in a 25-character column.
        /// </summary>
        private static void PrintRow(float[] numericVector,
            string[] stringVector)
        {
            var numericText = string.Join(",", numericVector);
            var stringText = string.Join(",", stringVector);
            Console.WriteLine("{0,-25} {1,-25}", numericText, stringText);
        }

        /// <summary>
        /// Row type used to read the transformed data back as objects.
        /// </summary>
        private class TransformedData
        {
            public float[] NumericVector { get; set; }
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Row type of the input data; both columns are declared as vectors
        /// of size 3.
        /// </summary>
        public class InputData
        {
            [VectorType(3)]
            public float[] NumericVector { get; set; }

            [VectorType(3)]
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Builds the four sample rows used by <see cref="Example"/>.
        /// </summary>
        public static IEnumerable<InputData> GetData()
        {
            return new List<InputData>
            {
                new InputData
                {
                    NumericVector = new[] { 4f, float.NaN, 6f },
                    StringVector = new[] { "A", "WA", "Male" }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 5f, 6f },
                    StringVector = new[] { "A", "", "Female" }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 5f, 6f },
                    StringVector = new[] { "A", "NY", null }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, float.NaN, float.NaN },
                    StringVector = new[] { "A", null, "Male" }
                }
            };
        }
    }
}
Applies to
SelectFeaturesBasedOnCount(TransformsCatalog+FeatureSelectionTransforms, String, String, Int64)
Create a CountFeatureSelectingEstimator, which selects the slots for which the count of non-default values is greater than or equal to a threshold.
public static Microsoft.ML.Transforms.CountFeatureSelectingEstimator SelectFeaturesBasedOnCount (this Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms catalog, string outputColumnName, string inputColumnName = default, long count = 1);
static member SelectFeaturesBasedOnCount : Microsoft.ML.TransformsCatalog.FeatureSelectionTransforms * string * string * int64 -> Microsoft.ML.Transforms.CountFeatureSelectingEstimator
<Extension()>
Public Function SelectFeaturesBasedOnCount (catalog As TransformsCatalog.FeatureSelectionTransforms, outputColumnName As String, Optional inputColumnName As String = Nothing, Optional count As Long = 1) As CountFeatureSelectingEstimator
Parameters
- catalog
- TransformsCatalog.FeatureSelectionTransforms
The transform's catalog.
- outputColumnName
- String
Name of the column resulting from the transformation of inputColumnName.
This column's data type will be the same as the input column's data type.
- inputColumnName
- String
Name of the column to transform. If set to null, the value of outputColumnName will be used as the source.
This estimator operates over vectors or scalars of numeric, text, or key data types.
- count
- Int64
If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.
Returns
CountFeatureSelectingEstimator
Examples
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample that applies SelectFeaturesBasedOnCount to one column at a
    /// time and chains the two resulting estimators.
    /// </summary>
    public static class SelectFeaturesBasedOnCount
    {
        /// <summary>
        /// Prints the raw rows, runs the count-based feature selection on the
        /// numeric and the text column, and prints the transformed rows.
        /// </summary>
        public static void Example()
        {
            // The ML context is the entry point for ML.NET operations. It can
            // be used for exception tracking and logging, as well as the
            // source of randomness.
            var mlContext = new MLContext();

            // Build the small in-memory dataset (converted to an IDataView
            // further below).
            var rawData = GetData();

            // Show the input columns.
            Console.WriteLine("NumericVector StringVector");
            foreach (var row in rawData)
            {
                PrintRow(row.NumericVector, row.StringVector);
            }

            // NumericVector StringVector
            // 4,NaN,6 A,WA,Male
            // 4,5,6 A,,Female
            // 4,5,6 A,NY,
            // 4,0,NaN A,,Male

            var data = mlContext.Data.LoadFromEnumerable(rawData);

            // SelectFeaturesBasedOnCount retains only those slots which have
            // at least 'count' non-default and non-missing values per slot.

            // Usage on numeric column.
            var numericSelector = mlContext.Transforms.FeatureSelection
                .SelectFeaturesBasedOnCount(outputColumnName: "NumericVector",
                    count: 3);

            // Usage on text column.
            var textSelector = mlContext.Transforms.FeatureSelection
                .SelectFeaturesBasedOnCount(outputColumnName: "StringVector",
                    count: 3);

            var pipeline = numericSelector.Append(textSelector);

            var model = pipeline.Fit(data);
            var transformedData = model.Transform(data);

            var convertedData = mlContext.Data
                .CreateEnumerable<TransformedData>(transformedData, true);

            // Show the columns of the transformed data.
            Console.WriteLine("NumericVector StringVector");
            foreach (var row in convertedData)
            {
                PrintRow(row.NumericVector, row.StringVector);
            }

            // NumericVector StringVector
            // 4,6 A,Male
            // 4,6 A,Female
            // 4,6 A,
            // 4,NaN A,Male
        }

        /// <summary>
        /// Writes one row to the console: both vectors are comma-joined and
        /// each is left-aligned in a 25-character column.
        /// </summary>
        private static void PrintRow(float[] numericVector,
            string[] stringVector)
        {
            var numericText = string.Join(",", numericVector);
            var stringText = string.Join(",", stringVector);
            Console.WriteLine("{0,-25} {1,-25}", numericText, stringText);
        }

        /// <summary>
        /// Row type used to read the transformed data back as objects.
        /// </summary>
        public class TransformedData
        {
            public float[] NumericVector { get; set; }
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Row type of the input data; both columns are declared as vectors
        /// of size 3.
        /// </summary>
        public class InputData
        {
            [VectorType(3)]
            public float[] NumericVector { get; set; }

            [VectorType(3)]
            public string[] StringVector { get; set; }
        }

        /// <summary>
        /// Builds the four sample rows used by <see cref="Example"/>.
        /// </summary>
        public static IEnumerable<InputData> GetData()
        {
            return new List<InputData>
            {
                new InputData
                {
                    NumericVector = new[] { 4f, float.NaN, 6f },
                    StringVector = new[] { "A", "WA", "Male" }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 5f, 6f },
                    StringVector = new[] { "A", string.Empty, "Female" }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 5f, 6f },
                    StringVector = new[] { "A", "NY", null }
                },
                new InputData
                {
                    NumericVector = new[] { 4f, 0f, float.NaN },
                    StringVector = new[] { "A", null, "Male" }
                }
            };
        }
    }
}