Skip to content

Commit

Permalink
Conversion catalog samples (#3167)
Browse files Browse the repository at this point in the history
* adding a sample for convert MultiColumns. Moving files around.

* Adjust the samples about ValueMapping

* Addressing PR comments
  • Loading branch information
sfilipi authored Apr 4, 2019
1 parent ac53748 commit e285889
Show file tree
Hide file tree
Showing 11 changed files with 339 additions and 336 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,6 @@ namespace Microsoft.ML.Samples.Dynamic
{
public static class ConvertType
{
private sealed class InputData
{
public bool Survived;
}

private sealed class TransformedData
{
public bool Survived { get; set; }

public Int32 SurvivedInt32 { get; set; }
}

public static void Example()
{
var mlContext = new MLContext(seed: 1);
Expand Down Expand Up @@ -51,5 +39,13 @@ public static void Example()
// A: False Aconv:0
// A: False Aconv:0
}
// Row type carrying a single boolean field, Survived.
// Not sealed: TransformedData below derives from it.
// NOTE(review): presumably the input row shape for Example(); the code that consumes it is
// elided from this view — confirm.
private class InputData
{
public bool Survived;
}
// Row type that adds a 32-bit integer property, SurvivedInt32, on top of the Survived field
// inherited from InputData.
// NOTE(review): presumably populated from the converted column produced in Example(); the
// pipeline is elided from this view — confirm.
private sealed class TransformedData : InputData
{
    public int SurvivedInt32 { get; set; }
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
using System;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
{
// Sample: converting several columns of different source types to one common type
// (System.Single in this case) with a single ConvertType call.
// This is often a useful data transformation before concatenating the features together and
// passing them to a particular estimator.
public static class ConvertTypeMultiColumn
{
    // Builds a small in-memory dataset, converts its four feature columns to Single and prints
    // the converted values to the console.
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext(seed: 1);

        // Five rows covering a bool, a numeric string, a DateTime and a double column.
        // Feature3 is read from the system clock, so its values (and therefore the printed
        // Converted3 column) differ from run to run.
        InputData[] samples =
        {
            new InputData { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145 },
            new InputData { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14 },
            new InputData { Feature1 = false, Feature2 = "14", Feature3 = DateTime.Today, Feature4 = 0.2046 },
            new InputData { Feature1 = false, Feature2 = "23", Feature3 = DateTime.Now, Feature4 = 0.1206 },
            new InputData { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09 },
        };

        // Expose the in-memory rows as an IDataView.
        var data = mlContext.Data.LoadFromEnumerable(samples);

        // One (output, input) pair per column to convert; every pair shares the same target kind.
        var columnPairs = new[]
        {
            new InputOutputColumnPair("Converted1", "Feature1"),
            new InputOutputColumnPair("Converted2", "Feature2"),
            new InputOutputColumnPair("Converted3", "Feature3"),
            new InputOutputColumnPair("Converted4", "Feature4"),
        };

        // Construct the pipeline.
        var pipeline = mlContext.Transforms.Conversion.ConvertType(columnPairs, DataKind.Single);

        // Fit the pipeline and transform the same data. This adds the 4 columns defined in the
        // pipeline, containing the converted values of the initial columns.
        var transformedData = pipeline.Fit(data).Transform(data);

        // Shape the transformed data as a strongly typed IEnumerable.
        var convertedRows = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: true);

        // Print the header followed by one line per converted row.
        Console.WriteLine("Converted1\t Converted2\t Converted3\t Converted4");
        foreach (var row in convertedRows)
        {
            Console.WriteLine($"\t{row.Converted1}\t {row.Converted2}\t\t {row.Converted3}\t {row.Converted4}");
        }

        // Transformed data (captured from one run; the Converted3 values depend on when the sample is executed).
        //
        // Converted1   Converted2   Converted3     Converted4
        //   1          0.4          6.368921E+17   0.145
        //   0          0.5          6.368916E+17   3.14
        //   0          14           6.368916E+17   0.2046
        //   0          23           6.368921E+17   0.1206
        //   1          8904         6.368924E+17   8.09
    }

    // The initial data type: four columns of four different types.
    private class InputData
    {
        public bool Feature1;
        public string Feature2;
        public DateTime Feature3;
        public double Feature4;
    }

    // The resulting data type after the transformation: the input columns plus one float
    // column for each converted feature.
    private class TransformedData : InputData
    {
        public float Converted1 { get; set; }
        public float Converted2 { get; set; }
        public float Converted3 { get; set; }
        public float Converted4 { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
using System;
using System.Collections.Generic;
using Microsoft.ML.Data;


namespace Microsoft.ML.Samples.Dynamic
{
/// <summary>
/// Sample for the MapValue conversion, which builds a ValueMappingEstimator from an in-memory dictionary.
/// </summary>
public static class MapValue
{
    /// <summary>
    /// Demonstrates the use of the ValueMappingEstimator by mapping strings to other string values,
    /// ints to strings, and strings to key (uint) values.
    /// This is useful to map types to a category.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // Get a small dataset as an IEnumerable.
        var rawData = new[]
        {
            new DataPoint { Timeframe = "0-4yrs", Score = 1 },
            new DataPoint { Timeframe = "6-11yrs", Score = 2 },
            new DataPoint { Timeframe = "12-25yrs", Score = 3 },
            new DataPoint { Timeframe = "0-5yrs", Score = 4 },
            new DataPoint { Timeframe = "12-25yrs", Score = 5 },
            new DataPoint { Timeframe = "25+yrs", Score = 5 },
        };

        var data = mlContext.Data.LoadFromEnumerable(rawData);

        // Construct the mapping to other strings for the Timeframe column.
        var timeframeMap = new Dictionary<string, string>
        {
            ["0-4yrs"] = "Short",
            ["0-5yrs"] = "Short",
            ["6-11yrs"] = "Medium",
            ["12-25yrs"] = "Long",
            ["25+yrs"] = "Long",
        };

        // Construct the mapping of strings to keys (uints) for the Timeframe column.
        var timeframeKeyMap = new Dictionary<string, uint>
        {
            ["0-4yrs"] = 1,
            ["0-5yrs"] = 1,
            ["6-11yrs"] = 2,
            ["12-25yrs"] = 3,
            ["25+yrs"] = 3,
        };

        // Construct the mapping of ints to strings for the Score column.
        var scoreMap = new Dictionary<int, string>
        {
            [1] = "Low",
            [2] = "Low",
            [3] = "Average",
            [4] = "High",
            [5] = "High",
        };

        // Construct the ML.NET pipeline: three MapValue steps, each adding one output column.
        var pipeline = mlContext.Transforms.Conversion.MapValue("TimeframeCategory", timeframeMap, "Timeframe")
            .Append(mlContext.Transforms.Conversion.MapValue("ScoreCategory", scoreMap, "Score"))
            // On the MapValue below, treatValuesAsKeyType is set to true. The type of the Label column will be
            // a KeyDataViewType type, and it can be used as input for trainers performing multiclass classification.
            .Append(mlContext.Transforms.Conversion.MapValue("Label", timeframeKeyMap, "Timeframe", treatValuesAsKeyType: true));

        // Fit the pipeline to the data and transform that same data.
        IDataView transformedData = pipeline.Fit(data).Transform(data);

        // Get the resulting data as an IEnumerable.
        // This will contain the newly created columns.
        IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        // Plain (non-interpolated) literal: the header has no placeholders.
        Console.WriteLine(" Timeframe TimeframeCategory Label Score ScoreCategory");
        foreach (var featureRow in features)
        {
            Console.WriteLine($"{featureRow.Timeframe}\t\t{featureRow.TimeframeCategory}\t\t\t{featureRow.Label}\t\t{featureRow.Score}\t{featureRow.ScoreCategory}");
        }

        // TransformedData obtained post-transformation.
        //
        // Timeframe   TimeframeCategory   Label   Score   ScoreCategory
        // 0-4yrs      Short               1       1       Low
        // 6-11yrs     Medium              2       2       Low
        // 12-25yrs    Long                3       3       Average
        // 0-5yrs      Short               1       4       High
        // 12-25yrs    Long                3       5       High
        // 25+yrs      Long                3       5       High
    }

    // Input row: a timeframe bucket (string) and a score (int).
    private class DataPoint
    {
        public string Timeframe { get; set; }
        public int Score { get; set; }
    }

    // Output row: the input columns plus the three columns added by the pipeline.
    private class TransformedData : DataPoint
    {
        public string TimeframeCategory { get; set; }
        public string ScoreCategory { get; set; }
        public uint Label { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
using System;
using System.Collections.Generic;

namespace Microsoft.ML.Samples.Dynamic
{
/// <summary>
/// Sample for the MapValue conversion where the key/value mapping is supplied as an IDataView.
/// </summary>
public static class MapValueIdvLookup
{
    /// <summary>
    /// Demonstrates the use of MapValue by mapping floats to strings, looking up the mapping in an IDataView.
    /// This is useful to map types to a grouping.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // A small dataset of prices, held in memory.
        DataPoint[] prices =
        {
            new DataPoint { Price = 3.14f },
            new DataPoint { Price = 2000f },
            new DataPoint { Price = 1.19f },
            new DataPoint { Price = 2.17f },
            new DataPoint { Price = 33.784f },
        };

        // Convert to IDataView.
        var data = mlContext.Data.LoadFromEnumerable(prices);

        // The lookup table: one row per (Value, Category) pair.
        LookupMap[] lookupRows =
        {
            new LookupMap { Value = 3.14f, Category = "Low" },
            new LookupMap { Value = 1.19f, Category = "Low" },
            new LookupMap { Value = 2.17f, Category = "Low" },
            new LookupMap { Value = 33.784f, Category = "Medium" },
            new LookupMap { Value = 2000f, Category = "High" },
        };

        // Convert the lookup table to an IDataView as well.
        var lookupView = mlContext.Data.LoadFromEnumerable(lookupRows);

        // Pick the lookup table's key and value columns out of its schema.
        var keyColumn = lookupView.Schema["Value"];
        var valueColumn = lookupView.Schema["Category"];

        // Construct the ValueMappingEstimator making up the ML.NET pipeline.
        var pipeline = mlContext.Transforms.Conversion.MapValue("PriceCategory", lookupView, keyColumn, valueColumn, "Price");

        // Fit the ValueMappingEstimator and transform the data, converting the Price to PriceCategory.
        IDataView transformedData = pipeline.Fit(data).Transform(data);

        // Get the resulting data as an IEnumerable.
        IEnumerable<TransformedData> categorized = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        Console.WriteLine(" Price PriceCategory");
        foreach (var row in categorized)
        {
            Console.WriteLine($"{row.Price}\t\t{row.PriceCategory}");
        }

        // TransformedData obtained post-transformation.
        //
        // Price     PriceCategory
        // 3.14      Low
        // 2000      High
        // 1.19      Low
        // 2.17      Low
        // 33.784    Medium
    }

    // Type for the IDataView that will be serving as the map.
    private class LookupMap
    {
        public float Value { get; set; }
        public string Category { get; set; }
    }

    // Input row: a single price.
    private class DataPoint
    {
        public float Price { get; set; }
    }

    // Output row: the price plus the category added by the pipeline.
    private class TransformedData : DataPoint
    {
        public string PriceCategory { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
using System;
using System.Collections.Generic;
namespace Microsoft.ML.Samples.Dynamic
{
/// <summary>
/// Sample for the MapValue conversion where each input value is mapped to an array.
/// </summary>
public static class MapValueToArray
{
    /// <summary>
    /// Demonstrates the use of MapValue by mapping strings to array values, which allows for mapping data to numeric arrays.
    /// This functionality is useful when the generated column will serve as the Features column for a trainer.
    /// Most of the trainers take a numeric vector as the Features column.
    /// In this example, the Timeframe data is mapped to arbitrary integer arrays.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // Get a small dataset as an IEnumerable.
        var rawData = new[]
        {
            new DataPoint { Timeframe = "0-4yrs" },
            new DataPoint { Timeframe = "6-11yrs" },
            new DataPoint { Timeframe = "12-25yrs" },
            new DataPoint { Timeframe = "0-5yrs" },
            new DataPoint { Timeframe = "12-25yrs" },
            new DataPoint { Timeframe = "25+yrs" },
        };

        var data = mlContext.Data.LoadFromEnumerable(rawData);

        // Dictionary describing the mapping between the DataPoint values
        // and the arrays they should map to.
        var timeframeMap = new Dictionary<string, int[]>
        {
            ["0-4yrs"] = new int[] { 0, 5, 300 },
            ["0-5yrs"] = new int[] { 0, 5, 300 },
            ["6-11yrs"] = new int[] { 6, 11, 300 },
            ["12-25yrs"] = new int[] { 12, 50, 300 },
            ["25+yrs"] = new int[] { 12, 50, 300 },
        };

        // Construct the ValueMappingEstimator making up the ML.NET pipeline.
        var pipeline = mlContext.Transforms.Conversion.MapValue("Features", timeframeMap, "Timeframe");

        // Fit the ValueMappingEstimator and transform the data, adding the Features column.
        IDataView transformedData = pipeline.Fit(data).Transform(data);

        // Get the resulting data as an IEnumerable.
        IEnumerable<TransformedData> featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        // Plain (non-interpolated) literal: the header has no placeholders.
        Console.WriteLine("Timeframe Features");
        foreach (var featureRow in featuresColumn)
        {
            // The array elements are joined with "," (no space after the comma).
            Console.WriteLine($"{featureRow.Timeframe}\t\t {string.Join(",", featureRow.Features)}");
        }

        // Timeframe    Features
        // 0-4yrs       0,5,300
        // 6-11yrs      6,11,300
        // 12-25yrs     12,50,300
        // 0-5yrs       0,5,300
        // 12-25yrs     12,50,300
        // 25+yrs       12,50,300
    }

    // Input row: a timeframe bucket.
    public class DataPoint
    {
        public string Timeframe { get; set; }
    }

    // Output row: the timeframe plus the integer array added by the pipeline.
    public class TransformedData : DataPoint
    {
        public int[] Features { get; set; }
    }
}
}
Loading

0 comments on commit e285889

Please sign in to comment.