-
Notifications
You must be signed in to change notification settings - Fork 274
/
Copy pathKMeansAndLogisticRegressionbench.cs
55 lines (49 loc) · 2.63 KB
/
KMeansAndLogisticRegressionbench.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using BenchmarkDotNet.Attributes;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
namespace Microsoft.ML.Benchmarks
{
    /// <summary>
    /// End-to-end benchmark that times the construction and fitting of a complete ML pipeline:
    /// one hot encoding of the categorical columns, min-max normalization of the numerical
    /// columns, a KMeans trainer over the combined features, and finally a Logistic Regression
    /// trainer over those features extended with the "Score" column.
    /// </summary>
    [BenchmarkCategory(Categories.MachineLearning)]
    public class KMeansAndLogisticRegressionBench
    {
        // Path of the input data file, resolved once per benchmark instance.
        private readonly string _dataPath = Program.GetInvariantCultureDataPath("adult.tiny.with-schema.txt");

        /// <summary>
        /// Loads the data file, builds the featurization + KMeans + logistic regression
        /// estimator chain, fits it, and hands back the parameters of the final predictor.
        /// </summary>
        /// <returns>The model held by the last transformer of the fitted chain.</returns>
        [Benchmark]
        public CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator> TrainKMeansAndLR()
        {
            // Fixed seed, as in every run of this benchmark.
            var mlContext = new MLContext(seed: 1);

            // Schema: column 0 is the Boolean label, columns 1-8 are read as strings
            // (categorical features), columns 9-14 are read as singles (numerical features).
            var schema = new[]
            {
                new TextLoader.Column("Label", DataKind.Boolean, 0),
                new TextLoader.Column(
                    "CatFeatures",
                    DataKind.String,
                    new[] { new TextLoader.Range() { Min = 1, Max = 8 } }),
                new TextLoader.Column(
                    "NumFeatures",
                    DataKind.Single,
                    new[] { new TextLoader.Range() { Min = 9, Max = 14 } }),
            };
            var trainingData = mlContext.Data.LoadFromTextFile(_dataPath, schema, hasHeader: true);

            // Stage 1: encode the categorical columns, normalize the numerical ones,
            // and merge both into a single "Features" column.
            var featurization = mlContext.Transforms.Categorical.OneHotEncoding("CatFeatures")
                .Append(mlContext.Transforms.NormalizeMinMax("NumFeatures"))
                .Append(mlContext.Transforms.Concatenate("Features", "NumFeatures", "CatFeatures"));

            // Stage 2: run KMeans over "Features", then fold its "Score" column back
            // into "Features" so the next trainer sees both.
            var clustered = featurization
                .Append(mlContext.Clustering.Trainers.KMeans("Features"))
                .Append(mlContext.Transforms.Concatenate("Features", "Features", "Score"));

            // Stage 3: logistic regression over the augmented feature column.
            var logisticRegressionOptions = new LbfgsLogisticRegressionBinaryTrainer.Options
            {
                EnforceNonNegativity = true,
                OptimizationTolerance = 1e-3f,
            };
            var fullPipeline = clustered.Append(
                mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(logisticRegressionOptions));

            var fittedChain = fullPipeline.Fit(trainingData);

            // Only the final (logistic regression) model of the chain is returned.
            return fittedChain.LastTransformer.Model;
        }
    }
}