Predicted value with ML.Net doesn't make sense
I'm trying to get into machine learning and have watched a few tutorials on ml.net. Using a sample from one of the tutorials I used a dataset created from sales stats. The idea is the larger the number of calls, emails and meetings for each row, the higher the deal number should be.
When I run this code with arguments for calls, emails and meetings like 200,100,25 I get -3.29. If I change it to 300,150,50 I get -6.73. So, it looks like the more calls, emails and meetings I add, the lower the number of deals I get... which is the opposite of what I expected. Being a complete noob to this I don't know if it's something I'm doing wrong in the code or if it's my dataset.
Would it have anything to do with only normalizing the Calls column? I'm not sure how to add additional normalizing to the emails and meetings columns.
Any ideas? Thanks.
using System;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace MLTest
{
internal class Program
{
private static readonly string _path = @"data\sales-stats-updated.csv";
private static void Main(string[] args)
{
var context = new MLContext(0);
var data = context.Data.LoadFromTextFile<Input>(_path, hasHeader: true, separatorChar: ',');
var pipeline = context.Transforms.NormalizeMinMax("Calls")
.Append(context.Transforms.Concatenate("Features", "Calls", "Emails", "Meetings"))
.Append(context.Regression.Trainers.Ols());
var model = pipeline.Fit(data);
var predictor = context.Model.CreatePredictionEngine<Input, Output>(model);
while (true)
{
Console.WriteLine("Enter the number of calls, emails and meetings for the week.");
var line = Console.ReadLine();
if (line == "") break;
var els = line.Split(",");
var input = new Input { Calls = float.Parse(els[0].Trim()), Emails = float.Parse(els[1].Trim()), Meetings = float.Parse(els[2].Trim())};
var prediction = predictor.Predict(input);
Console.WriteLine($"Predicted Deals: {prediction.Deals:0.##}");
}
}
}
public class Input
{
[LoadColumn(3)] public float Calls;
[LoadColumn(6)] [ColumnName("Label")] public float Deals;
[LoadColumn(2)] public float Emails;
[LoadColumn(4)] public float Meetings;
}
public class Output
{
[ColumnName("Score")] public float Deals;
}
}
Dataset of sales by user and week for last year:
userid,weekending,emails,calls,meetings,quotes,deals
289,1/5/2020 12:00:00 AM,267,67,20,18,0
289,1/12/2020 12:00:00 AM,201,49,18,119,2
289,1/19/2020 12:00:00 AM,268,88,9,101,1
289,1/26/2020 12:00:00 AM,155,23,10,48,0
289,2/2/2020 12:00:00 AM,115,47,10,31,0
289,2/9/2020 12:00:00 AM,141,12,15,145,2
289,2/16/2020 12:00:00 AM,104,19,1,116,0
289,2/23/2020 12:00:00 AM,90,9,1,133,2
289,3/1/2020 12:00:00 AM,186,63,1,44,1
289,3/8/2020 12:00:00 AM,103,33,2,24,0
289,3/15/2020 12:00:00 AM,135,41,0,50,2
289,3/22/2020 12:00:00 AM,225,27,6,58,0
289,3/29/2020 12:00:00 AM,173,53,1,53,2
289,4/5/2020 12:00:00 AM,171,35,1,40,1
289,4/12/2020 12:00:00 AM,201,41,3,9,0
289,4/19/2020 12:00:00 AM,155,22,5,1,0
289,4/26/2020 12:00:00 AM,169,38,3,174,2
289,5/3/2020 12:00:00 AM,231,34,2,45,0
289,5/10/2020 12:00:00 AM,216,31,3,22,1
289,5/17/2020 12:00:00 AM,172,32,4,23,0
289,5/24/2020 12:00:00 AM,147,46,1,39,2
289,5/31/2020 12:00:00 AM,253,36,3,14,0
289,6/7/2020 12:00:00 AM,129,8,1,12,0
289,6/14/2020 12:00:00 AM,97,9,5,0,0
289,6/21/2020 12:00:00 AM,162,9,7,31,0
289,6/28/2020 12:00:00 AM,204,12,3,105,9
289,7/5/2020 12:00:00 AM,591,25,11,117,1
289,7/12/2020 12:00:00 AM,198,22,5,103,6
289,7/19/2020 12:00:00 AM,254,15,7,24,2
289,7/26/2020 12:00:00 AM,244,15,2,84,0
289,8/2/2020 12:00:00 AM,177,10,4,100,5
289,8/9/2020 12:00:00 AM,176,7,3,21,2
289,8/16/2020 12:00:00 AM,234,36,6,8,1
289,8/23/2020 12:00:00 AM,156,21,3,12,1
289,8/30/2020 12:00:00 AM,186,14,6,174,11
289,9/6/2020 12:00:00 AM,142,22,4,55,4
289,9/13/2020 12:00:00 AM,214,28,2,90,0
289,9/20/2020 12:00:00 AM,236,22,1,86,0
289,9/27/2020 12:00:00 AM,48,1,0,77,3
289,10/4/2020 12:00:00 AM,170,0,2,51,0
289,10/11/2020 12:00:00 AM,157,20,9,121,2
289,10/18/2020 12:00:00 AM,152,20,3,237,14
289,10/25/2020 12:00:00 AM,126,23,9,217,2
289,11/1/2020 12:00:00 AM,252,28,9,68,3
289,11/8/2020 12:00:00 AM,338,36,7,75,2
289,11/15/2020 12:00:00 AM,240,11,9,33,2
289,11/22/2020 12:00:00 AM,241,2,2,15,1
289,11/29/2020 12:00:00 AM,260,1,8,94,0
289,12/6/2020 12:00:00 AM,123,7,9,0,0
289,12/13/2020 12:00:00 AM,368,2,3,57,2
289,12/20/2020 12:00:00 AM,119,4,1,52,3
289,12/27/2020 12:00:00 AM,205,4,2,2,0
291,2/23/2020 12:00:00 AM,43,0,1,5,0
291,3/1/2020 12:00:00 AM,151,41,3,0,0
291,3/8/2020 12:00:00 AM,261,152,2,40,0
291,3/15/2020 12:00:00 AM,223,171,5,0,0
291,3/22/2020 12:00:00 AM,223,133,5,1,0
291,3/29/2020 12:00:00 AM,250,140,3,0,0
291,4/5/2020 12:00:00 AM,259,146,7,0,0
291,4/12/2020 12:00:00 AM,256,119,9,0,0
291,4/19/2020 12:00:00 AM,227,119,8,10,0
291,4/26/2020 12:00:00 AM,281,118,5,16,1
291,5/3/2020 12:00:00 AM,275,128,6,12,0
291,5/10/2020 12:00:00 AM,251,159,5,0,0
291,5/17/2020 12:00:00 AM,238,97,2,0,0
291,5/24/2020 12:00:00 AM,239,113,5,9,1
291,5/31/2020 12:00:00 AM,262,123,6,16,0
291,6/7/2020 12:00:00 AM,265,91,1,20,0
291,6/14/2020 12:00:00 AM,244,111,3,2,0
291,6/21/2020 12:00:00 AM,262,136,2,0,0
291,6/28/2020 12:00:00 AM,136,77,4,68,2
291,7/5/2020 12:00:00 AM,451,175,6,76,0
291,7/12/2020 12:00:00 AM,271,202,3,36,0
291,7/19/2020 12:00:00 AM,292,126,5,171,0
291,7/26/2020 12:00:00 AM,211,131,4,69,0
291,8/2/2020 12:00:00 AM,274,125,2,275,4
291,8/9/2020 12:00:00 AM,288,105,3,122,1
291,8/16/2020 12:00:00 AM,254,170,2,33,0
291,8/23/2020 12:00:00 AM,149,64,3,185,0
291,8/30/2020 12:00:00 AM,260,145,2,66,1
291,9/6/2020 12:00:00 AM,140,70,1,140,0
291,9/13/2020 12:00:00 AM,284,150,7,249,1
291,9/20/2020 12:00:00 AM,264,121,4,239,0
291,9/27/2020 12:00:00 AM,255,125,6,55,0
291,10/4/2020 12:00:00 AM,255,107,4,74,1
291,10/11/2020 12:00:00 AM,245,120,4,75,7
291,10/18/2020 12:00:00 AM,176,80,4,15,0
291,10/25/2020 12:00:00 AM,269,140,0,59,5
291,11/1/2020 12:00:00 AM,253,128,3,15,0
291,11/8/2020 12:00:00 AM,270,113,3,68,2
291,11/15/2020 12:00:00 AM,282,130,10,38,0
291,11/22/2020 12:00:00 AM,179,70,5,89,1
291,11/29/2020 12:00:00 AM,529,116,4,293,30
291,12/6/2020 12:00:00 AM,263,102,9,645,36
291,12/13/2020 12:00:00 AM,300,111,1,536,47
291,12/20/2020 12:00:00 AM,227,46,6,342,20
291,12/27/2020 12:00:00 AM,235,75,1,97,4
292,2/23/2020 12:00:00 AM,34,0,0,88,0
292,3/1/2020 12:00:00 AM,162,100,3,0,0
292,3/8/2020 12:00:00 AM,246,145,8,2,0
292,3/15/2020 12:00:00 AM,228,104,6,12,0
292,3/22/2020 12:00:00 AM,199,93,14,0,0
292,3/29/2020 12:00:00 AM,188,90,5,48,0
292,4/5/2020 12:00:00 AM,234,112,2,76,2
292,4/12/2020 12:00:00 AM,182,97,5,23,0
292,4/19/2020 12:00:00 AM,223,68,7,28,1
292,4/26/2020 12:00:00 AM,267,74,7,2,0
292,5/3/2020 12:00:00 AM,224,82,13,4,0
292,5/10/2020 12:00:00 AM,200,76,3,34,0
292,5/17/2020 12:00:00 AM,170,65,2,40,1
292,5/24/2020 12:00:00 AM,138,67,3,47,0
292,5/31/2020 12:00:00 AM,160,86,4,44,0
292,6/7/2020 12:00:00 AM,181,75,6,35,1
292,6/14/2020 12:00:00 AM,159,63,1,13,0
292,6/21/2020 12:00:00 AM,180,65,7,0,0
292,6/28/2020 12:00:00 AM,138,35,1,39,2
292,7/5/2020 12:00:00 AM,393,69,8,109,1
292,7/12/2020 12:00:00 AM,162,78,5,38,1
292,7/19/2020 12:00:00 AM,219,90,6,76,0
292,7/26/2020 12:00:00 AM,197,86,5,53,0
292,8/2/2020 12:00:00 AM,210,85,3,33,0
292,8/9/2020 12:00:00 AM,229,80,3,34,0
292,8/16/2020 12:00:00 AM,233,93,5,63,0
292,8/23/2020 12:00:00 AM,244,102,5,33,0
292,8/30/2020 12:00:00 AM,191,69,6,117,4
292,9/6/2020 12:00:00 AM,237,71,2,70,1
292,9/13/2020 12:00:00 AM,263,96,6,9,1
292,9/20/2020 12:00:00 AM,225,76,3,80,3
292,9/27/2020 12:00:00 AM,265,92,2,64,1
292,10/4/2020 12:00:00 AM,271,88,0,31,1
292,10/11/2020 12:00:00 AM,229,102,5,7,0
292,10/18/2020 12:00:00 AM,230,74,2,3,0
292,10/25/2020 12:00:00 AM,202,79,4,5,0
292,11/1/2020 12:00:00 AM,212,116,5,0,0
292,11/8/2020 12:00:00 AM,288,101,5,45,2
292,11/15/2020 12:00:00 AM,263,118,2,1,0
292,11/22/2020 12:00:00 AM,119,26,1,28,2
292,11/29/2020 12:00:00 AM,13,0,0,0,0
292,12/6/2020 12:00:00 AM,245,96,4,84,2
292,12/13/2020 12:00:00 AM,191,98,4,34,0
292,12/20/2020 12:00:00 AM,68,11,0,9,0
292,12/27/2020 12:00:00 AM,199,36,0,43,0
294,2/23/2020 12:00:00 AM,65,0,0,21,0
294,3/1/2020 12:00:00 AM,145,34,1,2,0
294,3/8/2020 12:00:00 AM,205,62,10,74,0
294,3/15/2020 12:00:00 AM,199,70,11,13,0
294,3/22/2020 12:00:00 AM,295,117,10,100,1
294,3/29/2020 12:00:00 AM,187,75,8,50,0
294,4/5/2020 12:00:00 AM,240,78,13,13,0
294,4/12/2020 12:00:00 AM,250,106,11,0,0
294,4/19/2020 12:00:00 AM,259,114,13,655,0
294,4/26/2020 12:00:00 AM,245,79,7,136,0
294,5/3/2020 12:00:00 AM,191,82,8,19,0
294,5/10/2020 12:00:00 AM,220,63,8,180,0
294,5/17/2020 12:00:00 AM,193,98,16,125,0
294,5/24/2020 12:00:00 AM,167,67,6,75,0
294,5/31/2020 12:00:00 AM,164,75,8,24,0
294,6/7/2020 12:00:00 AM,201,87,11,68,0
294,6/14/2020 12:00:00 AM,174,75,10,20,0
294,6/21/2020 12:00:00 AM,68,40,8,99,0
294,6/28/2020 12:00:00 AM,201,90,7,439,5
294,7/5/2020 12:00:00 AM,441,96,11,122,0
294,7/12/2020 12:00:00 AM,227,135,8,33,2
294,7/19/2020 12:00:00 AM,246,147,9,276,4
294,7/26/2020 12:00:00 AM,189,133,5,184,1
294,8/2/2020 12:00:00 AM,235,146,10,147,0
294,8/9/2020 12:00:00 AM,235,109,11,236,2
294,8/16/2020 12:00:00 AM,236,155,7,72,0
294,8/23/2020 12:00:00 AM,238,130,8,225,0
294,8/30/2020 12:00:00 AM,237,124,4,49,1
294,9/6/2020 12:00:00 AM,183,50,5,218,3
294,9/13/2020 12:00:00 AM,192,170,5,115,4
294,9/20/2020 12:00:00 AM,172,95,5,140,2
294,9/27/2020 12:00:00 AM,254,157,6,228,8
294,10/4/2020 12:00:00 AM,197,91,5,170,6
294,10/11/2020 12:00:00 AM,239,120,4,333,6
294,10/18/2020 12:00:00 AM,184,89,7,114,1
294,10/25/2020 12:00:00 AM,198,98,11,176,1
294,11/1/2020 12:00:00 AM,220,109,3,23,0
294,11/8/2020 12:00:00 AM,252,121,7,393,0
294,11/15/2020 12:00:00 AM,301,91,7,92,1
294,11/22/2020 12:00:00 AM,165,33,3,59,1
294,11/29/2020 12:00:00 AM,846,94,6,102,4
294,12/6/2020 12:00:00 AM,347,62,7,176,3
294,12/13/2020 12:00:00 AM,219,68,5,426,9
294,12/20/2020 12:00:00 AM,104,16,4,215,2
294,12/27/2020 12:00:00 AM,212,22,1,107,4