% Study model accuracy for different model types
function [ML, ErrTest, bTrainOnce] = ComputeModel(DataX, DataY, nModel)
% COMPUTEMODEL  Train one model type on a random train/test split and report errors.
%
%   [ML, ErrTest, bTrainOnce] = ComputeModel(DataX, DataY, nModel)
%
%   Inputs
%     DataX   - predictors; the code indexes it by rows and calls table2array
%               on it, so it is expected to be a table (one row per observation).
%     DataY   - response, indexed by rows in step with DataX.
%     nModel  - numeric model selector (0, 100..150, 300/350, 400/450,
%               500..550, 600..650). Any other value falls back to model 110.
%
%   Outputs
%     ML         - character label of the model that was trained.
%     ErrTest    - struct returned by ComputeErrors for the test set, with an
%                  extra field ErrTest.train holding the train-set errors.
%     bTrainOnce - true for the branches that set it (models 0 and 650).
%
%   Side effects (visible in this file)
%     * Reads the globals bFullEvaluation and fRemove.
%     * Overwrites the globals TrainY and MAEbest; the branches 137 and
%       600..650 also overwrite the global trainX.
%     * Model 0 writes the fitted model to 'Mdl.mat' in the current folder.
%     * Turns off the 'stats:regress:RankDefDesignMat' warning.

global bFullEvaluation trainX TrainY MAEbest fRemove

% Number of optimizing iterations for the hyperparameter searches
if bFullEvaluation
    NumEvals = 500;
else
    NumEvals = 40;
end


% Create train and test sets
testFraction = 0.2+0.1*rand(); %randomizing test/train set sizes a bit
rnum = rand(size(DataY));
bInTest = (rnum < testFraction);
TrainX = (DataX(~bInTest,:));   TrainY = (DataY(~bInTest,:));
TestX  = (DataX( bInTest,:));   TestY  = (DataY(bInTest,:));

% Initialize worst error
MAEbest = 1.0e100;

% Optionally drop the largest-|Y| fraction (fRemove) of the train set
if fRemove>0
    % Find outlier threshold
    timeRemove = quantile(abs(TrainY), 1-fRemove);
    % Remove outliers from the train data, to reduce their impact on MAE.
    inmodel = (abs(TrainY)<timeRemove);
    TrainX = TrainX(inmodel,:);
    TrainY = TrainY(inmodel);
end

% Observation weights for the weighted fit (model 135). They are created
% AFTER the outlier removal so that their size always matches TrainY.
weights = ones(size(TrainY));

% If the model does cross-validation, train it once
bTrainOnce = false;

% Initialize model name
ML = 'Unknown';

% Disable annoying warnings
warning('off', 'stats:regress:RankDefDesignMat');

% Options shared by all hyperparameter-optimized fits
Mdl = []; 
hyperops = struct(  'MaxObjectiveEvaluations', NumEvals, ... % keep large to do more exhaustive search
                    'AcquisitionFunctionName', 'expected-improvement',... % time-independent optimization
                    'UseParallel', true, ...
                    'ShowPlots', false, ...
                    'Repartition', false, ...
                    'Verbose', 0);

% Main part: Run different models, computing their errors
if nModel==0
    % Experimental: stepwise linear model on a table holding predictors and response
    TrainXY = TrainX; TrainXY.Wait = TrainY;
    Mdl = stepwiselm(TrainXY)  % no semicolon: the fitted model is displayed
    %Mdl = fitlm(TrainXY,'interactions');
    save('Mdl.mat', 'Mdl')
    ErrTest  = ComputeErrors(predict(Mdl,TestX), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,TrainX), TrainY);
    
    bTrainOnce = true;
     
    ML = 'LinearRegression- Interactions'; 
% --------------------------  Experimental ends --------------------

elseif nModel==100
    % Linear regression on the most recent wait. 
    % Analogous to [b,~,Residual] = regress(DataY,table2array(DataX)); 
    VarList = {'MostRecent1'};
    [Mdl,~] = fitrlinear(table2array(TrainX(:, VarList)), TrainY, ...
                        'FitBias', false, ... no intercept
                        'Learner', 'leastsquares', ... least squares regression, not svm
                        'Lambda', 0);  % Lambda=0 turns off regularization
    ErrTest = ComputeErrors(predict(Mdl,table2array(TestX(:,VarList))), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,table2array(TrainX(:,VarList))), TrainY);
    ML = 'MostRecentWait';
    
elseif nModel==110
    % Linear regression on the five most recent waits (no intercept)
    VarList = {'MostRecent1', 'MostRecent2', 'MostRecent3', 'MostRecent4', 'MostRecent5'};
    [Mdl,~] = fitrlinear(table2array(TrainX(:, VarList)), TrainY, ...
                        'FitBias', false, ... no intercept
                        'Learner', 'leastsquares', ... least squares regression, not svm
                        'Lambda', 0);  % Lambda=0 turns off regularization
    ErrTest = ComputeErrors(predict(Mdl,table2array(TestX(:,VarList))), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,table2array(TrainX(:,VarList))), TrainY);
    ML = 'MostRecentWaits-Average';
    
elseif nModel==111
    % Linear regression on the line-count variables (with intercept)
    VarList = {'LineCount0', 'LineCount1', 'LineCount2', 'LineCount3'};
    [Mdl,~] = fitrlinear(table2array(TrainX(:, VarList)), TrainY, ...
                        'FitBias', true, ... intercept
                        'Learner', 'leastsquares', ... least squares regression, not svm
                        'Lambda', 0);  % Lambda=0 turns off regularization
    ErrTest = ComputeErrors(predict(Mdl,table2array(TestX(:,VarList))), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,table2array(TrainX(:,VarList))), TrainY);
    ML = 'LineCounts';
    
elseif nModel==112
    % Linear regression on a hand-picked list of the best variables
    VarList = {'LineCount0Strict', 'AheadCount', 'StartTime4', 'DelayedInLine', ...
               'InProgressSize', 'NumCompletedToday', 'NumScheduledNextSlot', ...
               'LineCount0', 'NumScheduledNextW2', 'SumHowEarlyWaiting', 'AfterSlot', ...
               'SumWaits', 'SchFlowCount2', 'SumDelayInProgress', ...
               'BeforeSlot', 'IsFirst'  };
           
%     VarList = {'LineCount0', 'AheadCount', 'NoneInLine', 'NoneInProgress', ...
%                'NoneCompleted', 'StartTime4', 'InProgressSize', 'NumCompletedToday', ...
%                'NumCompletedInLastW1', 'AvgWaitLastW1'};
                
    trainXI = [ones(size(TrainY)) table2array(TrainX(:, VarList))]; % train with intercept
    testXI  = [ones(size(TestY))  table2array(TestX(:,  VarList))]; % test with intercept
 
    b = regress(TrainY, trainXI);
    ErrTest = ComputeErrors(testXI*b, TestY);
    ErrTest.train = ComputeErrors(trainXI*b, TrainY);
%     [Mdl,~] = fitrlinear(table2array(TrainX(:, VarList)), TrainY, ...
%                         'FitBias', true, ... intercept
%                         'Learner', 'leastsquares', ... least squares regression, not svm
%                         'Lambda', 0);  ... Lambda=0 turns off regularization
%     ErrTest = ComputeErrors(predict(Mdl,table2array(TestX(:,VarList))), TestY);
%     ErrTest.train = ComputeErrors(predict(Mdl,table2array(TrainX(:,VarList))), TrainY);
    ML = 'LinearRegression-BestFeatures';
    
elseif nModel==115
    % Stepwise linear regression (starting from an interactions model) to
    % find the best short model. Training uses the numeric matrix form
    % stepwiselm(X, y, modelspec) so that it matches the matrix inputs
    % passed to predict below.
    trainXA = table2array(TrainX);
    testXA  = table2array(TestX);
    Mdl = stepwiselm(trainXA, TrainY, 'interactions');
    ErrTest = ComputeErrors(predict(Mdl,testXA), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,trainXA), TrainY);
    % Own label (was a copy of the label used by model 110)
    ML = 'StepwiseLinear-Interactions';
    
elseif nModel==120
    % Linear regression on the Intercept only: a single constant predictor
    [Mdl,~] = fitrlinear(ones(size(TrainY)), TrainY, ...
                        'FitBias', false, ... the column of ones plays the intercept role
                        'Learner', 'leastsquares', ... least squares regression, not svm
                        'Lambda', 0);  % Lambda=0 turns off regularization
    ErrTest = ComputeErrors(predict(Mdl, ones(size(TestY))), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl, ones(size(TrainY))), TrainY);
    ML = 'InterceptOnly';
      
elseif nModel==130
    % Linear regression on ALL variables (with intercept)
    trainXI = [ones(size(TrainY)) table2array(TrainX)]; % train with intercept
    testXI  = [ones(size(TestY))  table2array(TestX)];  % test with intercept
 
    if true
        b = regress(TrainY, trainXI);
        ErrTest = ComputeErrors(testXI*b, TestY);
        ErrTest.train = ComputeErrors(trainXI*b, TrainY);
    else       
        % Alternative implementation, currently disabled
        [Mdl,~] = fitrlinear(table2array(TrainX), TrainY);
        ErrTest = ComputeErrors(predict(Mdl,table2array(TestX)), TestY);
        ErrTest.train = ComputeErrors(predict(Mdl,table2array(TrainX)), TrainY);
    end
    ML = 'LinearRegression';
          
elseif nModel==135
    % Linear WEIGHTED regression on ALL variables (with intercept)
    % NOTE(review): fitlm adds its own intercept by default, so the explicit
    % column of ones looks redundant here - confirm before removing it.
    trainXI = [ones(size(TrainY)) table2array(TrainX)]; % train with intercept
    testXI  = [ones(size(TestY))  table2array(TestX)];  % test with intercept

    Mdl = fitlm(trainXI, TrainY, 'Weights', weights);
    ErrTest = ComputeErrors(predict(Mdl, testXI), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl, trainXI), TrainY);

    ML = 'LinearRegression-W';
          
elseif nModel==137
    % Linear ROBUST regression on ALL variables. No intercept is fitted:
    % no column of ones is added and robustfit is called with const='off'.
    trainX = [table2array(TrainX)]; % train predictors (writes the global trainX)
    testX  = [table2array(TestX)];  % test predictors

    b = robustfit(trainX, TrainY, @RobustWeights, 1, 'off');
    ErrTest = ComputeErrors(testX*b, TestY);
    ErrTest.train = ComputeErrors(trainX*b, TrainY);

    ML = 'LinearRegression-R';

elseif nModel==140
    % Ridge regression on All variables (with intercept), Lambda optimized.
    % Same as model 141 but without the warm start from the regress solution.
    trainXI = [ones(size(TrainY)) table2array(TrainX)]; % train with intercept
    testXI  = [ones(size(TestY))  table2array(TestX)];  % test with intercept
    [Mdl,~] = fitrlinear(trainXI, TrainY, ...
                            'Regularization', 'ridge', ...
                            'Learner', 'leastsquares', ...
                            'OptimizeHyperparameters',{'Lambda'}, ...
                            'HyperparameterOptimizationOptions', hyperops);
    ErrTest = ComputeErrors(predict(Mdl,testXI), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,trainXI), TrainY);
    ML = 'LinearRegression-Optimized';
      
elseif nModel==141
    % Ridge regression on All variables (with intercept), Lambda optimized,
    % starting from the ordinary least-squares coefficients
    trainXI = [ones(size(TrainY)) table2array(TrainX)]; % train with intercept
    testXI  = [ones(size(TestY))  table2array(TestX)];  % test with intercept
    betaInitial = regress(TrainY, trainXI); % initial coefficient values
    [Mdl,~] = fitrlinear(trainXI, TrainY, ...
                            'Beta', betaInitial, ...
                            'Regularization', 'ridge', ...
                            'Learner', 'leastsquares', ...
                            'OptimizeHyperparameters',{'Lambda'}, ...
                            'HyperparameterOptimizationOptions', hyperops);
    ErrTest = ComputeErrors(predict(Mdl,testXI), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,trainXI), TrainY);
    ML = 'LinearRegression-Optimized';
      
elseif nModel==150
    % Lasso-regularized regression on All variables (with intercept). 
    % Takes forever!!!
    % lasso returns the intercept separately in fitinfo.Intercept (it is not
    % part of the coefficient matrix), so it is added explicitly when
    % predicting instead of relying on a column of ones.
    trainXA = table2array(TrainX);
    testXA  = table2array(TestX);
    
    % 5-fold cross-validated lasso; pick the sparsest model within one
    % standard error of the minimum cross-validated error
    [B,fitinfo] = lasso(trainXA, TrainY, 'CV', 5);
    lam = fitinfo.Index1SE;
    b  = B(:,lam);
    b0 = fitinfo.Intercept(lam);
    ErrTest = ComputeErrors(testXA*b + b0, TestY);
    ErrTest.train = ComputeErrors(trainXA*b + b0, TrainY);
    ML = 'ElasticNet';

elseif nModel==300
    % Train Gaussian kernel
    Mdl = fitrkernel(table2array(TrainX), TrainY);
    ErrTest  = ComputeErrors(predict(Mdl,table2array(TestX)), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,table2array(TrainX)), TrainY);
    ML = 'GaussianKernel';

elseif nModel==350
    % Train optimized Gaussian kernel
    Mdl = fitrkernel(table2array(TrainX), TrainY,...
                         'OptimizeHyperparameters', 'all',...
                         'HyperparameterOptimizationOptions', hyperops);
    ErrTest  = ComputeErrors(predict(Mdl,table2array(TestX)), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,table2array(TrainX)), TrainY);
    ML = 'GaussianKernel-Optimized';

elseif nModel==400
    % Train single decision tree
    Mdl = fitrtree(TrainX, TrainY);
    ErrTest  = ComputeErrors(predict(Mdl,TestX), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,TrainX), TrainY);
    ML = 'DecisionTree';

elseif nModel==450
    % Train single decision tree, optimized
    Mdl = fitrtree(TrainX, TrainY,...
                         'OptimizeHyperparameters', 'all',...
                         'HyperparameterOptimizationOptions', hyperops);
    ErrTest  = ComputeErrors(predict(Mdl,TestX), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,TrainX), TrainY);
    ML = 'DecisionTree-Optimized';
    
elseif nModel==500
    % Train boosted forest, small
    t = templateTree('MaxNumSplits',10);
    Mdl = fitrensemble(TrainX, TrainY, 'NumLearningCycles', 5, 'Learners', t);
    ErrTest  = ComputeErrors(predict(Mdl,TestX), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,TrainX), TrainY);
    ML = 'ForestSmall_5trees_10splits';
    
elseif nModel==510
    % Train boosted forest, large
    t = templateTree('MaxNumSplits',20);
    Mdl = fitrensemble(TrainX, TrainY, 'NumLearningCycles', 100, 'Learners', t);
    ErrTest  = ComputeErrors(predict(Mdl,TestX), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,TrainX), TrainY);
    ML = 'ForestLarge_100trees_20splits';
    
elseif nModel==520
    % Train boosted forest, huge
    t = templateTree('MaxNumSplits',30);
    Mdl = fitrensemble(TrainX, TrainY, 'NumLearningCycles', 300, 'Learners', t);
    ErrTest  = ComputeErrors(predict(Mdl,TestX), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,TrainX), TrainY);
    ML = 'ForestHuge_300trees_30splits';
    
elseif nModel==550
    % Train boosted forest, optimized
    Mdl = fitrensemble(TrainX, TrainY,...
                         'OptimizeHyperparameters', 'all',...
                         'HyperparameterOptimizationOptions', hyperops);
    ErrTest  = ComputeErrors(predict(Mdl,TestX), TestY);
    ErrTest.train = ComputeErrors(predict(Mdl,TrainX), TrainY);
    ML = 'Forest-Optimized';
    
elseif nModel==600
    % Train neural network, one hidden layer of 10 neurons.
    % Inputs are transposed: the network calls below take one column per observation.
    trainX = table2array(TrainX)';
    testX = table2array(TestX)';
    net = fitnet(10);
    net = train(net, trainX, TrainY','useParallel','yes');
    ErrTest  = ComputeErrors(net(testX), TestY');
    ErrTest.train = ComputeErrors(net(trainX), TrainY');
    ML = 'NeuralNetwork-[10]_Layer';
    
elseif nModel==610
    % Train neural network, two hidden layers of 10 neurons
    trainX = table2array(TrainX)';
    testX = table2array(TestX)';
    net = fitnet([10,10]);
    net = train(net, trainX, TrainY','useParallel','yes');
    ErrTest  = ComputeErrors(net(testX), TestY');
    ErrTest.train = ComputeErrors(net(trainX), TrainY');
    ML = 'NeuralNetwork-[10,10]_Layers';
    
elseif nModel==620
    % Train neural network, two hidden layers of 20 neurons
    trainX = table2array(TrainX)';
    testX = table2array(TestX)';
    net = fitnet([20,20]);
    net = train(net, trainX, TrainY','useParallel','yes');
    ErrTest  = ComputeErrors(net(testX), TestY');
    ErrTest.train = ComputeErrors(net(trainX), TrainY');
    ML = 'NeuralNetwork-[20,20]_Layers';
    
elseif nModel==650
    % Train neural network, optimize layers
    trainX = table2array(TrainX)';
    testX = table2array(TestX)';
    
    % Consider different layer sizes: first layer 10..40, optional second
    % layer 0..40 (0 means "no second layer"), in steps of 10.
    % NOTE(review): the best network is selected by its TEST-set MAE, so the
    % reported test error is optimistic - confirm this is intended.
    MAEbest = 1.0e100; lbest = []; nBest = [];
    dl = 10; ml = 40;
    for nEval = 1:5 % multiple evaluations to overcome random initialization problem
        for l1 = dl:dl:ml
            for l2 = 0:dl:ml
                layers = [l1, l2];
                layers = layers(layers>0);
                net = fitnet(layers);
                %net.trainParam.showWindow = false; % no gui when training
                net = train(net, trainX, TrainY','useParallel','yes');
                ErrTest  = ComputeErrors(net(testX), TestY');
                if ErrTest.MAE < MAEbest
                    MAEbest = ErrTest.MAE;
                    lbest = layers;
                    nBest = net;
                    fprintf('\n     MAE=%.3f, nEval=%d, layers=', MAEbest, nEval);   fprintf(' [%d]', lbest);
                end % best result
            end % l2
        end % l1
    end %nEvals

    % Print the best network
    fprintf('\nBEST MAE=%.3f, layers=', MAEbest);   fprintf(' [%d]', lbest);
    fprintf('\n');
    
    % Compute the best network
    net = nBest;
    ErrTest  = ComputeErrors(net(testX), TestY');
    ErrTest.train = ComputeErrors(net(trainX), TrainY');
    ML = 'NeuralNetwork-Optimized_Layers';
    
    % Neural nets do cross-validation - train once
    bTrainOnce = true;

else
    % Unknown model id: fall back to model 110 (regression on the most
    % recent waits). The recursive call draws a fresh train/test split.
    [ML, ErrTest, bTrainOnce] = ComputeModel(DataX, DataY, 110);
end


return




% Compute different error metrics
function E = ComputeErrors(Ypredicted, Y)
% COMPUTEERRORS  Error metrics of predictions against observed values.
%
%   E = ComputeErrors(Ypredicted, Y)
%
%   Ypredicted and Y may be row or column vectors in any combination; both
%   are reshaped to columns first, so a row/column mix can no longer expand
%   into an N-by-N residual matrix.
%
%   Returned struct fields:
%     Residual - Ypredicted - Y, as a column
%     Y        - observed values, as a column
%     ER2      - std(Residual)^2 / std(Y)^2
%     R2       - 1 - ER2
%     MAE      - mean absolute error
%     MSE      - sqrt(mean(Residual.^2)), i.e. the ROOT mean squared error
%                (the field name is kept for compatibility with callers)
%     U05, U10 - fraction of absolute errors strictly below 5 and below 10

% Force a common orientation before subtracting
Ypredicted = Ypredicted(:);
Y = Y(:);

Residual = Ypredicted - Y;
E.Residual = Residual;
E.Y = Y;

% R2 (based on the spread of the residuals relative to the spread of Y)
E.ER2 = (std(Residual)^2) / (std(Y)^2);
E.R2 =  1.0 - E.ER2;

% Mean absolute error
absR = abs(Residual);
E.MAE = mean(absR);

% Root mean squared error
E.MSE = sqrt(mean(Residual.^2));

% Fraction of errors under 5 and under 10
E.U05 = mean(absR<5);
E.U10 = mean(absR<10);


return;

% Experiment with different weights for observations
function w = RobustWeights(residuals)
    % Weight function handed to robustfit: every observation gets weight 1,
    % except those whose absolute residual lies above the 98% quantile of
    % all absolute residuals, which get weight 0.
    magnitude = abs(residuals);
    cutoff = quantile(magnitude, 0.98);
    % Written as a negated "greater than" so that NaN residuals keep weight 1
    w = double(~(magnitude > cutoff));
    %w = 1./(0.1+abs(residuals));
return;



