%% ============================================================
%  sGprAle: Self-supervised GPR with Autoencoder (Automated)
%  Labeled data ratios: 10% to 60%
%  ------------------------------------------------------------
%  This script:
%   1) Loads ALE data
%   2) Normalizes features
%   3) Splits data into labeled / unlabeled sets
%   4) Trains an autoencoder on unlabeled data
%   5) Trains a GPR model on labeled latent features
%   6) Evaluates performance on unlabeled data
% ============================================================

clear; clc; close all;

%% -------------------- Load Data ------------------------------
% The dataset is a plain CSV file. xlsread can only open CSV files through
% the Excel COM server (Windows with Excel installed); in basic import mode
% it rejects them. Prefer readmatrix when the running release provides it
% and keep xlsread only as a fallback for older releases.
dataFile = 'mcs_ds_edited_iter_shuffled.csv';   % ALE dataset
if exist('readmatrix', 'file') ~= 0
    data = readmatrix(dataFile);
else
    data = xlsread(dataFile);
end

% Columns 1-4 are read as features and column 5 as the response below,
% so fail early with a clear message if the file is narrower than that.
if size(data, 2) < 5
    error('sGprAle:badData', ...
          'Expected at least 5 numeric columns in %s, found %d.', ...
          dataFile, size(data, 2));
end

rng(0);                                             % Reproducibility

% Input features (AR, TR, ND, Iterations)
X = data(:,1:4);

% Response variable (Average Localisation Error)
Y = data(:,5);

%% ------------------ Normalize Features -----------------------
% Column-wise z-score; muX / sigmaX hold the per-feature mean and standard
% deviation returned by zscore.
[X, muX, sigmaX] = zscore(X);

%% -------- Define labeled data ratios to evaluate -------------
labeledRatios = 0.10:0.10:0.60;
numScenarios  = numel(labeledRatios);

% Store performance metrics (one entry per labeled-ratio scenario)
RMSE = zeros(numScenarios,1);
MSE  = zeros(numScenarios,1);

%% ================== Main Loop ================================
% For every labeled-data ratio:
%   1) hold out that fraction of the rows as the labeled set,
%   2) train an autoencoder on the remaining (unlabeled) rows,
%   3) map all rows to the autoencoder's bottleneck (latent) activations,
%   4) fit a GPR model on the labeled latent features,
%   5) score the model on the unlabeled rows and plot observed vs predicted.
for i = 1:numScenarios

    labeledRatio = labeledRatios(i);

    fprintf('\n--- Running scenario: %.0f%% labeled data ---\n', ...
            labeledRatio*100);

    %% ----------- Split labeled / unlabeled data --------------
    % HoldOut puts a labeledRatio fraction of the rows in the "test"
    % partition, which is used here as the labeled set; the "training"
    % partition is the unlabeled set.
    cv = cvpartition(size(X,1), 'HoldOut', labeledRatio);
    unlabeledIdx = training(cv);
    labeledIdx   = test(cv);

    XUnlabeled = X(unlabeledIdx, :);
    YUnlabeled = Y(unlabeledIdx, :);

    XLabeled   = X(labeledIdx, :);
    YLabeled   = Y(labeledIdx, :);

    %% ---------------- Train Autoencoder ----------------------
    % Autoencoder architecture: 4 → 10 → 10 → 5 → 10 → 10 → 4
    % (the final 4-unit layer is the network's output layer, sized by train
    % from the targets, so only the five hidden layers are listed here).
    hiddenSize      = 10;
    latentSize      = hiddenSize/2;
    bottleneckLayer = 3;                     % index of the latent layer
    autoenc = feedforwardnet( ...
        [hiddenSize hiddenSize latentSize hiddenSize hiddenSize]);

    autoenc.trainParam.showWindow = false;   % Disable GUI
    autoenc = train(autoenc, XUnlabeled', XUnlabeled');

    %% ----------- Extract Latent Features ---------------------
    % Calling autoenc(X') would return the reconstruction (network output),
    % not the latent code. Run the forward pass by hand and stop at the
    % bottleneck layer instead. Columns are observations, as the network
    % expects.
    activations = X';

    % Apply the same input processing functions the network applies
    % internally before its first layer, with the settings fixed by train.
    inputProc = autoenc.inputs{1};
    for p = 1:numel(inputProc.processFcns)
        activations = feval(inputProc.processFcns{p}, 'apply', ...
                            activations, inputProc.processSettings{p});
    end

    % Layer 1 uses the input weights, later layers use the layer weights.
    activations = feval(autoenc.layers{1}.transferFcn, ...
                        autoenc.IW{1,1}*activations + autoenc.b{1});
    for k = 2:bottleneckLayer
        activations = feval(autoenc.layers{k}.transferFcn, ...
                            autoenc.LW{k,k-1}*activations + autoenc.b{k});
    end

    % Back to rows = observations, then split with the same partition masks
    % so the rows stay aligned with YLabeled / YUnlabeled.
    latentAll = activations';
    encodedFeaturesUnlabeled = latentAll(unlabeledIdx, :);
    encodedFeaturesLabeled   = latentAll(labeledIdx, :);

    %% ---------------- Train GPR Model ------------------------
    gprMdl = fitrgp( ...
        encodedFeaturesLabeled, YLabeled, ...
        'FitMethod','sr', ...
        'BasisFunction','constant', ...
        'ActiveSetMethod','random', ...
        'KernelFunction','exponential', ...
        'Optimizer','quasinewton', ...
        'Verbose',0, ...
        'OptimizeHyperparameters','auto', ...
        'HyperparameterOptimizationOptions', struct( ...
            'MaxObjectiveEvaluations',100, ...
            'Optimizer','bayesopt', ...
            'UseParallel',true));

    %% ---------------- Prediction -----------------------------
    YPredUnlabeled = predict(gprMdl, encodedFeaturesUnlabeled);

    %% ---------------- Evaluation -----------------------------
    MSE(i)  = mean((YPredUnlabeled - YUnlabeled).^2);
    RMSE(i) = sqrt(MSE(i));

    fprintf('MSE  = %.4f | RMSE = %.4f\n', MSE(i), RMSE(i));

    %% -------- Optional Visualization -------------------------
    % Plot the raw predictions: rescaling them to the observed range would
    % distort the comparison with the y = x line and would no longer match
    % the MSE / RMSE reported above.
    figure('Name',sprintf('%.0f%% Labeled Data',labeledRatio*100));
    scatter(YUnlabeled, YPredUnlabeled, 'filled');
    grid on;
    xlabel('Observed ALE');
    ylabel('Predicted ALE');
    title(sprintf('sGprAle Performance (%.0f%% Labeled)', ...
          labeledRatio*100));
    refline(1,0);   % y = x reference line

end

%% ----------------- Summary Table -----------------------------
% One row per scenario: labeled percentage, then the MSE and RMSE measured
% on the corresponding unlabeled set.
Results = array2table( ...
    [labeledRatios(:) * 100, MSE, RMSE], ...
    'VariableNames', {'LabeledDataPercent','MSE','RMSE'});

fprintf('%s\n', '===== Summary of Results =====');
disp(Results);
