# european_data/scripts/selection.py

import numpy as np

from sklearn.metrics import mean_squared_error


# cluster mean
def cluster_mean(M_original):
    """Return the mean of ``M_original`` taken along axis 0.

    The computation is delegated to ``M_original.mean(axis=0)``, so any
    object exposing that method is accepted.
    """
    averaged = M_original.mean(axis=0)
    return averaged


# smallest root mean square error with respect to the original series
def cluster_rmse(M_original, M_draw):
    """Pick the drawn row that is closest to all rows of ``M_original``.

    Every row of ``M_draw`` is treated as a candidate. A candidate is
    compared against each row of ``M_original`` and scored by the mean
    squared error over all entries; the candidate with the lowest score is
    returned. Minimising the MSE is equivalent to minimising the RMSE.

    Parameters
    ----------
    M_original : numpy.ndarray
        Reference rows, indexed along axis 0.
    M_draw : numpy.ndarray
        Candidate rows, indexed along axis 0, or a single 1-D row of any
        length.

    Returns
    -------
    numpy.ndarray
        ``M_draw`` itself when it is 1-D, otherwise the row of ``M_draw``
        with the smallest mean squared error (first one on ties).

    Raises
    ------
    ValueError
        If the row shapes differ, if an input contains NaN or infinite
        values, or if ``M_draw`` holds no candidates.
    """
    # A 1-D draw is a single candidate: nothing to choose between.
    if M_draw.ndim == 1:
        return M_draw

    original = np.asarray(M_original, dtype=float)
    candidates = np.asarray(M_draw, dtype=float)

    if original.shape[1:] != candidates.shape[1:]:
        raise ValueError(
            "Rows of M_draw and M_original must have the same shape, got "
            f"{candidates.shape[1:]} and {original.shape[1:]}."
        )
    # np.argmin would silently select a NaN score; fail loudly instead.
    if not (np.isfinite(original).all() and np.isfinite(candidates).all()):
        raise ValueError("Input contains NaN or infinity.")

    # Broadcasting compares one candidate with every original row at once,
    # so no repeated copy of the candidate has to be materialised.
    v_mse = [np.mean((original - candidate) ** 2) for candidate in candidates]

    return M_draw[np.argmin(v_mse)]


# randomly reconstructed yearly time series with the least rmse with respect to the original series
def yearly_rmse(s_original, df_samples):
    """Return the column of ``df_samples`` closest to ``s_original``.

    Each column is scored by its mean squared error against ``s_original``.
    Values are compared by position, not by index label. Minimising the MSE
    is equivalent to minimising the RMSE.

    Parameters
    ----------
    s_original : pandas.Series or array-like
        Reference series.
    df_samples : pandas.DataFrame
        One candidate series per column, each as long as ``s_original``.

    Returns
    -------
    pandas.Series
        The column of ``df_samples`` with the smallest mean squared error
        (first one on ties).

    Raises
    ------
    ValueError
        If the lengths differ, if an input contains NaN or infinite values,
        or if ``df_samples`` has no columns.
    """
    original = np.asarray(s_original, dtype=float).ravel()
    samples = np.asarray(df_samples, dtype=float)

    if samples.shape[0] != original.shape[0]:
        raise ValueError(
            "s_original and df_samples must have the same number of rows, "
            f"got {original.shape[0]} and {samples.shape[0]}."
        )
    # np.argmin would silently select a NaN score; fail loudly instead.
    if not (np.isfinite(original).all() and np.isfinite(samples).all()):
        raise ValueError("Input contains NaN or infinity.")

    # One vectorised pass: the reference is broadcast across all columns.
    v_mse = np.mean((samples - original[:, np.newaxis]) ** 2, axis=0)

    return df_samples.iloc[:, np.argmin(v_mse)]


# randomly reconstructed yearly time series with the closest std with respect to the original series

def yearly_std(s_original, df_samples):
    """Return the column of ``df_samples`` whose std is closest to the original.

    The standard deviation of every column is compared with
    ``s_original.std()``; the column with the smallest absolute difference
    is returned (first one on ties).
    """
    target_std = s_original.std()
    std_gaps = [
        np.abs(target_std - column.std())
        for _, column in df_samples.items()
    ]
    closest = np.argmin(np.asarray(std_gaps, dtype=float))
    return df_samples.iloc[:, closest]