european_data/scripts/selection.py

46 lines
1.5 KiB
Python

import numpy as np
from sklearn.metrics import mean_squared_error
# Cluster mean: the average of the cluster's rows, taken along the first axis.
def cluster_mean(M_original):
    """Return the mean of ``M_original`` taken along its first axis.

    For a 2-D array with one observation per row, this is the element-wise
    average row of the cluster.
    """
    centroid = M_original.mean(axis=0)
    return centroid
# Pick the drawn row with the smallest root-mean-square error with respect to the original series.
def cluster_rmse(M_original, M_draw):
    """Return the candidate row of ``M_draw`` that best matches ``M_original``.

    Every candidate row is compared against all rows of ``M_original`` at
    once; the candidate with the smallest mean squared error (and therefore
    the smallest RMSE) is returned.

    Parameters
    ----------
    M_original : numpy.ndarray
        Observations stacked along the first axis.
    M_draw : numpy.ndarray
        Either a single 1-D candidate row, or candidate rows stacked along
        the first axis.

    Returns
    -------
    numpy.ndarray
        ``M_draw`` itself when it is 1-D, otherwise the best row
        ``M_draw[k]``. Ties resolve to the first best candidate.

    Raises
    ------
    ValueError
        If there are no candidates or no observations, if a candidate row
        does not have as many values as an observation row, or if either
        input contains NaN or infinite values.
    """
    # A 1-D draw is a single unique row, so there is nothing to choose from.
    # The check is on dimensionality rather than on a fixed length of 24, so
    # rows of any length are handled.
    if M_draw.ndim == 1:
        return M_draw

    original = np.asarray(M_original, dtype=float)
    candidates = np.asarray(M_draw, dtype=float)
    if candidates.shape[0] == 0:
        raise ValueError("M_draw contains no candidate rows")
    if original.size == 0:
        raise ValueError("M_original is empty")

    # Flatten everything behind the first axis so that one row of
    # `candidates` lines up with one row of `original`.
    original = original.reshape(original.shape[0], -1)
    candidates = candidates.reshape(candidates.shape[0], -1)
    if candidates.shape[1] != original.shape[1]:
        raise ValueError(
            "candidate rows have %d values but original rows have %d"
            % (candidates.shape[1], original.shape[1])
        )
    if not (np.isfinite(original).all() and np.isfinite(candidates).all()):
        raise ValueError("inputs must not contain NaN or infinite values")

    # Broadcasting compares each candidate with every original row without
    # materialising a tiled copy of the candidate.
    errors = [np.mean((original - row) ** 2) for row in candidates]
    return M_draw[int(np.argmin(errors))]
# Pick the randomly reconstructed yearly time series with the smallest RMSE with respect to the original series.
def yearly_rmse(s_original, df_samples):
    """Return the column of ``df_samples`` closest to ``s_original``.

    Closeness is the mean squared error between the values of each column
    and the values of ``s_original``. The column with the smallest error
    (and therefore the smallest RMSE) is returned.

    Values are compared by position; index labels are not aligned.

    Parameters
    ----------
    s_original : array-like
        Reference series, one value per row of ``df_samples``.
    df_samples : pandas.DataFrame
        Candidate series, one per column.

    Returns
    -------
    pandas.Series
        The best-matching column, selected by position. Ties resolve to
        the first best column.

    Raises
    ------
    ValueError
        If ``df_samples`` has no columns, if the inputs are empty or differ
        in length, or if either contains NaN or infinite values.
    """
    target = np.asarray(s_original, dtype=float).ravel()
    samples = df_samples.to_numpy(dtype=float)

    if samples.shape[1] == 0:
        raise ValueError("df_samples has no columns to choose from")
    if target.size == 0:
        raise ValueError("s_original is empty")
    if samples.shape[0] != target.shape[0]:
        raise ValueError(
            "s_original has %d values but df_samples has %d rows"
            % (target.shape[0], samples.shape[0])
        )
    if not (np.isfinite(target).all() and np.isfinite(samples).all()):
        raise ValueError("inputs must not contain NaN or infinite values")

    # Score one column at a time so that no frame-sized temporary is needed.
    errors = [np.mean((column - target) ** 2) for column in samples.T]
    return df_samples.iloc[:, int(np.argmin(errors))]
# Pick the randomly reconstructed yearly time series whose standard deviation is closest to that of the original series.
def yearly_std(s_original, df_samples):
    """Return the column of ``df_samples`` whose std is closest to the original.

    The standard deviation of every column is compared with
    ``s_original.std()``, and the column with the smallest absolute
    difference is returned.

    Parameters
    ----------
    s_original : object exposing ``.std()``
        Reference series.
    df_samples : pandas.DataFrame
        Candidate series, one per column.

    Returns
    -------
    pandas.Series
        The best-matching column, selected by position. Ties resolve to
        the first best column.

    Raises
    ------
    ValueError
        If ``df_samples`` has no columns.
    """
    if df_samples.shape[1] == 0:
        raise ValueError("df_samples has no columns to choose from")

    original_std = s_original.std()
    # Iterate the columns directly; `items()` yields them in positional
    # order, so the argmin index can be used with `iloc`.
    gaps = [abs(original_std - column.std()) for _, column in df_samples.items()]
    return df_samples.iloc[:, int(np.argmin(gaps))]