import numpy as np

try:
    # Kept so the name stays importable from this module; the functions below
    # compute their errors with NumPy and no longer call scikit-learn.
    from sklearn.metrics import mean_squared_error  # noqa: F401
except ImportError:  # scikit-learn is optional for this module
    mean_squared_error = None


def _index_of_smallest(errors):
    """Return the position of the smallest entry of *errors*.

    Raises:
        ValueError: if any error is NaN or infinite (the inputs contained
            missing/non-finite values or were empty), or if *errors* itself
            is empty.
    """
    errors = np.asarray(errors, dtype=float)
    if not np.all(np.isfinite(errors)):
        raise ValueError("Input contains NaN or infinity.")
    return int(np.argmin(errors))


# cluster mean
def cluster_mean(M_original):
    """Return the mean of *M_original* along axis 0.

    Args:
        M_original: array-like exposing ``.mean(axis=0)``; presumably one row
            per day of the cluster (verify against the caller).

    Returns:
        Whatever ``M_original.mean(axis=0)`` returns.
    """
    return M_original.mean(axis=0)


# smallest mean square error with respect to the original series
def cluster_rmse(M_original, M_draw):
    """Return the row of *M_draw* closest to all rows of *M_original*.

    Each candidate row is compared against every row of *M_original* and
    scored by its mean squared error. The square root is not taken: it is
    monotonic, so the row minimising the MSE also minimises the RMSE.

    Args:
        M_original: array whose first axis indexes the original rows.
        M_draw: either a single 1-D candidate (of any length), or an array
            whose first axis indexes the candidates and whose remaining
            axes match those of *M_original*.

    Returns:
        *M_draw* itself when it is 1-D (there is nothing to choose from),
        otherwise ``M_draw[k]`` for the best-scoring candidate ``k``; ties
        resolve to the first candidate.

    Raises:
        ValueError: on mismatching row shapes, on NaN/infinite values, or
            when there are no candidates.
    """
    if np.ndim(M_draw) == 1:  # single unique row
        return M_draw

    original = np.asarray(M_original, dtype=float)
    draws = np.asarray(M_draw, dtype=float)
    if original.shape[1:] != draws.shape[1:]:
        raise ValueError(
            "M_original and M_draw rows have different shapes: "
            f"{original.shape[1:]} vs {draws.shape[1:]}"
        )

    # ``original - row`` broadcasts the candidate over every original row,
    # which is what tiling the candidate N_days times and flattening did.
    scores = [np.mean((original - row) ** 2) for row in draws]
    return M_draw[_index_of_smallest(scores)]


# randomly reconstructed yearly time series with the least rmse with respect to the original series
def yearly_rmse(s_original, df_samples):
    """Return the column of *df_samples* with the least error to *s_original*.

    Columns are scored by mean squared error (same minimiser as the RMSE).
    Values are compared by position, not by index label.

    Args:
        s_original: 1-D series of original values.
        df_samples: DataFrame with one candidate series per column and as
            many rows as *s_original* has values.

    Returns:
        ``df_samples.iloc[:, k]`` for the best-scoring column ``k``; ties
        resolve to the first column.

    Raises:
        ValueError: on a length mismatch, on NaN/infinite values, or when
            *df_samples* has no columns.
    """
    target = np.asarray(s_original, dtype=float).ravel()
    samples = df_samples.to_numpy(dtype=float)
    if samples.shape[0] != target.shape[0]:
        raise ValueError(
            "s_original and df_samples have different lengths: "
            f"{target.shape[0]} vs {samples.shape[0]}"
        )

    # One MSE per column, computed in a single vectorised pass.
    scores = ((samples - target[:, np.newaxis]) ** 2).mean(axis=0)
    return df_samples.iloc[:, _index_of_smallest(scores)]


# randomly reconstructed yearly time series with the closest std with respect to the original series
def yearly_std(s_original, df_samples):
    """Return the column of *df_samples* whose std is closest to *s_original*'s.

    Args:
        s_original: object exposing ``.std()`` (presumably a pandas Series).
        df_samples: DataFrame with one candidate series per column.

    Returns:
        ``df_samples.iloc[:, k]`` where ``k`` minimises
        ``abs(s_original.std() - column.std())``; ties resolve to the first
        column.

    Raises:
        ValueError: when *df_samples* has no columns.
    """
    original_std = s_original.std()
    # DataFrame.std() yields the per-column std in column order.
    std_gaps = np.abs(original_std - df_samples.std().to_numpy())
    return df_samples.iloc[:, int(np.argmin(std_gaps))]