added selection.py to select the best sample from the random samples according to user defined selection rules
rearranged main.py
This commit is contained in:
parent
7a41174593
commit
9082e92f90
|
@ -0,0 +1,80 @@
|
|||
import pandas as pd
import numpy as np
import pickle
import os

import scripts.read as read
from scripts.misc import *
from scripts.selection import *


# Driver script: build the baseline profile dataframe by replacing every
# original column with the "best" random sample, chosen per-column by the
# user-defined selection rules in baseline_selections.csv.

# load clustering daylabels (one cluster id per day)
with open('intermediate/kmeans30_daylabels.pickle', 'rb') as handle:
    daylabels = pickle.load(handle)

# load valid draws
with open('intermediate/kmeans30_valid_draw_samples.pickle', 'rb') as handle:
    valid_draws = pickle.load(handle)

# load target df
df_raw = read.load_csv('input/data_mod.csv', index_col='UTC')
# expand the per-day cluster labels to hourly resolution (24 rows per day)
df_raw['daylabel'] = np.repeat(daylabels, 24)

# load selection rules: maps a column-name keyword to a selection function name
df_rule = pd.read_csv('baseline_selections.csv', usecols=[0, 1], index_col='keyword')
keywords = list(df_rule.index)
dict_rule = dict(zip(keywords, df_rule['selection_rule'].values))

# array of daytypes (sorted unique cluster ids)
daytypes = np.sort(df_raw['daylabel'].unique())

# max VRE CFs (per-column scaling factors for variable-renewable columns)
with open('input/VRE_max_CF.pickle', 'rb') as handle:
    VRE_max = pickle.load(handle)

# initialise output dataframe with the same shape/index as the input
df_baseline = pd.DataFrame(index=df_raw.index, columns=df_raw.columns)
df_baseline.index.name = 'UTC'
df_baseline['daylabel'] = df_raw['daylabel']

for col in df_raw.columns:
    if col == 'daylabel':
        continue
    for keyword in keywords:
        if keyword not in col:
            continue
        selection_rule = dict_rule[keyword]

        if 'yearly' in selection_rule:
            # yearly rules choose one whole reconstructed year among the samples
            s_original = df_raw[col]
            df_samples = read.load_csv('intermediate/random_yearly_dfs/random_yearly_%s.csv' %col, index_col='UTC')
            # NOTE(review): eval() of the rule string is only safe because
            # baseline_selections.csv is a trusted local file
            s = eval(selection_rule)(s_original, df_samples)
            df_baseline[col] = s
            print(col)

        elif 'cluster' in selection_rule:
            # cluster rules choose one representative day profile per daytype
            # and tile it across all days of that type
            for dtype in daytypes:
                dtype_idx = df_raw.loc[df_raw['daylabel'] == dtype].index
                N_dtype_days = int(len(dtype_idx) / 24)
                M_original = col_to_M(df_raw.loc[df_raw['daylabel'] == dtype, col])
                if selection_rule == 'cluster_mean':
                    best_sample = cluster_mean(M_original)
                else:
                    M_draw = valid_draws[col][dtype]
                    best_sample = eval(selection_rule)(M_original, M_draw)
                # BUG FIX: the original assigned 'beset_sample' (typo) in the
                # else branch and tiled 'beset_sample', so the cluster_mean
                # result was silently never used (or raised NameError)
                df_baseline.loc[dtype_idx, col] = np.tile(best_sample, N_dtype_days)
            print(col)

        else:
            raise ValueError(col, " Please check the selection rule")

        if 'VRE' in col:
            # scale the normalised profile by the column's max capacity factor
            # NOTE(review): relies on 's' from the yearly branch — assumes every
            # VRE column uses a 'yearly' rule (true for the shipped rules file)
            df_baseline[col] = s * VRE_max[col]
            df_baseline[col] = df_baseline[col].clip(0., 1.)
        else:
            # renormalise demand-like columns to a fixed yearly total
            col_total = df_baseline[col].sum()
            df_baseline[col] = df_baseline[col] / col_total * 1000000

# check if output directory exists
if not os.path.exists('output'):
    os.makedirs('output')

# TODO(review): 'output/.csv' writes a hidden, extension-only file — looks like
# a placeholder filename; confirm the intended output name
df_baseline.to_csv('output/.csv')
|
||||
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
keyword,selection_rule,note
|
||||
shore,yearly_std,both on- and offshore wind
|
||||
pv,yearly_rmse,both rooftop (rtpv) and utility (upv)
|
||||
RES_heating,yearly_std,
|
||||
COM_heating,cluster_rmse,
|
||||
cooling,yearly_rmse,both COM and RES
|
||||
RES_SE,cluster_mean,
|
||||
COM_SE,cluster_rmse,
|
||||
RES_shw,cluster_rmse,
|
||||
COM_shw,cluster_mean,
|
||||
RES_cooking,cluster_mean,all profiles are identical
|
||||
COM_catering,cluster_mean,
|
||||
IND,yearly_rmse,
|
||||
TRN,cluster_rmse,all three types of transport
|
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
17522
input/data_mod.csv
17522
input/data_mod.csv
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
40
main.py
40
main.py
|
@ -161,6 +161,28 @@ for col in [col for col in df_draw.columns if 'label' not in col]:
|
|||
|
||||
print(col)
|
||||
|
||||
#######
# OUTPUT
#######
# Persist the intermediate clustering/sampling artifacts; the selection step
# loads these same pickles ('intermediate/kmeans30_*.pickle') later.

# create output sub directory if not already exists
if not os.path.exists('intermediate'):
    os.makedirs('intermediate')

# output kmeans daylabels (per-day cluster assignments; K is the cluster count
# defined earlier in this file — not visible in this hunk)
with open('intermediate/kmeans%d_daylabels.pickle' %K, 'wb') as handle:
    pickle.dump(kmeans_daylabels, handle, protocol=pickle.HIGHEST_PROTOCOL)

# output random sampling results
with open('intermediate/kmeans%d_draw_samples.pickle' %K, 'wb') as handle:
    pickle.dump(draws, handle, protocol=pickle.HIGHEST_PROTOCOL)

# output valid draw samples (the subset of draws consumed by selection.py)
with open('intermediate/kmeans%d_valid_draw_samples.pickle' %K, 'wb') as handle:
    pickle.dump(valid_draws, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
|
||||
|
||||
|
||||
############
|
||||
# RANDOM YEARLY TIME SERIES
|
||||
|
@ -215,23 +237,5 @@ for col in df_draw.columns:
|
|||
|
||||
|
||||
|
||||
#######
|
||||
# OUTPUT
|
||||
#######
|
||||
|
||||
# create output sub directory if not already exists
|
||||
if not os.path.exists('intermediate'):
|
||||
os.makedirs('intermediate')
|
||||
|
||||
# output kmeans daylabels
|
||||
with open('intermediate/kmeans%d_daylabels.pickle' %K, 'wb') as handle:
|
||||
pickle.dump(kmeans_daylabels, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
# output random sampling results
|
||||
with open('intermediate/kmeans%d_draw_samples.pickle' %K, 'wb') as handle:
|
||||
pickle.dump(draws, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
# output valid draw samples
|
||||
with open('intermediate/kmeans%d_valid_draw_samples.pickle' %K, 'wb') as handle:
|
||||
pickle.dump(valid_draws, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
import numpy as np
|
||||
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
|
||||
# cluster mean
def cluster_mean(M_original):
    """Return the per-hour mean profile of the cluster (mean over rows/days)."""
    return M_original.mean(axis=0)
|
||||
|
||||
|
||||
# smallest root mean square error with respect to the original series
def cluster_rmse(M_original, M_draw):
    """Pick the candidate day profile from M_draw closest to the original days.

    Parameters
    ----------
    M_original : ndarray, shape (N_days, n_hours)
        Hourly profile of every day in the cluster (n_hours is 24 elsewhere
        in this project).
    M_draw : ndarray, shape (N_candidates, n_hours) or (n_hours,)
        Candidate day profiles; a 1-D array means a single unique candidate.

    Returns
    -------
    ndarray, shape (n_hours,)
        The candidate whose mean squared error against all original days is
        smallest; ties resolve to the first candidate (np.argmin semantics).
    """
    M_draw = np.asarray(M_draw)
    # single unique candidate: nothing to choose. Generalizes the original
    # hard-coded `shape == (24,)` check to any 1-D profile length.
    if M_draw.ndim == 1:
        return M_draw

    M_original = np.asarray(M_original)
    # The MSE of tiling candidate i over N_days days equals the mean squared
    # deviation of every original day from that candidate. One broadcasted
    # pass replaces the original python loop of sklearn mean_squared_error
    # calls accumulated with np.append (which reallocated every iteration).
    # Intermediate shape: (N_candidates, N_days, n_hours) -> mean over (1, 2).
    v_mse = ((M_original[np.newaxis, :, :] - M_draw[:, np.newaxis, :]) ** 2).mean(axis=(1, 2))
    return M_draw[np.argmin(v_mse)]
|
||||
|
||||
|
||||
# randomly reconstructed yearly time series with the least rmse with respect to the original series
def yearly_rmse(s_original, df_samples):
    """Return the sample column of df_samples with the smallest MSE vs s_original.

    Parameters
    ----------
    s_original : pd.Series
        The original yearly time series.
    df_samples : pd.DataFrame
        One candidate reconstruction per column, row-aligned with s_original
        (comparison is positional, as in the original implementation).

    Returns
    -------
    pd.Series
        The best-matching column; ties resolve to the first column.
    """
    # One vectorized per-column MSE pass replaces the original python loop of
    # sklearn mean_squared_error calls accumulated with np.append.
    errors = np.mean((df_samples.to_numpy() - np.asarray(s_original)[:, None]) ** 2, axis=0)
    return df_samples.iloc[:, np.argmin(errors)]
|
||||
|
||||
|
||||
# randomly reconstructed yearly time series with the closest std with respect to the original series
def yearly_std(s_original, df_samples):
    """Return the sample column whose standard deviation is closest to s_original's.

    Parameters
    ----------
    s_original : pd.Series
        The original yearly time series.
    df_samples : pd.DataFrame
        One candidate reconstruction per column.

    Returns
    -------
    pd.Series
        The column minimising |std(column) - std(s_original)|; ties resolve
        to the first column. Uses pandas' default sample std (ddof=1), same
        as the per-column .std() calls in the original loop.
    """
    # One vectorized per-column pass replaces the original python loop that
    # grew v_diff_std with np.append on every iteration.
    diff = (df_samples.std(axis=0) - s_original.std()).abs()
    # positional argmin preserves the original first-column tie behavior
    return df_samples.iloc[:, np.argmin(diff.to_numpy())]
|
Loading…
Reference in New Issue