55 lines
1.9 KiB
Python
55 lines
1.9 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
|
|
# convert 1-dimensional hourly time series into numpy array with 24 columns and corresponding number of days (rows)
|
|
# option to repeat first and last hours N times everyday (used in clustering)
|
|
def col_to_M(v_column, repeat=None):
    """Reshape a 1-D hourly series into a matrix with one row per day.

    Parameters
    ----------
    v_column : pandas.Series or numpy.ndarray
        Hourly values. For a Series, NaN entries are dropped before
        reshaping (wherever they occur, so NaNs in the middle of the
        series shift the following hours). Trailing values that do not
        fill a complete 24-hour day are discarded.
    repeat : int or None, optional
        If given, the first and the last hour of every day are each
        repeated ``repeat`` additional times, prepended/appended as extra
        columns (gives these hours more weight, e.g. in clustering).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N_days, 24)`` when ``repeat`` is None, otherwise
        ``(N_days, 24 + 2 * repeat)``.

    Raises
    ------
    ValueError
        If ``v_column`` is neither a pandas Series nor a numpy array, or
        if ``repeat`` is neither None nor an integer.
    """
    if isinstance(v_column, pd.Series):
        # The day count has always been based on non-NaN entries
        # (Series.count()); drop the NaNs from the data as well so the
        # reshape below cannot fail on NaN padding (e.g. non-leap years
        # stored in a leap-year-length column).
        series = v_column.dropna().values
    elif isinstance(v_column, np.ndarray):
        series = v_column
    else:
        raise ValueError("Input vector must be a pandas series or numpy array.")

    # Also accept numpy integer scalars (e.g. values read from an array).
    if repeat is not None and not isinstance(repeat, (int, np.integer)):
        raise ValueError("The number of repeats must be an integer!")

    # Keep whole days only; an incomplete trailing day would otherwise make
    # reshape() raise because the element count is not N_days * 24.
    N_days = len(series) // 24
    M = series[:N_days * 24].reshape(N_days, 24)

    if repeat is None:
        return M

    # Repeat first and last hours to increase their weights.
    first_hour = np.repeat(M[:, :1], repeat, axis=1)
    last_hour = np.repeat(M[:, -1:], repeat, axis=1)
    return np.concatenate([first_hour, M, last_hour], axis=1)
|
|
|
|
# select valid draws by start and end values (for continuity)
|
|
def get_valid_draws(M_original , M_draw, epsilon=0.05):
    """Keep only the drawn profiles that connect smoothly to the original data.

    Parameters
    ----------
    M_original : numpy.ndarray
        2-D reference array; the means of its first and last columns are
        the targets for the start and end values of each draw.
    M_draw : numpy.ndarray
        Candidate draws. A 1-D array, or a 2-D array with at most 5 rows,
        is passed through without any filtering.
    epsilon : float, optional
        Allowed (exclusive) deviation of a draw's first/last value from
        the corresponding reference mean.

    Returns
    -------
    numpy.ndarray
        The draws that pass the filters. If nothing is left, the
        column-wise mean of ``M_original`` (a 1-D array) is returned
        instead.
    """
    pass_through = M_draw.ndim == 1 or (M_draw.ndim == 2 and M_draw.shape[0] <= 5)

    if pass_through:
        kept = M_draw
    else:
        first_ref = M_original[:, 0].mean()
        last_ref = M_original[:, -1].mean()

        first_vals = M_draw[:, 0]
        last_vals = M_draw[:, -1]

        # Start and end of a draw must lie strictly within epsilon of the
        # reference means (for continuity between consecutive days).
        starts_ok = (first_vals < first_ref + epsilon) & (first_vals > first_ref - epsilon)
        ends_ok = (last_vals < last_ref + epsilon) & (last_vals > last_ref - epsilon)
        kept = M_draw[(starts_ok & ends_ok).nonzero()]

        # Valid draws must also have all values between 0 and 1
        # (1.1 to allow for scaling back down to original maximum later on).
        within_bounds = np.all((kept >= 0) & (kept <= 1.1), axis=1)
        kept = kept[within_bounds]

    if kept.size == 0:
        return M_original.mean(axis=0)

    return kept
|
|
|
|
|
|
|
|
|