european_data/scripts/misc.py

55 lines
1.9 KiB
Python

import pandas as pd
import numpy as np
# convert 1-dimensional hourly time series into numpy array with 24 columns and corresponding number of days (rows)
# optionally repeats the first and last hour of each day N times (used in clustering)
def col_to_M(v_column, repeat=None):
    """Reshape a 1-D hourly time series into a matrix with one row per day.

    Parameters
    ----------
    v_column : pandas.Series or numpy.ndarray
        Hourly values. For a Series, NaN entries are dropped before
        reshaping so that the number of days matches the number of valid
        values (presumably NaN padding of non-leap years -- confirm against
        the caller). An ndarray is used as given.
    repeat : int, optional
        If given, the first and the last hour of every day are each
        prepended/appended ``repeat`` additional times, which increases
        their weight (used in clustering). The result then has
        ``24 + 2 * repeat`` columns.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N_days, 24)``, or ``(N_days, 24 + 2 * repeat)``
        when ``repeat`` is given.

    Raises
    ------
    ValueError
        If ``v_column`` is neither a Series nor an ndarray, if ``repeat`` is
        not an integer, or if the number of values is not a multiple of 24.
    """
    if isinstance(v_column, pd.Series):
        # count() ignores NaN, so the values must be NaN-free as well;
        # otherwise the reshape below cannot match the computed day count.
        series = v_column.dropna().values
    elif isinstance(v_column, np.ndarray):
        series = v_column
    else:
        raise ValueError("Input vector must be a pandas series or numpy array.")

    # numpy integer scalars are accepted alongside the builtin int
    if repeat is not None and not isinstance(repeat, (int, np.integer)):
        raise ValueError("The number of repeats must be an integer!")

    N_days, leftover = divmod(series.size, 24)
    if leftover:
        raise ValueError(
            "Input length ({}) is not a multiple of 24 hours.".format(series.size)
        )

    M = series.reshape(N_days, 24)
    if repeat is None:
        return M

    # M[:, :1] / M[:, -1:] keep the column axis so np.repeat widens them
    first_hours = np.repeat(M[:, :1], repeat, axis=1)
    last_hours = np.repeat(M[:, -1:], repeat, axis=1)
    return np.concatenate([first_hours, M, last_hours], axis=1)
# select valid draws by start and end values (for continuity)
def get_valid_draws(M_original, M_draw, epsilon=0.05, *, lower=0.0, upper=1.1):
    """Select the drawn profiles that are continuous with the original data.

    A draw (row of ``M_draw``) is kept when its first and last values lie
    strictly within ``epsilon`` of the mean first/last values of
    ``M_original`` and all of its values lie in ``[lower, upper]``.

    Parameters
    ----------
    M_original : numpy.ndarray
        2-D reference array; its first and last columns define the target
        start and end values, and its column means are the fallback result.
    M_draw : numpy.ndarray
        Candidate draws. A 1-D array, or a 2-D array with at most 5 rows, is
        returned unchanged without any filtering.
    epsilon : float, optional
        Half-width of the accepted window around the start/end means.
    lower, upper : float, optional
        Inclusive bounds every value of a valid draw must satisfy. The
        default upper bound of 1.1 leaves room for scaling back down to the
        original maximum later on.

    Returns
    -------
    numpy.ndarray
        ``M_draw`` itself (pass-through case), the 2-D array of valid rows,
        or the 1-D column mean of ``M_original`` when no row is valid.
    """
    # Too few draws to filter meaningfully: hand them back untouched.
    if M_draw.ndim == 1 or (M_draw.ndim == 2 and M_draw.shape[0] <= 5):
        return M_draw

    start_mean = M_original[:, 0].mean()
    end_mean = M_original[:, -1].mean()
    first, last = M_draw[:, 0], M_draw[:, -1]

    # continuity: start and end values must be close to the original means
    endpoints_ok = (
        (first < start_mean + epsilon)
        & (first > start_mean - epsilon)
        & (last < end_mean + epsilon)
        & (last > end_mean - epsilon)
    )
    # every value of a valid draw must stay inside the allowed range
    in_range = np.all((M_draw >= lower) & (M_draw <= upper), axis=1)

    M_valid = M_draw[endpoints_ok & in_range]

    # NOTE(review): the fallback is applied only on the filtering path;
    # pass-through inputs are returned as-is even when empty -- confirm this
    # matches the intended behaviour.
    if M_valid.size == 0:
        return M_original.mean(axis=0)
    return M_valid