import numpy as np
import pandas as pd

# Number of hourly samples that make up one row (day) of the output matrix.
_HOURS_PER_DAY = 24


def col_to_M(v_column, repeat=None):
    """Reshape a 1-D hourly time series into a (days, 24) numpy array.

    Optionally the first and last hour of every day are repeated ``repeat``
    times on the left and right side of each row, which increases the weight
    of those hours (used in clustering).

    Parameters
    ----------
    v_column : pandas.Series or numpy.ndarray
        Hourly values. For a Series, NaN entries are dropped before
        reshaping so that the number of days is derived from the same values
        that are reshaped (the original sized the matrix from ``count()``,
        which ignores NaN, but reshaped the full value array).
        NOTE(review): this assumes NaNs are padding (e.g. shorter non-leap
        years); NaNs in the middle of a series would shift later hours.
    repeat : int, optional
        Number of extra copies of the first and last hour to add to each
        row. ``None`` (default) returns the plain (days, 24) matrix.

    Returns
    -------
    numpy.ndarray
        Array of shape (days, 24), or (days, 24 + 2 * repeat) when
        ``repeat`` is given.

    Raises
    ------
    ValueError
        If the input is neither a Series nor an ndarray, if the number of
        values is not a multiple of 24, or if ``repeat`` is not an integer.
    """
    if isinstance(v_column, pd.Series):
        series = v_column.dropna().values
    elif isinstance(v_column, np.ndarray):
        series = v_column
    else:
        raise ValueError("Input vector must be a pandas series or numpy array.")

    n_days, remainder = divmod(len(series), _HOURS_PER_DAY)
    if remainder:
        # Fail with an explicit message instead of numpy's generic
        # "cannot reshape array" error.
        raise ValueError(
            "Input length ({}) is not a multiple of {} hours.".format(
                len(series), _HOURS_PER_DAY
            )
        )
    M = series.reshape(n_days, _HOURS_PER_DAY)

    if repeat is None:
        return M
    if isinstance(repeat, (int, np.integer)):
        # Repeat first and last hours to increase their weights.
        first_hours = np.repeat(M[:, :1], repeat, axis=1)
        last_hours = np.repeat(M[:, -1:], repeat, axis=1)
        return np.concatenate([first_hours, M, last_hours], axis=1)
    raise ValueError("The number of repeats must be an integer!")


def get_valid_draws(M_original, M_draw, epsilon=0.05):
    """Select valid draws by their start and end values (for continuity).

    A row of ``M_draw`` is kept when its first value lies strictly within
    ``epsilon`` of the mean first value of ``M_original``, its last value
    lies strictly within ``epsilon`` of the mean last value of
    ``M_original``, and all of its values are between 0 and 1.1.

    Parameters
    ----------
    M_original : numpy.ndarray
        2-D reference array; only its first and last columns (and, for the
        fallback, its column means) are used.
    M_draw : numpy.ndarray
        Candidate draws. A 1-D array, or a 2-D array with at most 5 rows, is
        passed through without filtering.
    epsilon : float, optional
        Half-width of the accepted window around the start and end means.

    Returns
    -------
    numpy.ndarray
        The accepted draws. If nothing is left, the column-wise mean of
        ``M_original`` (a 1-D array) is returned instead.
    """
    too_few_to_filter = M_draw.ndim == 1 or (
        M_draw.ndim == 2 and M_draw.shape[0] <= 5
    )
    if too_few_to_filter:
        M_valid = M_draw
    else:
        start_mean = M_original[:, 0].mean()
        end_mean = M_original[:, -1].mean()
        first = M_draw[:, 0]
        last = M_draw[:, -1]
        # Keep draws whose start and end are close to the original means.
        continuous = (
            (first < start_mean + epsilon)
            & (first > start_mean - epsilon)
            & (last < end_mean + epsilon)
            & (last > end_mean - epsilon)
        )
        M_valid = M_draw[continuous]
        # Valid draws must also have all values between 0 and 1 (1.1 to
        # allow for scaling back down to original maximum later on).
        in_range = np.all((M_valid >= 0) & (M_valid <= 1.1), axis=1)
        M_valid = M_valid[in_range]

    if M_valid.size == 0:
        # Nothing usable: fall back to the average original profile.
        M_valid = M_original.mean(axis=0)
    return M_valid