"""Day-type clustering helpers for time series stored as 24 rows per day."""

import numpy as np
import pandas as pd

try:
    from sklearn.cluster import KMeans
except ImportError:
    # Only clustering() needs scikit-learn; the label/centre helpers below
    # remain importable and usable without it.
    KMeans = None

# Every function here treats 24 consecutive rows as one day.
_HOURS_PER_DAY = 24


def _whole_days(df):
    """Return the number of complete days in *df*, rejecting partial days."""
    n_days, leftover = divmod(len(df), _HOURS_PER_DAY)
    if leftover:
        raise ValueError(
            "expected a multiple of %d rows (whole days), got %d rows"
            % (_HOURS_PER_DAY, len(df))
        )
    return n_days


def clustering(df, K=20):
    """Group the days of *df* into ``K`` day types with k-means.

    Each block of 24 consecutive rows is flattened (row-major, i.e. all
    columns of the first row, then all columns of the second row, ...) into
    one feature vector of length ``24 * len(df.columns)``; those vectors are
    what k-means clusters.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data with 24 rows per day (presumably hourly values).
    K : int, default 20
        Number of day types (clusters).

    Returns
    -------
    labels : numpy.ndarray
        Cluster index of every day, in the order the days appear in *df*.
    centres : numpy.ndarray
        Array of shape ``(K, 24 * len(df.columns))`` holding the flattened
        centre of each cluster.

    Raises
    ------
    ValueError
        If ``len(df)`` is not a multiple of 24.
    ImportError
        If scikit-learn is not installed.
    """
    n_days = _whole_days(df)
    day_width = len(df.columns) * _HOURS_PER_DAY
    if KMeans is None:
        raise ImportError(
            "clustering() requires scikit-learn (sklearn.cluster.KMeans)"
        )
    # Flatten each day into a single row of the feature matrix.
    day_vectors = df.values.ravel().reshape((n_days, day_width))
    # Fixed seed keeps the day-type assignment reproducible between runs.
    kmeans = KMeans(n_clusters=K, random_state=2468).fit(day_vectors)
    return kmeans.labels_, kmeans.cluster_centers_


def df_daily_label(v_labels, year):
    """Build a daily DataFrame holding the day-type label of each day.

    Parameters
    ----------
    v_labels : array-like
        One label per calendar day of *year* (365 or 366 entries).
    year : int
        Calendar year used to build the daily index.

    Returns
    -------
    pandas.DataFrame
        Single column ``'daytype'`` indexed by every day from 1 January to
        31 December of *year*.
    """
    days = pd.date_range(
        start='%d-01-01' % year, end='%d-12-31' % year, freq='D'
    )
    return pd.DataFrame(index=days, columns=['daytype'], data=v_labels)


def df_centres(df_original, v_labels, m_centres):
    """Replace every day of *df_original* by the centre of its day type.

    Parameters
    ----------
    df_original : pandas.DataFrame
        Frame with 24 rows per day; only its index, columns and length are
        used.
    v_labels : array-like of int
        Day-type label per day; only the first ``len(df_original) / 24``
        entries are read.
    m_centres : numpy.ndarray
        Flattened cluster centres, one row of length
        ``24 * len(df_original.columns)`` per day type, as returned by
        ``clustering``.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as *df_original*, where each day's 24 rows
        are the un-flattened centre selected by that day's label.

    Raises
    ------
    ValueError
        If ``len(df_original)`` is not a multiple of 24.
    IndexError
        If *v_labels* has fewer entries than there are days.
    """
    n_days = _whole_days(df_original)
    n_cols = len(df_original.columns)

    labels = np.asarray(v_labels)[:n_days]
    if len(labels) < n_days:
        raise IndexError(
            "v_labels has %d entries but df_original spans %d days"
            % (len(labels), n_days)
        )
    if labels.size == 0:
        # NumPy types an empty sequence as float, which cannot be an index.
        labels = labels.astype(np.intp)

    # Select one centre per day in a single indexing step, then un-flatten
    # every centre back into 24 rows; row-major order keeps days contiguous.
    reduced = np.asarray(m_centres)[labels].reshape(
        (n_days * _HOURS_PER_DAY, n_cols)
    )
    return pd.DataFrame(
        index=df_original.index, columns=df_original.columns, data=reduced
    )