32 lines
1.1 KiB
Python
32 lines
1.1 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
|
|
from sklearn.cluster import KMeans
|
|
|
|
def clustering(df, K=20):  # default 20 day types
    """Group the days of an hourly table into ``K`` k-means clusters.

    Every consecutive run of 24 rows in ``df`` is flattened, together with
    all of its columns, into a single feature vector. The resulting
    one-row-per-day matrix is fitted with ``KMeans`` using a fixed
    ``random_state`` of 2468.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose rows are grouped 24 at a time. Presumably hourly data
        covering a whole number of days -- the reshape raises otherwise.
    K : int, optional
        Number of clusters to fit.

    Returns
    -------
    tuple
        ``(labels, centres)``: the fitted model's ``labels_`` (one entry per
        day) and ``cluster_centers_`` (one flattened day per cluster).
    """
    hours_per_day = 24
    day_count = int(len(df) / hours_per_day)
    features_per_day = int(len(df.columns) * hours_per_day)

    # Row-major flattening keeps each day's 24 rows contiguous, so a single
    # reshape yields the (days x features) matrix.
    day_matrix = df.values.ravel().reshape((day_count, features_per_day))

    model = KMeans(n_clusters=K, random_state=2468)
    model.fit(day_matrix)
    return model.labels_, model.cluster_centers_
|
|
|
|
def df_daily_label(v_labels, year):  # create daily day type label dataframe
    """Build a one-column frame holding a day-type label for each day of ``year``.

    Parameters
    ----------
    v_labels : sequence
        Labels used as the column data; pandas requires one per day of
        ``year``.
    year : int
        Calendar year, substituted into the start and end timestamps.

    Returns
    -------
    pandas.DataFrame
        Frame with a daily ``DatetimeIndex`` running from 1 January to
        31 December of ``year`` and a single ``'daytype'`` column.
    """
    first_stamp = '%d-01-01 00:00:00' % year
    last_stamp = '%d-12-31 23:00:00' % year
    # Daily steps from midnight on 1 January; the last step not after
    # 23:00 on 31 December is midnight of 31 December.
    daily_index = pd.date_range(start=first_stamp, end=last_stamp, freq='1D')
    return pd.DataFrame(data=v_labels, index=daily_index, columns=['daytype'])
|
|
|
|
|
|
|
|
def df_centres(df_original, v_labels, m_centres):  # concatenate each day according to day type label
    """Rebuild an hourly frame in which each day is replaced by its cluster centre.

    For every block of 24 rows in ``df_original`` the centre selected by that
    day's label is unflattened to ``(24, n_columns)``, and the blocks are
    stacked in day order.

    All days are gathered with one integer-array lookup instead of growing
    the result with ``np.vstack`` inside a loop. The loop re-copied the
    accumulated matrix on every iteration and left the result unbound when
    the frame held no complete day.

    Parameters
    ----------
    df_original : pandas.DataFrame
        Frame whose index and columns are reused for the result. Its length
        is presumably a multiple of 24; otherwise the DataFrame constructor
        rejects the mismatching shape.
    v_labels : sequence of int
        Centre index for each day; only the first ``len(df_original) // 24``
        entries are used.
    m_centres : array-like
        One row per cluster, each holding ``24 * n_columns`` values.

    Returns
    -------
    pandas.DataFrame
        Frame with the index and columns of ``df_original``, filled with the
        centre values.

    Raises
    ------
    IndexError
        If ``v_labels`` has fewer entries than there are days, or a label is
        out of range for ``m_centres``.
    ValueError
        If the rows of ``m_centres`` do not hold ``24 * n_columns`` values.
    """
    n_days = len(df_original) // 24
    n_cols = len(df_original.columns)

    # Explicit integer dtype so an empty label list still indexes cleanly.
    labels = np.asarray(v_labels, dtype=np.intp)[:n_days]
    if labels.shape[0] < n_days:
        raise IndexError(
            "v_labels has %d entries but df_original spans %d days"
            % (labels.shape[0], n_days)
        )

    # One gather gives an (n_days, 24 * n_cols) matrix; the row-major
    # reshape unflattens each day into 24 consecutive rows of n_cols values.
    reduced = np.asarray(m_centres)[labels].reshape((n_days * 24, n_cols))

    return pd.DataFrame(index=df_original.index, columns=df_original.columns, data=reduced)
|