european_electricity/scripts/kmeans.py

32 lines
1.1 KiB
Python

import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
def clustering(df, K=20):  # default 20 day types
    """Cluster the days of a DataFrame into K day types with k-means.

    Every run of 24 consecutive rows is treated as one day and flattened
    row by row into a single feature vector of ``24 * len(df.columns)``
    values, so all columns of a day are clustered together.

    Args:
        df: DataFrame whose row count is a multiple of 24. Rows are assumed
            to be hourly and to start on the first hour of a day -- this is
            not checked here.
        K: Number of clusters (day types) to fit.

    Returns:
        Tuple ``(labels, centres)`` taken from the fitted estimator's
        ``labels_`` and ``cluster_centers_`` attributes.

    Raises:
        ValueError: If the number of rows is not a multiple of 24.
    """
    n_rows = len(df)
    # Fail early with a readable message instead of a cryptic reshape error
    # when the frame contains a partial day.
    if n_rows % 24:
        raise ValueError(
            "clustering expects whole days: got %d rows, which is not a "
            "multiple of 24" % n_rows
        )
    n_days = n_rows // 24
    n_cols = len(df.columns) * 24  # flatten each day
    # Row-major reshape: one row per day, hour 0 of every column first,
    # then hour 1 of every column, and so on.
    M = df.values.reshape((n_days, n_cols))
    # Fixed seed keeps the day-type assignment reproducible between runs.
    kmeans = KMeans(n_clusters=K, random_state=2468).fit(M)
    return kmeans.labels_, kmeans.cluster_centers_
def df_daily_label(v_labels, year):  # create daily day type label dataframe
    """Wrap day-type labels in a DataFrame indexed by the days of *year*.

    Args:
        v_labels: Labels to store, expected to contain one entry per
            calendar day of ``year`` in chronological order.
        year: Calendar year used to build the daily index (formatted
            with ``%d``).

    Returns:
        DataFrame with a daily DatetimeIndex running from 1 January to
        31 December of ``year`` and a single column ``'daytype'``.
    """
    first_stamp = '%d-01-01 00:00:00' % year
    last_stamp = '%d-12-31 23:00:00' % year
    # Daily steps start at midnight on 1 January, so the last generated
    # entry is midnight on 31 December even though the end bound is 23:00.
    days = pd.date_range(start=first_stamp, end=last_stamp, freq='1D')
    return pd.DataFrame(data=v_labels, index=days, columns=['daytype'])
def df_centres(df_original, v_labels, m_centres):  # concatenate each day according to day type label
    """Rebuild a full-length DataFrame in which each day is its cluster centre.

    Each day of ``df_original`` (24 consecutive rows) is replaced by the
    centre of the day type assigned to it. A centre is a flattened day of
    ``24 * len(df_original.columns)`` values and is unfolded row by row
    back into 24 rows.

    Args:
        df_original: DataFrame providing the index, the columns and the
            number of days (``len(df_original) // 24``).
        v_labels: Integer day-type label per day; only the first
            ``len(df_original) // 24`` entries are used.
        m_centres: Array-like of flattened centres, one row per day type.

    Returns:
        DataFrame with the index and columns of ``df_original`` holding the
        centre values of each day's type.

    Raises:
        IndexError: If fewer labels than days are supplied, or a label does
            not address a row of ``m_centres``.
    """
    n_days = len(df_original) // 24
    n_cols = len(df_original.columns)
    # The integer dtype keeps an empty label list usable as an index array.
    labels = np.asarray(v_labels, dtype=np.intp)[:n_days]
    if len(labels) < n_days:
        raise IndexError(
            "v_labels has %d entries but df_original spans %d days"
            % (len(labels), n_days)
        )
    # A single integer-array lookup selects every day's centre at once,
    # replacing the per-day vstack (quadratic copying, and undefined for
    # zero days). The reshape unfolds each centre into 24 rows.
    reduced = np.asarray(m_centres)[labels].reshape((n_days * 24, n_cols))
    return pd.DataFrame(index=df_original.index, columns=df_original.columns, data=reduced)