167 lines
4.4 KiB
Python
167 lines
4.4 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
|
|
from sklearn.gaussian_process import GaussianProcessRegressor
|
|
from sklearn.gaussian_process.kernels import Matern
|
|
|
|
from scipy.stats import norm
|
|
from scripts.misc import *
|
|
|
|
|
|
def GP_samples(s, n_std=3, max_noise=0.5): # s can be pandas series or ordered 1-d np array
|
|
|
|
n_samples=int(len(s)/24)
|
|
x=np.repeat([np.arange(24)], n_samples, axis=0).ravel()
|
|
|
|
# convert input to 1-d np array
|
|
if isinstance(s, pd.Series):
|
|
y=s.values
|
|
elif isinstance(s, np.ndarray):
|
|
y=s
|
|
else:
|
|
raise ValueError("Input vector must be a pandas series or numpy array.")
|
|
|
|
# set noise parameter
|
|
mean_hourly_std=col_to_M(s).std(axis=0).mean()
|
|
noise=max(n_std*mean_hourly_std, max_noise)
|
|
|
|
# GP kernel
|
|
kernel=1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=2) #nu=3/4 for less smooth knernel
|
|
|
|
#define GP
|
|
gp = GaussianProcessRegressor(kernel=kernel, alpha=noise, random_state=2468)
|
|
|
|
gp.fit(x.reshape(len(x),1), y)
|
|
x_ = np.arange(24)
|
|
|
|
#mean function
|
|
y_mean, y_std = gp.predict(x_.reshape(24,1), return_std=True)
|
|
|
|
# draw 100 posterior samples
|
|
n_y_samples=100
|
|
y_samples = gp.sample_y(x_.reshape(24,1), n_y_samples)
|
|
|
|
|
|
return (y_samples.T)
|
|
|
|
|
|
def mcmc_samples(sM, n_draws=100, start='mean'): # input can be pandas series or ordered 1-d array
|
|
# or maxtrix with the 24 hours in the columns
|
|
|
|
np.random.seed(seed=2468) # fix seed for reproducibility
|
|
|
|
if isinstance(sM, pd.Series):
|
|
M=col_to_M(sM)
|
|
elif isinstance(sM, np.ndarray):
|
|
if sM.ndim ==1:
|
|
M=col_to_M(sM)
|
|
elif sM.ndim ==2 and sM.shape[1]==24:
|
|
M=sM
|
|
else:
|
|
raise ValueError("Input array must have 24-hour columns")
|
|
|
|
else:
|
|
raise ValueError("Input vector must be a pandas series or numpy array.")
|
|
|
|
draws=np.array([])
|
|
|
|
fails=0
|
|
|
|
while draws.shape[0]<n_draws:
|
|
|
|
|
|
sample=np.array([])
|
|
|
|
#start from t=0
|
|
t=0
|
|
|
|
while t<24:
|
|
|
|
if t==0:
|
|
data0=M[:,0]
|
|
mean0, std0=norm.fit(data0)
|
|
#print(mean0, std0)
|
|
|
|
while len(sample)==0:
|
|
if start == 'mean':
|
|
cand0 = norm(mean0, 0.01).rvs()
|
|
else:
|
|
cand0 = norm(mean0, std0).rvs()
|
|
if max(0, data0.min()*0.8) <=cand0<= min(1, data0.max()*1.2):
|
|
sample=np.array([cand0])
|
|
t+=1
|
|
prior=norm.pdf(cand0, mean0, std0)
|
|
|
|
else:
|
|
# compute mu and Sigma for conditional normal
|
|
|
|
data_t=M[:,t]
|
|
data_p=M[:,t-1] # _p for "previous" time step
|
|
|
|
Min_t=data_t.min()
|
|
Max_t=data_t.max()
|
|
|
|
mu_t=data_t.mean()
|
|
mu_p=data_p.mean()
|
|
|
|
Cov=np.cov(data_p, data_t)
|
|
|
|
mu_= mu_t + Cov[1,0]*(sample[t-1]-mu_p)/Cov[0,0]
|
|
Sigma_=Cov[1,1] - Cov[0,1]*Cov[0,1]/Cov[0,0]
|
|
|
|
|
|
# draw a candidate, abort chain after 20 failed attempts
|
|
# return to 0th hour and draw a new initial point
|
|
attempts=1
|
|
while attempts<=20:
|
|
|
|
cand=norm(mu_, np.sqrt(Sigma_)).rvs()
|
|
prior_cand=norm.pdf(cand ,mu_t, np.sqrt(Cov[1,1]))
|
|
alpha=min([1, prior_cand/prior])
|
|
attempts+=1
|
|
|
|
if (np.random.uniform(0,1) < alpha) & (cand>=Min_t*0.8) & (cand<=Max_t*1.2):
|
|
sample=np.append(sample , cand)
|
|
prior=prior_cand
|
|
t+=1
|
|
|
|
break
|
|
else:
|
|
sample=np.array([])
|
|
t=0 # rest chain
|
|
|
|
# abort mcmc sampling after 100 failed attempts
|
|
fails+=1
|
|
if fails>100:
|
|
raise TimeoutError
|
|
break
|
|
|
|
if sample.size==24 and draws.size ==0:
|
|
draws=np.array([sample])
|
|
elif sample.size==24 and draws.size >0:
|
|
draws=np.vstack([draws, sample])
|
|
else:
|
|
pass
|
|
|
|
|
|
return draws
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|