In [1]:
import numpy as np
from scipy.interpolate import BSpline
import scipy.interpolate as interpolate
from scipy import optimize
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
from numba import jit
import pickle as pkl
import pandas as pd
from scipy import interpolate
from scipy import stats
# change font settings
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 24
Hawkes Process. The conditional intensity is $$ \lambda(t)=\mu+\int_{-\infty}^{t} g(t-u)\, d N(u). $$ We consider the special case where the kernel $g(t)$ is an exponential function, $$ g(t)=\alpha e^{-\beta t} $$
Simulation¶
In [2]:
def simulate_hawkes(mu, alpha, beta, num_datapoints = 10000, neuton_iter_num = 800, grid_size = 10000):
    """Simulate a Hawkes process with exponential kernel g(t) = alpha*exp(-beta*t).

    Event times are generated by inverting the compensator between events with
    Newton's method (Ozaki-style simulation).

    Parameters
    ----------
    mu, alpha, beta : float
        Baseline intensity and kernel parameters.
    num_datapoints : int
        Number of events generated after the first one (len(t) == num_datapoints + 1).
    neuton_iter_num : int
        Maximum Newton iterations per event; iteration now exits early once the
        step is below tolerance (the fixed 800 iterations were mostly no-ops).
    grid_size : int
        Number of grid points for the intensity path (was hard-coded to 10000).

    Returns
    -------
    t : ndarray, shape (num_datapoints + 1,)
        Event times.
    time : ndarray, shape (grid_size,)
        Uniform grid over [0, max(t)].
    intensity : ndarray, shape (2 * grid_size,)
        Intensity path; only the first grid_size entries align with `time`
        (the tail is padding from the shifted-kernel construction, kept for
        backward compatibility with callers that slice [:grid_size]).
    """
    # step1: first inter-arrival time from the baseline Poisson(mu) process
    U = np.random.uniform(0, 1)
    # step2
    t = [-np.log(U) / mu]
    Sk = 1.0  # S(k) = sum_{j<=k} exp(-beta*(t_k - t_j)), updated recursively
    for k in range(num_datapoints):
        # step3
        U = np.random.uniform(0, 1)
        # step 4: solve log(U) + integral of intensity over (t_k, u] = 0 for u
        u = t[k] - np.log(U) / mu  # initial guess ignoring excitation
        for _ in range(neuton_iter_num):
            e = np.exp(-beta * (u - t[k]))
            fu = np.log(U) + mu * (u - t[k]) + alpha / beta * Sk * (1 - e)
            fu_dash = mu + alpha * Sk * e
            step = fu / fu_dash
            u -= step
            if abs(step) < 1e-12:  # converged; remaining iterations would not move u
                break
        # step 5
        t.append(u)
        Sk = np.exp(-beta * (t[k + 1] - t[k])) * Sk + 1  # find S(k+1)
    t = np.array(t)
    # intensity: superpose one decaying kernel per event on a uniform grid
    time = np.linspace(0, 1, grid_size) * t.max()
    increase = alpha * np.exp(-beta * time)  # kernel sampled from its onset
    intensity = np.zeros(grid_size * 2)
    for tp in t:
        n_before = np.count_nonzero(time < tp)
        # zeros until the event, then the kernel; the 2*grid_size layout
        # reproduces the original padded-append construction exactly
        intensity[n_before:n_before + grid_size] += increase
    intensity += mu
    return t, time, intensity
In [3]:
# hyperparameters
mu = 1
alpha = 1.5
beta = 3
# mu = 0.8
# alpha = 11
# beta = 30
# Short path (15 events) so individual events are visible in the plot.
t, time, intensity = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 15, neuton_iter_num = 800)
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()  # secondary axis keeps the event markers at a fixed height
ax2.scatter(t, np.ones(len(t)), facecolors='none', edgecolors='red', marker='o', s=150)
# only the first half of `intensity` aligns with `time`; the rest is padding
ax1.plot(time, intensity[:10000], c='b', lw=1)
# set graph labels
ax1.set_xlabel("t")
ax1.set_ylabel("intensity")
plt.tick_params(labelright=False)
plt.tick_params(right=False)
plt.savefig('./intensity.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
Here, the red circles represent the occurrence of events, and the blue line shows the underlying intensity.
The intensity rises when an event occurs and diminishes over time.
In [4]:
# Simulate a longer Hawkes path; draw each event as a vertical red tick.
t_pv, time_pv, intensity_pv = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 1000, neuton_iter_num = 800)
fig = plt.figure(figsize=(25, 4))
ax = fig.add_subplot(1, 1, 1)
for event_time in t_pv:
    ax.plot([event_time, event_time], [0, 1], 'r-', lw=0.7)
plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)
plt.savefig('./pvsim.png', bbox_inches='tight', pad_inches=0.2)
plt.show()
↑Simulated Hawkes process. One red line = one event.
Reference: Simulation with independent point process where $\tau \sim U(0,1)$¶
In [5]:
# Reference process: a renewal process with i.i.d. U(0,1) inter-arrival times.
# The comprehension draws the same 1000 values as the original append loop.
unif = [np.random.uniform(0, 1) for _ in range(1000)]
unif = np.cumsum(unif)
fig = plt.figure(figsize=(25, 4))
ax = fig.add_subplot(1, 1, 1)
for event_time in unif:
    ax.plot([event_time, event_time], [0, 1], 'r-', lw=0.7)
plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)
plt.savefig('./unifsim.png', bbox_inches='tight', pad_inches=0.2)
plt.show()
↑Compared with the simulated Hawkes process above, the lines are uniformly distributed over time (no clustering).
Maximum Likelihood Estimation¶
In [6]:
# param:[mu, alpha, beta]
@jit
def cost_MLE(param, t):
    """Negative log-likelihood of an exponential-kernel Hawkes process.

    param : sequence (mu, alpha, beta)
    t     : 1-D array of event times, sorted ascending; t[-1] is treated as
            the end of the observation window T.

    Uses the recursion A(i) = exp(-beta*(t[i]-t[i-1])) * (1 + A(i-1)) with
    A(0) = 0, so evaluation is O(n) instead of the naive O(n^2) double loop
    (the original also had a redundant if/else that assigned Ai = 0 in both
    branches).
    """
    likelihood = 0.0
    # first term: -mu * T (compensator of the constant baseline)
    likelihood -= param[0] * t[-1]
    Ai = 0.0  # A(0) = 0
    for i in range(len(t)):
        # 2nd term: compensator contribution of event i's kernel over (t[i], T]
        likelihood += param[1] / param[2] * (np.exp(-param[2] * (t[-1] - t[i])) - 1)
        # 3rd term: log-intensity at event i, with A(i) updated recursively
        if i > 0:
            Ai = np.exp(-param[2] * (t[i] - t[i - 1])) * (1 + Ai)
        likelihood += np.log(param[0] + param[1] * Ai)
    return -likelihood
# initial guess and boundary
param0 = [0.01, 0.2, 5]
bound = [(0, None),(0.0001, None),(0,None)]
# maximum likelihood
# NOTE(review): `t` here is the short 16-event path from the plotting cell
# above, so this fit is unreliable; the fit is repeated on a 10000-event
# path in a later cell. `args=(t)` is not a tuple, but scipy wraps it.
params_MLE = optimize.minimize(cost_MLE,param0,args=(t),method='L-BFGS-B',\
    jac=None, bounds=bound, tol=None, callback=None,\
    options={'disp': None, 'maxls': 20, 'iprint': -1,\
    'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 15000,\
    'ftol': 2.220446049250313e-09, 'maxcor': 10,\
    'maxfun': 15000})
Examine on simulated data¶
In [7]:
# hyperparameters
mu = 1
alpha = 1.5
beta = 3
# Larger sample (10000 events) for a stable maximum-likelihood fit.
t, time, intensity = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 10000, neuton_iter_num = 800)
In [8]:
# Refit on the 10000-event path (same optimizer settings as the earlier cell).
# NOTE(review): this minimize call duplicates the one in the previous fit
# cell verbatim — consider wrapping it in a helper function.
params_MLE = optimize.minimize(cost_MLE,param0,args=(t),method='L-BFGS-B',\
    jac=None, bounds=bound, tol=None, callback=None,\
    options={'disp': None, 'maxls': 20, 'iprint': -1,\
    'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 15000,\
    'ftol': 2.220446049250313e-09, 'maxcor': 10,\
    'maxfun': 15000})
In [9]:
# Report the fit next to the ground-truth parameters used for simulation.
# OptimizeResult supports attribute access, equivalent to the ['...'] form.
print('estimation finished successfully:', params_MLE.success)
print('estimated params:', params_MLE.x)
print('true params:', np.array([mu, alpha, beta]))
Time Varying Parameters¶
In [10]:
# base parameters: each parameter oscillates over the day as
# theta(t) = theta_base + A_theta * cos(2*pi*t / T_max)
mu_base = 0.8
alpha_base = 11
beta_base = 30
A_mu = 0.5     # oscillation amplitude of the baseline intensity
A_alpha = 4    # oscillation amplitude of the kernel height
A_beta = 10    # oscillation amplitude of the kernel decay rate
In [11]:
def time_varying_simulate_hawkes(mu_base, alpha_base, beta_base, A_mu, A_alpha, A_beta,
                                 T_max, neuton_iter_num = 800, find_intensity=True):
    '''
    Simulate a Hawkes process whose parameters oscillate over the day:
    theta(u) = theta_base + A_theta * cos(2*pi*u / T_max).
    The difference is that it can take over the previous intensity.

    Parameters are refreshed from the current time u at the top of each
    loop iteration and held fixed for that inter-event step. The loop runs
    until the last event exceeds T_max (so the number of events is random).
    Returns (t, time) when find_intensity is False, else (t, time, intensity).
    NOTE(review): the returned intensity path is built with the *base*
    parameters only, ignoring the time variation — treat it as approximate.
    '''
    # step1
    U = np.random.uniform(0, 1)
    # step2: first event from the baseline intensity at u = 0
    mu = mu_base + A_mu*np.cos(0*2*np.pi/T_max)
    t1 = -np.log(U)/mu
    t = [t1]
    u = 0
    k = 0
    while u < T_max:
        # update parameters (evaluated at the current time u)
        mu = mu_base + A_mu*np.cos(u*2*np.pi/T_max)
        alpha = alpha_base + A_alpha*np.cos(u*2*np.pi/T_max)
        beta = beta_base + A_beta*np.cos(u*2*np.pi/T_max)
        # step3
        U = np.random.uniform(0, 1)
        # step 4: Sk carries the excitation state across iterations
        if k == 0:
            Sk = 1
        u = t[k] - np.log(U)/mu # initial value of iteration
        for _ in range(neuton_iter_num):
            fu = np.log(U) + mu*(u-t[k]) + alpha/beta*Sk*(1-np.exp(-beta*(u-t[k])))
            fu_dash = mu+alpha*Sk*np.exp(-beta*(u-t[k]))
            u -= fu/fu_dash
        # step 5
        t.append(u)
        Sk = np.exp(-beta*(t[k+1]-t[k]))*Sk + 1 # find S(k+1)
        k += 1
    t = np.array(t)
    # intensity on a uniform 100000-point grid over [0, max(t)]
    time = np.linspace(0,1,100000)*max(t)
    if not find_intensity:
        return t, time
    # one decaying base-parameter kernel per event, superposed on the grid
    increase = list(map(lambda x: alpha_base*np.exp(-beta_base * x), time))
    intensity = np.zeros(100000*2)
    for timepoints in t:
        yuko = np.append(np.append(np.zeros(len(time[time<timepoints])), increase), np.zeros(len(time[time>=timepoints])))
        intensity += yuko
    return t, time, intensity
In [12]:
# Simulate one "day" (T_max = 1000) with the base parameters defined above.
t_sim, time_sim, intensity_sim = time_varying_simulate_hawkes(mu_base=0.8, alpha_base=11, beta_base=30,
                                                              A_mu=0.5, A_alpha=4, A_beta=10,
                                                              T_max=1000)
In [13]:
# Event-raster plot of the time-varying simulation (one red line per event).
fig = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
for tp in t_sim:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.7)
plt.xlabel('\n$t$')
plt.tick_params(labelleft=False, labelbottom=False)
plt.tick_params(left=False, bottom=False)
plt.savefig('./tvsim.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
↑It captures intraday seasonality: there are fewer trades in the middle of the day.
In [14]:
# Zoom in on the first 20 events together with the simulated intensity path.
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()
ax1.scatter(t_sim[:20], np.ones(len(t_sim[:20])), facecolors='none', edgecolors='red', marker='*', s=150)
# restrict the intensity curve to the same time range as the plotted events
ax2.plot(time_sim[time_sim<t_sim[20]], intensity_sim[:len(time_sim[time_sim<t_sim[20]])], c='b', lw=1)
plt.show()
Time-Varying Maximum Likelihood Estimation with Chopping¶
In [15]:
# create larger data (T_max = 20000 so each chopped window has enough events)
t_sim, time_sim, intensity_sim = time_varying_simulate_hawkes(mu_base=0.8, alpha_base=11, beta_base=30,
                                                              A_mu=0.5, A_alpha=4, A_beta=10,
                                                              T_max=20000)
In [16]:
# param:[mu, alpha, beta]
# NOTE(review): this redefines the identical cost_MLE from the earlier cell
# (duplicate definition anti-pattern); keep only one copy.
@jit
def cost_MLE(param, t):
    """Negative log-likelihood of an exponential-kernel Hawkes process.

    param : sequence (mu, alpha, beta)
    t     : 1-D array of event times, sorted ascending; t[-1] is the end of
            the observation window T.

    Uses the recursion A(i) = exp(-beta*(t[i]-t[i-1])) * (1 + A(i-1)) with
    A(0) = 0, so evaluation is O(n) instead of the naive O(n^2) double loop
    (the original also had a redundant if/else assigning Ai = 0 twice).
    """
    likelihood = 0.0
    # first term: -mu * T (compensator of the constant baseline)
    likelihood -= param[0] * t[-1]
    Ai = 0.0  # A(0) = 0
    for i in range(len(t)):
        # 2nd term: compensator contribution of event i's kernel over (t[i], T]
        likelihood += param[1] / param[2] * (np.exp(-param[2] * (t[-1] - t[i])) - 1)
        # 3rd term: log-intensity at event i, with A(i) updated recursively
        if i > 0:
            Ai = np.exp(-param[2] * (t[i] - t[i - 1])) * (1 + Ai)
        likelihood += np.log(param[0] + param[1] * Ai)
    return -likelihood
# initial guess and boundary
param0 = [0.8, 11, 30]  # start at the true base parameters (mu, alpha, beta)
# NOTE(review): `bound` is only threaded through chopped_mle's signature;
# Nelder-Mead does not support bound constraints and none are passed.
bound = [(0, None),(0.0001, None),(0,None)]
# maximum likelihood
num_split = 16  # number of equal-width windows for the piecewise fit
def chopped_mle(param0, bound, num_split, t):
    """Piecewise-constant Hawkes MLE.

    Splits [0, t[-1]] into `num_split` equal-width windows and fits
    (mu, alpha, beta) independently on each window with Nelder-Mead.

    Parameters
    ----------
    param0 : sequence
        Initial guess (mu, alpha, beta) used for every window.
    bound : sequence of pairs
        Accepted for interface compatibility but unused: Nelder-Mead does
        not support bound constraints and no `bounds=` is passed.
    num_split : int
        Number of windows.
    t : 1-D ndarray
        Event times, sorted ascending.

    Returns
    -------
    (window_edges, mus, alphas, betas) : window_edges[i] = [start, end];
    windows with fewer than two events yield NaN parameters (the original
    crashed with an IndexError on an empty window).
    """
    window_edges = []
    mus, alphas, betas = [], [], []
    width = t[-1] / num_split
    for i in range(num_split):
        # boolean mask (equivalent to np.where(cond, True, False), simpler);
        # +0.001 keeps the final event inside the last window despite rounding
        mask = (t > i * width) & (t <= (i + 1) * width + 0.001)
        window_t = t[mask]
        window_edges.append([width * i, width * (i + 1)])
        if window_t.size < 2:
            # empty/singleton window: re-anchoring below would fail
            mus.append(np.nan)
            alphas.append(np.nan)
            betas.append(np.nan)
            continue
        window_t = window_t - window_t[0]  # re-anchor the window at 0
        window_t = window_t[1:]            # drop the (now zero) first event
        res = optimize.minimize(cost_MLE, param0, args=(window_t,),
                                method='Nelder-Mead',
                                jac=None, tol=None, callback=None,
                                options={'disp': None,
                                         'ftol': 2.220446049250313e-09})
        mus.append(res['x'][0])
        alphas.append(res['x'][1])
        betas.append(res['x'][2])
    return window_edges, mus, alphas, betas
In [17]:
# initial guess and boundary (re-declared so this cell is self-contained)
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
num_split = 16
# Piecewise MLE on the simulated time-varying path.
chopped_t_stard_end_sim, chopped_mus_sim, chopped_alphas_sim, chopped_betas_sim = chopped_mle(param0, bound, num_split, t_sim)
In [18]:
# Reconstruct the "true" time-varying parameter curves for comparison.
_t = np.linspace(0,chopped_t_stard_end_sim[-1][1],100)
_T = np.max(_t)  # end of the last window, ~ total observation length T_max
cos = np.cos(t_sim*2*np.pi/_T)
mu_star = mu_base + A_mu * cos
alpha_star = alpha_base + A_alpha * cos
beta_star = beta_base + A_beta * cos
In [19]:
# spline function
def spline3(x,y,point,deg):
    """Interpolating parametric spline through the control points (x, y).

    Fits a degree-`deg` parametric spline with zero smoothing (it passes
    through every control point) and evaluates it at `point` equally spaced
    parameter values on [0, 1]. Returns the evaluated (x, y) arrays.
    """
    tck_rep, _ = interpolate.splprep([x, y], k=deg, s=0)
    params = np.linspace(0, 1, num=point, endpoint=True)
    x_new, y_new = interpolate.splev(params, tck_rep)
    return x_new, y_new
In [20]:
# Fitted mu per window vs. the true time-varying mu, smoothed with a spline.
x, y = [], []
for start_end, mu in zip(chopped_t_stard_end_sim, chopped_mus_sim):
    x.append(np.mean([start_end[0], start_end[1]]))  # window midpoint
    y.append(mu)
fig = plt.figure(figsize=(25, 6))
ax = fig.add_subplot(1, 1, 1)
a4, b4 = spline3(x, y, 100, 3)
plt.plot(t_sim, mu_star, 'green', linestyle=":", lw=4, label='true')
plt.plot(a4, b4, label="spline", linewidth=3)
plt.plot(x, y, 'ro', label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
# raw string: '\m' in '$\mu$' is an invalid escape sequence (warns on
# modern Python); r'...' keeps the backslash literal for mathtext
plt.ylabel(r'$\mu$ ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./muest.png', bbox_inches='tight', pad_inches=0.2)
plt.show()
In [21]:
# Fitted alpha per window vs. the true time-varying alpha.
x, y = [], []
for start_end, alpha in zip(chopped_t_stard_end_sim, chopped_alphas_sim):
    x.append(np.mean([start_end[0],start_end[1]]))  # window midpoint
    y.append(alpha)
fig = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(t_sim, alpha_star, 'green', linestyle = ":", lw=4, label='true')
plt.plot(a4,b4,label="spline", linewidth=3)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('α ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./alphaest.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [22]:
# Fitted beta per window vs. the true time-varying beta.
x, y = [], []
for start_end, beta in zip(chopped_t_stard_end_sim, chopped_betas_sim):
    x.append(np.mean([start_end[0],start_end[1]]))  # window midpoint
    y.append(beta)
fig = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(t_sim, beta_star, 'green', linestyle = ":", lw=4, label='true')
plt.plot(a4,b4,label="spline", linewidth=3)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('β ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./betaest.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
Application on Real Data¶
Toyota's stock data¶
In [23]:
# Tick-level Toyota trade data, 2017-06-05 to 2017-06-09 (path is machine-specific).
df = pd.read_csv("../shared_acd/data/toyota20170605to0609_Z.csv")
df.head()
Out[23]:
In [24]:
# Event times for 2017-06-06: cumulative sum of inter-trade durations.
t_toyota = np.array(np.cumsum(df[df['date']==20170606]['duration']))
pd.DataFrame(pd.Series(t_toyota.ravel()).describe()).transpose()
Out[24]:
In [25]:
# Event raster for the full trading day (thin lines since trades are dense).
fig = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
for tp in t_toyota:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)
plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)
plt.savefig('./toyota.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [26]:
fig = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
# first 5447 trades = morning session (magic number; presumably found by
# inspecting the data — TODO derive from timestamps instead)
t_toyota_morning = t_toyota[:5447]
for tp in t_toyota_morning:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)
plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)
plt.savefig('./toyota_morning.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [27]:
fig = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
# FIX: this slice is the *afternoon* session (events after index 5447) but
# was originally assigned to `t_toyota_morning`, silently clobbering the
# morning data from the previous cell.
t_toyota_afternoon = t_toyota[5447:]
for tp in t_toyota_afternoon:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)
plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)
plt.savefig('./toyota_afternoon.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [28]:
# initial guess and boundary (bound is unused by Nelder-Mead; see chopped_mle)
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
num_split = 16
# Piecewise MLE on the single-day Toyota event times.
chopped_t_stard_end_toyota, chopped_mus_toyota, chopped_alphas_toyota, chopped_betas_toyota = chopped_mle(param0, bound, num_split, t_toyota)
In [29]:
# Fitted mu per window for the Toyota data, smoothed with a cubic spline.
x, y = [], []
for start_end, mu in zip(chopped_t_stard_end_toyota, chopped_mus_toyota):
    x.append(np.mean([start_end[0], start_end[1]]))  # window midpoint
    y.append(mu)
fig = plt.figure(figsize=(25, 6))
ax = fig.add_subplot(1, 1, 1)
a4, b4 = spline3(x, y, 100, 3)
plt.plot(a4, b4, label="spline", linewidth=2)
plt.plot(x, y, 'ro', label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
# raw string: '\m' in '$\mu$' is an invalid escape sequence on modern Python
plt.ylabel(r'$\mu$ ', fontsize=28, rotation=0)
plt.savefig('./toyotamu.png', bbox_inches='tight', pad_inches=0.2)
plt.show()
In [30]:
# Fitted alpha per window for the Toyota data.
x, y = [], []
for start_end, alpha in zip(chopped_t_stard_end_toyota, chopped_alphas_toyota):
    x.append(np.mean([start_end[0],start_end[1]]))  # window midpoint
    y.append(alpha)
fig = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$α$ ', fontsize=28, rotation=0)
plt.savefig('./toyotaalpha.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [31]:
# Fitted beta per window for the Toyota data.
x, y = [], []
for start_end, beta in zip(chopped_t_stard_end_toyota, chopped_betas_toyota):
    x.append(np.mean([start_end[0],start_end[1]]))  # window midpoint
    y.append(beta)
fig = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$β$ ', fontsize=28, rotation=0)
plt.savefig('./toyotabeta.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
Nagatanien's stock data¶
In [32]:
# Tick-level Nagatanien trade data, same week (a much less liquid stock).
nagatani = pd.read_csv("../shared_acd/data/nagatanien20170605to0609_Z.csv")
nagatani.head()
Out[32]:
In [33]:
# Event times for 2017-06-06 only (few events — the stock trades rarely).
t_nagatani = np.array(np.cumsum(nagatani[nagatani['date']==20170606]['duration']))
pd.DataFrame(pd.Series(t_nagatani.ravel()).describe()).transpose()
Out[33]:
In [34]:
# NOTE(review): overwrites the single-day `t_nagatani` from the previous cell
# with cumulative times over all five days; overnight gaps are ignored, so
# the week is treated as one continuous session.
t_nagatani = np.array(np.cumsum(nagatani['duration']))
pd.DataFrame(pd.Series(t_nagatani.ravel()).describe()).transpose()
Out[34]:
In [35]:
# Event raster for the whole week (thicker lines since events are sparse).
fig = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
for tp in t_nagatani:
    ax.plot([tp,tp], [0,1], 'r-', lw=3)
plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)
plt.savefig('./nagatani.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [36]:
# initial guess and boundary (bound is unused by Nelder-Mead; see chopped_mle)
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
num_split = 15  # 15 windows over 5 days = 3 windows per trading day
chopped_t_stard_end_nagatani, chopped_mus_nagatani, chopped_alphas_nagatani, chopped_betas_nagatani = chopped_mle(param0, bound, num_split, t_nagatani)
In [37]:
# Fitted mu per window for Nagatanien over the week, with date tick labels.
nxmu, nymu = [], []
for start_end, mu in zip(chopped_t_stard_end_nagatani, chopped_mus_nagatani):
    nxmu.append(np.mean([start_end[0], start_end[1]]))  # window midpoint
    nymu.append(mu)
fig = plt.figure(figsize=(25, 6))
ax = fig.add_subplot(1, 1, 1)
a4, b4 = spline3(nxmu, nymu, 100, 3)
plt.plot(a4, b4, label="spline", linewidth=2)
plt.plot(nxmu, nymu, 'ro', label="controlpoint", markersize=10)
# Label the x axis with the trading dates: one tick per day start
xaxis_ = ax.xaxis
xaxis_.set_major_locator(ticker.FixedLocator(np.linspace(0, 90000, 6)[:-1]))
xaxis_.set_ticklabels(["June-5", "June-6", "June-7", "June-8", "June-9"])
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
# raw string: '\m' in '$\mu$' is an invalid escape sequence on modern Python
plt.ylabel(r'$\mu$ ', fontsize=28, rotation=0)
plt.savefig('./nagatanimu.png', bbox_inches='tight', pad_inches=0.2)
plt.show()