Finance, Data Science

Hawkes Process Simulation and Application

hawkes

Simulation of Hawkes Process

Hawkes process is useful when we model a self-exciting point process

Self-exciting ... One event causes another event

e.g. earthquakes, stock trading, epidemiology, etc.

In [1]:
import numpy as np
from scipy.interpolate import BSpline
import scipy.interpolate as interpolate
from scipy import optimize
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
from numba import jit
import pickle as pkl
import pandas as pd
from scipy import interpolate
from scipy import stats

# change font settings
plt.rcParams['font.family'] = 'Times New Roman'  # applied globally to every figure below
plt.rcParams['font.size'] = 24

Hawkes Process $$ \Lambda(t)=\mu+\int_{-\infty}^{t} g(t-u) d N(u). $$ We consider a special case that $g(t)$ takes exponential function, $$ g(t)=\alpha e^{-\beta t} $$

Simulation

In [2]:
def simulate_hawkes(mu, alpha, beta, num_datapoints = 10000, neuton_iter_num = 800):
    """Simulate a Hawkes process with exponential kernel g(t) = alpha*exp(-beta*t).

    Each inter-event time is obtained by inverse-transform sampling: the
    integrated-intensity equation is solved for the next event time with
    Newton's method, using the recursively updated excitation state
    S(k) = sum_{j<=k} exp(-beta*(t_k - t_j)).

    Parameters
    ----------
    mu : float
        Baseline intensity (must be > 0).
    alpha, beta : float
        Kernel amplitude and decay rate (stationarity needs alpha < beta).
    num_datapoints : int, optional
        Number of events generated after the first one.
    neuton_iter_num : int, optional
        Maximum Newton iterations per event (name kept for compatibility;
        iteration now stops early on convergence).

    Returns
    -------
    t : ndarray, shape (num_datapoints + 1,)
        Event times.
    time : ndarray, shape (10000,)
        Uniform grid on [0, max(t)] for plotting.
    intensity : ndarray, shape (20000,)
        Intensity on the grid, zero-padded to twice the grid length so every
        shifted kernel copy fits; plot intensity[:10000] against `time`.
    """
    # First event: inverse-transform sample from Exp(mu).
    t = [-np.log(np.random.uniform(0, 1)) / mu]

    Sk = 1.0  # S(1): excitation carried by the first event
    for k in range(num_datapoints):
        U = np.random.uniform(0, 1)

        # Newton solve of
        #   log U + mu*(u - t_k) + (alpha/beta)*S_k*(1 - exp(-beta*(u - t_k))) = 0
        u = t[k] - np.log(U) / mu  # initial guess: ignore the excitation term
        for _ in range(neuton_iter_num):
            dt = u - t[k]
            fu = np.log(U) + mu * dt + alpha / beta * Sk * (1 - np.exp(-beta * dt))
            fu_dash = mu + alpha * Sk * np.exp(-beta * dt)
            step = fu / fu_dash
            u -= step
            if abs(step) < 1e-12:  # converged; the remaining iterations are wasted work
                break

        t.append(u)
        Sk = np.exp(-beta * (t[k + 1] - t[k])) * Sk + 1  # recursive update to S(k+1)

    t = np.array(t)

    # Intensity on a uniform grid: superpose one decaying kernel per event,
    # each shifted so it starts at its event time.  Slice-adding into a
    # padded array replaces the original per-event array concatenation.
    n_grid = 10000
    time = np.linspace(0, 1, n_grid) * t.max()
    increase = alpha * np.exp(-beta * time)
    intensity = np.zeros(n_grid * 2)
    for timepoint in t:
        start = np.searchsorted(time, timepoint)  # first grid index >= event time
        intensity[start:start + n_grid] += increase
    intensity += mu

    return t, time, intensity
In [3]:
# hyperparameters
mu = 1
alpha = 1.5
beta = 3

# mu = 0.8
# alpha = 11
# beta = 30

# short run (15 events) so individual events stay visible in the plot
t, time, intensity = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 15, neuton_iter_num = 800)

# twin axes: ax1 carries the intensity curve, ax2 the event markers
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()
ax2.scatter(t, np.ones(len(t)), facecolors='none', edgecolors='red', marker='o', s=150)
# intensity is padded to twice the grid length; only the first half aligns with `time`
ax1.plot(time, intensity[:10000], c='b', lw=1)


# set graph labels
ax1.set_xlabel("t")
ax1.set_ylabel("intensity")

# the right-hand axis carries no information (markers all at y=1), so hide it
plt.tick_params(labelright=False)
plt.tick_params(right=False)

plt.savefig('./intensity.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

Here, red circles represent the occurrence of events, and the blue line shows the underlying intensity.

The intensity rises when an event occurs, and diminishes over time.

In [4]:
# event-raster view of a longer run (1000 events): one red tick per event
t_pv, time_pv, intensity_pv = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 1000, neuton_iter_num = 800)

fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

# draw a thin vertical line at each event time
for tp in t_pv:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.7)

plt.xlabel('$t$', fontsize=28)

# y-axis carries no information in a raster plot, so hide its ticks
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./pvsim.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

↑Simulated Hawkes process. One red line = one event.

Reference: Simulation with independent point process where $\tau \sim U(0,1)$

In [5]:
# Reference process: a renewal process with i.i.d. U(0,1) waiting times.
# Draw 1000 gaps one at a time (same RNG stream as the original loop) and
# accumulate them into event times.
unif = np.cumsum([np.random.uniform(0, 1) for _ in range(1000)])

# raster plot of the uniform-renewal reference process, for visual comparison
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

# one thin vertical line per event
for tp in unif:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.7)

plt.xlabel('$t$', fontsize=28)

# hide the meaningless y-axis
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./unifsim.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

↑Compared with the simulated Hawkes process, these lines are uniformly distributed (no clustering).

Maximum Likelihood Estimation

In [6]:
# param:[mu, alpha, beta]

@jit
def cost_MLE(param, t):
    """Negative log-likelihood of a Hawkes process with exponential kernel.

    param : sequence (mu, alpha, beta); t : sorted event times on [0, t[-1]].
    The excitation sum A(i) = sum_{j<i} exp(-beta*(t_i - t_j)) is computed
    with the recursion A(i) = exp(-beta*(t_i - t_{i-1})) * (1 + A(i-1)),
    turning the original O(n^2) double loop into a single O(n) pass.
    Returns -log L, suitable for scipy.optimize.minimize.
    """
    mu, alpha, beta = param[0], param[1], param[2]
    likelihood = 0.0

    # first term: baseline compensator -mu*T
    likelihood -= mu * t[-1]

    Ai = 0.0  # A(0) = 0: no earlier events excite the first one
    for i in range(len(t)):
        # 2nd term: kernel compensator contribution of event i
        likelihood += alpha / beta * (np.exp(-beta * (t[-1] - t[i])) - 1)

        # 3rd term: log-intensity at t_i, with A(i) updated recursively
        if i > 0:
            Ai = np.exp(-beta * (t[i] - t[i - 1])) * (1 + Ai)
        likelihood += np.log(mu + alpha * Ai)

    return -likelihood

# initial guess and boundary
param0 = [0.01, 0.2, 5]
bound = [(0, None),(0.0001, None),(0,None)]

# maximum likelihood
# L-BFGS-B fit of (mu, alpha, beta); `t` here is the short sample simulated above
params_MLE = optimize.minimize(cost_MLE,param0,args=(t),method='L-BFGS-B',\
                        jac=None, bounds=bound, tol=None, callback=None,\
                        options={'disp': None, 'maxls': 20, 'iprint': -1,\
                                 'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 15000,\
                                 'ftol': 2.220446049250313e-09, 'maxcor': 10,\
                                 'maxfun': 15000})

Examine on simulated data

In [7]:
# hyperparameters
# ground-truth parameters; generate a large (10000-event) sample for the MLE check
mu = 1
alpha = 1.5
beta = 3

t, time, intensity = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 10000, neuton_iter_num = 800)
In [8]:
# refit on the large sample (same L-BFGS-B settings as before)
params_MLE = optimize.minimize(cost_MLE,param0,args=(t),method='L-BFGS-B',\
                        jac=None, bounds=bound, tol=None, callback=None,\
                        options={'disp': None, 'maxls': 20, 'iprint': -1,\
                                 'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 15000,\
                                 'ftol': 2.220446049250313e-09, 'maxcor': 10,\
                                 'maxfun': 15000})
In [9]:
# compare the fitted parameters against the ground truth
print('estimation finished successfully:', params_MLE['success'])
print('estimated params:', params_MLE['x'])
print('true params:', np.array([mu, alpha, beta]))
estimation finished successfully: True
estimated params: [1.03176744 1.49706542 3.13725551]
true params: [1.  1.5 3. ]

Time Varying Parameters

In [10]:
# base parameters
# each parameter oscillates as base + A*cos(2*pi*t/T_max), one cycle per horizon
mu_base = 0.8
alpha_base = 11
beta_base = 30

A_mu = 0.5
A_alpha = 4
A_beta = 10
In [11]:
def time_varying_simulate_hawkes(mu_base, alpha_base, beta_base, A_mu, A_alpha, A_beta,
                                 T_max, neuton_iter_num = 800, find_intensity=True):
    '''Simulate a Hawkes process whose parameters vary sinusoidally over [0, T_max].

    At (approximately) time u the parameters are
        mu(u)    = mu_base    + A_mu    * cos(2*pi*u / T_max)
        alpha(u) = alpha_base + A_alpha * cos(2*pi*u / T_max)
        beta(u)  = beta_base  + A_beta  * cos(2*pi*u / T_max)
    i.e. one full cosine cycle over the horizon.  Parameters are frozen at the
    last event time within each inter-event interval, and the excitation
    state S(k) is carried over between intervals.

    Parameters
    ----------
    mu_base, alpha_base, beta_base : float
        Mean level of each parameter.
    A_mu, A_alpha, A_beta : float
        Cosine amplitude of each parameter.
    T_max : float
        Horizon; events are generated until one lands at or past T_max,
        so t[-1] >= T_max.
    neuton_iter_num : int, optional
        Maximum Newton iterations per event (stops early on convergence).
    find_intensity : bool, optional
        If False, skip the expensive intensity reconstruction.

    Returns
    -------
    t : ndarray of event times.
    time : ndarray, shape (100000,), uniform grid on [0, max(t)].
    intensity : ndarray, shape (200000,), returned only if find_intensity.
        NOTE(review): built from the *base* kernel (alpha_base, beta_base)
        and without a mu baseline, unlike simulate_hawkes — confirm intended.
    '''
    # First event from the baseline intensity at t = 0 (cos(0) = 1).
    U = np.random.uniform(0, 1)
    mu = mu_base + A_mu*np.cos(0*2*np.pi/T_max)
    t = [-np.log(U)/mu]

    u = 0
    k = 0
    while u < T_max:
        # freeze the parameters at the current time for this interval
        mu = mu_base + A_mu*np.cos(u*2*np.pi/T_max)
        alpha = alpha_base + A_alpha*np.cos(u*2*np.pi/T_max)
        beta = beta_base + A_beta*np.cos(u*2*np.pi/T_max)

        U = np.random.uniform(0, 1)

        if k == 0:
            Sk = 1  # S(1): excitation carried by the first event

        # Newton solve for the next event time (same scheme as simulate_hawkes)
        u = t[k] - np.log(U)/mu  # initial guess: baseline-only waiting time
        for _ in range(neuton_iter_num):
            dt = u - t[k]
            fu = np.log(U) + mu*dt + alpha/beta*Sk*(1 - np.exp(-beta*dt))
            fu_dash = mu + alpha*Sk*np.exp(-beta*dt)
            step = fu/fu_dash
            u -= step
            if abs(step) < 1e-12:  # converged early; skip remaining iterations
                break

        t.append(u)
        Sk = np.exp(-beta*(t[k+1]-t[k]))*Sk + 1  # recursive update to S(k+1)
        k += 1

    t = np.array(t)

    # uniform plotting grid over the realised horizon
    n_grid = 100000
    time = np.linspace(0, 1, n_grid)*max(t)

    if not find_intensity:
        return t, time

    # Superpose one base-parameter kernel per event; the array is padded to
    # 2*n_grid so every shifted copy fits (see NOTE in the docstring).
    increase = alpha_base*np.exp(-beta_base*time)
    intensity = np.zeros(n_grid*2)
    for timepoint in t:
        start = np.searchsorted(time, timepoint)  # first grid index >= event
        intensity[start:start + n_grid] += increase

    return t, time, intensity
In [12]:
# simulate one horizon (T_max=1000) with sinusoidally varying parameters
t_sim, time_sim, intensity_sim = time_varying_simulate_hawkes(mu_base=0.8, alpha_base=11, beta_base=30,
                             A_mu=0.5, A_alpha=4, A_beta=10,
                             T_max=1000)
In [13]:
# raster plot of the time-varying simulation: one red tick per event
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

for tp in t_sim:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.7)

plt.xlabel('\n$t$')

# hide both axes' ticks: only the clustering pattern matters here
plt.tick_params(labelleft=False, labelbottom=False)
plt.tick_params(left=False, bottom=False)

plt.savefig('./tvsim.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

↑It captures intraday seasonality: there are fewer trades in the middle of the day.

In [14]:
# zoom on the first 20 events: red stars = events, blue = reconstructed intensity
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()
ax1.scatter(t_sim[:20], np.ones(len(t_sim[:20])), facecolors='none', edgecolors='red', marker='*', s=150)
ax2.plot(time_sim[time_sim<t_sim[20]], intensity_sim[:len(time_sim[time_sim<t_sim[20]])], c='b', lw=1)
plt.show()

Time-Varying Maximum Likelihood Estimation with Chopping

In [15]:
# create larger data
# longer horizon so each MLE window below contains enough events
t_sim, time_sim, intensity_sim = time_varying_simulate_hawkes(mu_base=0.8, alpha_base=11, beta_base=30,
                             A_mu=0.5, A_alpha=4, A_beta=10,
                             T_max=20000)
In [16]:
# param:[mu, alpha, beta]

@jit
def cost_MLE(param, t):
    """Negative log-likelihood of a Hawkes process with exponential kernel.

    (Redefinition of the earlier cost_MLE; kept so the notebook cells stay
    runnable in order.)  param : (mu, alpha, beta); t : sorted event times.
    A(i) = sum_{j<i} exp(-beta*(t_i - t_j)) is computed with the recursion
    A(i) = exp(-beta*(t_i - t_{i-1})) * (1 + A(i-1)), replacing the original
    O(n^2) double loop with a single O(n) pass.  Returns -log L.
    """
    mu, alpha, beta = param[0], param[1], param[2]
    likelihood = 0.0

    # first term: baseline compensator -mu*T
    likelihood -= mu * t[-1]

    Ai = 0.0  # A(0) = 0
    for i in range(len(t)):
        # 2nd term: kernel compensator contribution of event i
        likelihood += alpha / beta * (np.exp(-beta * (t[-1] - t[i])) - 1)

        # 3rd term: log-intensity at t_i, A(i) updated recursively
        if i > 0:
            Ai = np.exp(-beta * (t[i] - t[i - 1])) * (1 + Ai)
        likelihood += np.log(mu + alpha * Ai)

    return -likelihood

# initial guess and boundary
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]

# maximum likelihood
# number of equal-width time windows for the piecewise ("chopped") MLE
num_split = 16


def chopped_mle(param0, bound, num_split, t):
    """Piecewise-constant Hawkes MLE: split [0, t[-1]] into `num_split`
    equal windows and fit (mu, alpha, beta) independently in each.

    Parameters
    ----------
    param0 : sequence of 3 floats
        Initial guess reused for every window.
    bound : sequence of (low, high) pairs
        Kept for interface compatibility; NOTE(review): not passed to the
        solver, so each Nelder-Mead fit is effectively unconstrained.
    num_split : int
        Number of windows.  Assumes every window contains at least one event.
    t : ndarray
        Sorted event times.

    Returns
    -------
    Four lists of length num_split:
    window [start, end] pairs, and the fitted mu, alpha, beta per window.
    """
    chopped_t_start_end = []
    chopped_mus = []
    chopped_alphas = []
    chopped_betas = []
    width = t[-1] / num_split  # hoisted: identical for every window
    for i in range(num_split):
        # events in window i (+0.001 epsilon so the last event is not lost
        # to floating-point rounding), re-based so the window starts at 0
        chopped_t = t[np.where((t > i * width) & (t <= (i + 1) * width + 0.001), True, False)]
        chopped_t = chopped_t - chopped_t[0]
        chopped_t = chopped_t[1:]  # drop the (now zero) first event
        chopped_t_start_end.append([width * i, width * (i + 1)])

        # Nelder-Mead's function tolerance is 'fatol'; the original passed
        # 'ftol', which this method does not recognise and ignored.
        params_MLE = optimize.minimize(cost_MLE, param0, args=(chopped_t,), method='Nelder-Mead',
                                       jac=None, tol=None, callback=None,
                                       options={'disp': None, 'fatol': 2.220446049250313e-09})
        chopped_mus.append(params_MLE['x'][0])
        chopped_alphas.append(params_MLE['x'][1])
        chopped_betas.append(params_MLE['x'][2])
    return chopped_t_start_end, chopped_mus, chopped_alphas, chopped_betas
In [17]:
# initial guess and boundary
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
num_split = 16

# fit (mu, alpha, beta) separately in each of the 16 windows of the simulation
chopped_t_stard_end_sim, chopped_mus_sim, chopped_alphas_sim, chopped_betas_sim = chopped_mle(param0, bound, num_split, t_sim)
In [18]:
# ground-truth parameter curves evaluated at the event times, for plot overlays
_t = np.linspace(0,chopped_t_stard_end_sim[-1][1],100)
_T = np.max(_t)  # total horizon = end of the last window
cos = np.cos(t_sim*2*np.pi/_T)
mu_star = mu_base + A_mu * cos
alpha_star  = alpha_base + A_alpha * cos
beta_star = beta_base + A_beta * cos
In [19]:
# spline function
def spline3(x, y, point, deg):
    """Interpolating parametric B-spline through the points (x, y).

    Fits an exact (smoothing s=0) spline of degree `deg` and samples it at
    `point` evenly spaced parameter values, returning the sampled x- and
    y-coordinates as two arrays.
    """
    tck_rep, _ = interpolate.splprep([x, y], k=deg, s=0)
    params = np.linspace(0, 1, num=point, endpoint=True)
    xs, ys = interpolate.splev(params, tck_rep)
    return xs, ys
In [20]:
# mu: spline through the 16 windowed estimates (red dots at window centres)
# overlaid on the true cosine curve
x, y = [], []
for start_end, mu in zip(chopped_t_stard_end_sim, chopped_mus_sim):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(mu)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(t_sim, mu_star, 'green', linestyle = ":", lw=4, label='true')
plt.plot(a4,b4,label="spline", linewidth=3)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$\mu$      ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./muest.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [21]:
# alpha: same windowed-estimate-vs-truth plot as for mu
x, y = [], []
for start_end, alpha in zip(chopped_t_stard_end_sim, chopped_alphas_sim):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(alpha)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(t_sim, alpha_star, 'green', linestyle = ":", lw=4, label='true')
plt.plot(a4,b4,label="spline", linewidth=3)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('α      ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./alphaest.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [22]:
# beta: same windowed-estimate-vs-truth plot as for mu and alpha
x, y = [], []
for start_end, beta in zip(chopped_t_stard_end_sim, chopped_betas_sim):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(beta)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(t_sim, beta_star, 'green', linestyle = ":", lw=4, label='true')
plt.plot(a4,b4,label="spline", linewidth=3)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('β      ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./betaest.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

Application on Real Data

Toyota's stock data

In [23]:
# load Toyota tick data, one row per trade
# (assumes `duration` is the inter-trade waiting time in seconds — TODO confirm)
df = pd.read_csv("../shared_acd/data/toyota20170605to0609_Z.csv")
df.head()
Out[23]:
Unnamed: 0 date Unnamed: 2 Unnamed: 3 Unnamed: 4 Unnamed: 5 Unnamed: 6 zenba duration spread bid_quot_volume1 ask_quot_volume1 bid_over_ask
0 1 20170605 1 0 0 0 0 1 0.1447 2 100 100 0.000000
1 2 20170605 1 0 0 0 0 1 0.1595 3 200 300 0.500000
2 3 20170605 1 0 0 0 0 1 0.4670 3 300 200 -0.333333
3 4 20170605 1 0 0 0 0 1 0.3831 3 500 200 -0.600000
4 5 20170605 1 0 0 0 0 1 0.0330 1 200 100 -0.500000
In [24]:
# event times for 2017-06-06: cumulative sum of that day's inter-trade durations
t_toyota = np.array(np.cumsum(df[df['date']==20170606]['duration']))
pd.DataFrame(pd.Series(t_toyota.ravel()).describe()).transpose()
Out[24]:
count mean std min 25% 50% 75% max
0 9568.0 8586.535043 5977.556532 0.1207 3138.17875 7706.32505 14725.79155 17999.694
In [25]:
# raster plot of all Toyota trades on 2017-06-06
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

for tp in t_toyota:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)

plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./toyota.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [26]:
# morning session only
# (NOTE(review): split index 5447 presumably marks the session boundary — confirm)
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
t_toyota_morning = t_toyota[:5447]
for tp in t_toyota_morning:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)

plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./toyota_morning.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [27]:
# afternoon session: trades after the session-boundary index
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
# renamed from t_toyota_morning (copy-paste leftover): this slice is the afternoon
t_toyota_afternoon = t_toyota[5447:]
for tp in t_toyota_afternoon:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)

plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./toyota_afternoon.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [28]:
# initial guess and boundary
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
num_split = 16

# windowed Hawkes MLE on the Toyota event times
chopped_t_stard_end_toyota, chopped_mus_toyota, chopped_alphas_toyota, chopped_betas_toyota = chopped_mle(param0, bound, num_split, t_toyota)
In [29]:
# spline through the windowed mu estimates for Toyota (no true curve on real data)
x, y = [], []
for start_end, mu in zip(chopped_t_stard_end_toyota, chopped_mus_toyota):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(mu)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$\mu$      ', fontsize=28, rotation=0)
plt.savefig('./toyotamu.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [30]:
# spline through the windowed alpha estimates for Toyota
x, y = [], []
for start_end, alpha in zip(chopped_t_stard_end_toyota, chopped_alphas_toyota):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(alpha)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$α$      ', fontsize=28, rotation=0)
plt.savefig('./toyotaalpha.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [31]:
# spline through the windowed beta estimates for Toyota
x, y = [], []
for start_end, beta in zip(chopped_t_stard_end_toyota, chopped_betas_toyota):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(beta)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$β$      ', fontsize=28, rotation=0)
plt.savefig('./toyotabeta.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

Nagatanien's stock data

In [32]:
# load Nagatanien tick data (far fewer trades than Toyota — a thinly traded stock)
nagatani = pd.read_csv("../shared_acd/data/nagatanien20170605to0609_Z.csv")
nagatani.head()
Out[32]:
Unnamed: 0 date 20170605 20170606 20170607 20170608 20170609 zenba duration spread bid_quot_volume1 ask_quot_volume1 bid_over_ask
0 1 20170605 1 0 0 0 0 1 3.6700 10 1000 1000 0.0
1 2 20170605 1 0 0 0 0 1 379.5934 9 1000 1000 0.0
2 3 20170605 1 0 0 0 0 1 0.0000 10 1000 1000 0.0
3 4 20170605 1 0 0 0 0 1 893.2745 8 1000 1000 0.0
4 5 20170605 1 0 0 0 0 1 24.2856 21 1000 1000 0.0
In [33]:
# event times for 2017-06-06 only: just 50 trades on this day
t_nagatani = np.array(np.cumsum(nagatani[nagatani['date']==20170606]['duration']))
pd.DataFrame(pd.Series(t_nagatani.ravel()).describe()).transpose()
Out[33]:
count mean std min 25% 50% 75% max
0 50.0 6416.877232 6606.813688 0.0631 977.2714 2562.35295 10590.805175 17999.6016
In [34]:
# too few events per day, so use the whole week: cumulate durations across all days
# (overwrites the single-day t_nagatani above)
t_nagatani = np.array(np.cumsum(nagatani['duration']))
pd.DataFrame(pd.Series(t_nagatani.ravel()).describe()).transpose()
Out[34]:
count mean std min 25% 50% 75% max
0 174.0 42229.013847 25189.418344 3.67 18912.365375 40206.24645 65871.468125 87577.2701
In [35]:
# raster plot of all Nagatanien trades for the week (thicker lines: only 174 events)
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

for tp in t_nagatani:
    ax.plot([tp,tp], [0,1], 'r-', lw=3)

plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./nagatani.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [36]:
# initial guess and boundary
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
# NOTE(review): 174 events over 15 windows is ~12 events each — estimates will be noisy
num_split = 15

chopped_t_stard_end_nagatani, chopped_mus_nagatani, chopped_alphas_nagatani, chopped_betas_nagatani = chopped_mle(param0, bound, num_split, t_nagatani)
In [37]:
# spline through the windowed mu estimates for Nagatanien, with date tick labels
nxmu, nymu = [], []
for start_end, mu in zip(chopped_t_stard_end_nagatani, chopped_mus_nagatani):
    nxmu.append(np.mean([start_end[0],start_end[1]]))  # window centre
    nymu.append(mu)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(nxmu,nymu,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(nxmu, nymu, 'ro',label="controlpoint", markersize=10)
# mark each trading day on the x-axis (5 days over the ~90000 s-per-day horizon)
xaxis_ = ax.xaxis
xaxis_.set_major_locator(ticker.FixedLocator(np.linspace(0, 90000, 6)[:-1]))
xaxis_.set_ticklabels(["June-5", "June-6", "June-7", "June-8", "June-9"])
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$\mu$      ', fontsize=28, rotation=0)
plt.savefig('./nagatanimu.png',bbox_inches='tight', pad_inches=0.2)
plt.show()