Finance, Data Science

Hawkes Process Simulation and Application

hawkes

Simulation of Hawkes Process

Hawkes process is useful when we model a self-exciting point process

Self-exciting ... One event causes another event

e.g. earthquakes, stock trading, epidemiology, etc.

In [1]:
import numpy as np
from scipy.interpolate import BSpline
import scipy.interpolate as interpolate
from scipy import optimize
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
from numba import jit
import pickle as pkl
import pandas as pd
from scipy import interpolate
from scipy import stats

# change font settings
plt.rcParams['font.family'] = 'Times New Roman'  # applied globally to every figure below
plt.rcParams['font.size'] = 24

Hawkes Process $$ \Lambda(t)=\mu+\int_{-\infty}^{t} g(t-u) d N(u). $$ We consider a special case that $g(t)$ takes exponential function, $$ g(t)=\alpha e^{-\beta t} $$

Simulation

In [2]:
def simulate_hawkes(mu, alpha, beta, num_datapoints = 10000, neuton_iter_num = 800):
    """Simulate a Hawkes process with exponential kernel g(t) = alpha*exp(-beta*t).

    Each inter-event time is obtained by inverse-transform sampling: the
    integrated-intensity equation is solved for the next event time with
    Newton's method, using the recursively updated excitation state
    S(k) = sum_{j<=k} exp(-beta*(t_k - t_j)).

    Parameters
    ----------
    mu : float
        Baseline intensity (must be > 0).
    alpha, beta : float
        Kernel amplitude and decay rate (stationarity needs alpha < beta).
    num_datapoints : int, optional
        Number of events generated after the first one.
    neuton_iter_num : int, optional
        Maximum Newton iterations per event (name kept for compatibility;
        iteration now stops early on convergence).

    Returns
    -------
    t : ndarray, shape (num_datapoints + 1,)
        Event times.
    time : ndarray, shape (10000,)
        Uniform grid on [0, max(t)] for plotting.
    intensity : ndarray, shape (20000,)
        Intensity on the grid, zero-padded to twice the grid length so every
        shifted kernel copy fits; plot intensity[:10000] against `time`.
    """
    # First event: inverse-transform sample from Exp(mu).
    t = [-np.log(np.random.uniform(0, 1)) / mu]

    Sk = 1.0  # S(1): excitation carried by the first event
    for k in range(num_datapoints):
        U = np.random.uniform(0, 1)

        # Newton solve of
        #   log U + mu*(u - t_k) + (alpha/beta)*S_k*(1 - exp(-beta*(u - t_k))) = 0
        u = t[k] - np.log(U) / mu  # initial guess: ignore the excitation term
        for _ in range(neuton_iter_num):
            dt = u - t[k]
            fu = np.log(U) + mu * dt + alpha / beta * Sk * (1 - np.exp(-beta * dt))
            fu_dash = mu + alpha * Sk * np.exp(-beta * dt)
            step = fu / fu_dash
            u -= step
            if abs(step) < 1e-12:  # converged; the remaining iterations are wasted work
                break

        t.append(u)
        Sk = np.exp(-beta * (t[k + 1] - t[k])) * Sk + 1  # recursive update to S(k+1)

    t = np.array(t)

    # Intensity on a uniform grid: superpose one decaying kernel per event,
    # each shifted so it starts at its event time.  Slice-adding into a
    # padded array replaces the original per-event array concatenation.
    n_grid = 10000
    time = np.linspace(0, 1, n_grid) * t.max()
    increase = alpha * np.exp(-beta * time)
    intensity = np.zeros(n_grid * 2)
    for timepoint in t:
        start = np.searchsorted(time, timepoint)  # first grid index >= event time
        intensity[start:start + n_grid] += increase
    intensity += mu

    return t, time, intensity
In [3]:
# hyperparameters
mu = 1
alpha = 1.5
beta = 3

# mu = 0.8
# alpha = 11
# beta = 30

# short run (15 events) so individual events stay visible in the plot
t, time, intensity = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 15, neuton_iter_num = 800)

# twin axes: ax1 carries the intensity curve, ax2 the event markers
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()
ax2.scatter(t, np.ones(len(t)), facecolors='none', edgecolors='red', marker='o', s=150)
# intensity is padded to twice the grid length; only the first half aligns with `time`
ax1.plot(time, intensity[:10000], c='b', lw=1)


# set graph labels
ax1.set_xlabel("t")
ax1.set_ylabel("intensity")

# the right-hand axis carries no information (markers all at y=1), so hide it
plt.tick_params(labelright=False)
plt.tick_params(right=False)

plt.savefig('./intensity.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

Here, red circles represent the occurrence of events, and the blue line shows the underlying intensity.

The intensity rises when an event occurs, and diminishes over time.

In [4]:
# event-raster view of a longer run (1000 events): one red tick per event
t_pv, time_pv, intensity_pv = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 1000, neuton_iter_num = 800)

fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

# draw a thin vertical line at each event time
for tp in t_pv:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.7)

plt.xlabel('$t$', fontsize=28)

# y-axis carries no information in a raster plot, so hide its ticks
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./pvsim.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

↑Simulated Hawkes process. One red line = one event.

Reference: Simulation with independent point process where $\tau \sim U(0,1)$

In [5]:
# Reference process: a renewal process with i.i.d. U(0,1) waiting times.
# Draw 1000 gaps one at a time (same RNG stream as the original loop) and
# accumulate them into event times.
unif = np.cumsum([np.random.uniform(0, 1) for _ in range(1000)])

# raster plot of the uniform-renewal reference process, for visual comparison
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

# one thin vertical line per event
for tp in unif:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.7)

plt.xlabel('$t$', fontsize=28)

# hide the meaningless y-axis
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./unifsim.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

↑Compared with the simulated Hawkes process, these lines are uniformly distributed (no clustering).

Maximum Likelihood Estimation

In [6]:
# param:[mu, alpha, beta]

@jit
def cost_MLE(param, t):
    """Negative log-likelihood of a Hawkes process with exponential kernel.

    param : sequence (mu, alpha, beta); t : sorted event times on [0, t[-1]].
    The excitation sum A(i) = sum_{j<i} exp(-beta*(t_i - t_j)) is computed
    with the recursion A(i) = exp(-beta*(t_i - t_{i-1})) * (1 + A(i-1)),
    turning the original O(n^2) double loop into a single O(n) pass.
    Returns -log L, suitable for scipy.optimize.minimize.
    """
    mu, alpha, beta = param[0], param[1], param[2]
    likelihood = 0.0

    # first term: baseline compensator -mu*T
    likelihood -= mu * t[-1]

    Ai = 0.0  # A(0) = 0: no earlier events excite the first one
    for i in range(len(t)):
        # 2nd term: kernel compensator contribution of event i
        likelihood += alpha / beta * (np.exp(-beta * (t[-1] - t[i])) - 1)

        # 3rd term: log-intensity at t_i, with A(i) updated recursively
        if i > 0:
            Ai = np.exp(-beta * (t[i] - t[i - 1])) * (1 + Ai)
        likelihood += np.log(mu + alpha * Ai)

    return -likelihood

# initial guess and boundary
param0 = [0.01, 0.2, 5]
bound = [(0, None),(0.0001, None),(0,None)]

# maximum likelihood
# L-BFGS-B fit of (mu, alpha, beta); `t` here is the short sample simulated above
params_MLE = optimize.minimize(cost_MLE,param0,args=(t),method='L-BFGS-B',\
                        jac=None, bounds=bound, tol=None, callback=None,\
                        options={'disp': None, 'maxls': 20, 'iprint': -1,\
                                 'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 15000,\
                                 'ftol': 2.220446049250313e-09, 'maxcor': 10,\
                                 'maxfun': 15000})

Examine on simulated data

In [7]:
# hyperparameters
# ground-truth parameters; generate a large (10000-event) sample for the MLE check
mu = 1
alpha = 1.5
beta = 3

t, time, intensity = simulate_hawkes(mu=mu, alpha=alpha, beta=beta, num_datapoints = 10000, neuton_iter_num = 800)
In [8]:
# refit on the large sample (same L-BFGS-B settings as before)
params_MLE = optimize.minimize(cost_MLE,param0,args=(t),method='L-BFGS-B',\
                        jac=None, bounds=bound, tol=None, callback=None,\
                        options={'disp': None, 'maxls': 20, 'iprint': -1,\
                                 'gtol': 1e-05, 'eps': 1e-08, 'maxiter': 15000,\
                                 'ftol': 2.220446049250313e-09, 'maxcor': 10,\
                                 'maxfun': 15000})
In [9]:
# compare the fitted parameters against the ground truth
print('estimation finished successfully:', params_MLE['success'])
print('estimated params:', params_MLE['x'])
print('true params:', np.array([mu, alpha, beta]))
estimation finished successfully: True
estimated params: [1.03176744 1.49706542 3.13725551]
true params: [1.  1.5 3. ]

Time Varying Parameters

In [10]:
# base parameters
# each parameter oscillates as base + A*cos(2*pi*t/T_max), one cycle per horizon
mu_base = 0.8
alpha_base = 11
beta_base = 30

A_mu = 0.5
A_alpha = 4
A_beta = 10
In [11]:
def time_varying_simulate_hawkes(mu_base, alpha_base, beta_base, A_mu, A_alpha, A_beta,
                                 T_max, neuton_iter_num = 800, find_intensity=True):
    '''Simulate a Hawkes process whose parameters vary sinusoidally over [0, T_max].

    At (approximately) time u the parameters are
        mu(u)    = mu_base    + A_mu    * cos(2*pi*u / T_max)
        alpha(u) = alpha_base + A_alpha * cos(2*pi*u / T_max)
        beta(u)  = beta_base  + A_beta  * cos(2*pi*u / T_max)
    i.e. one full cosine cycle over the horizon.  Parameters are frozen at the
    last event time within each inter-event interval, and the excitation
    state S(k) is carried over between intervals.

    Parameters
    ----------
    mu_base, alpha_base, beta_base : float
        Mean level of each parameter.
    A_mu, A_alpha, A_beta : float
        Cosine amplitude of each parameter.
    T_max : float
        Horizon; events are generated until one lands at or past T_max,
        so t[-1] >= T_max.
    neuton_iter_num : int, optional
        Maximum Newton iterations per event (stops early on convergence).
    find_intensity : bool, optional
        If False, skip the expensive intensity reconstruction.

    Returns
    -------
    t : ndarray of event times.
    time : ndarray, shape (100000,), uniform grid on [0, max(t)].
    intensity : ndarray, shape (200000,), returned only if find_intensity.
        NOTE(review): built from the *base* kernel (alpha_base, beta_base)
        and without a mu baseline, unlike simulate_hawkes — confirm intended.
    '''
    # First event from the baseline intensity at t = 0 (cos(0) = 1).
    U = np.random.uniform(0, 1)
    mu = mu_base + A_mu*np.cos(0*2*np.pi/T_max)
    t = [-np.log(U)/mu]

    u = 0
    k = 0
    while u < T_max:
        # freeze the parameters at the current time for this interval
        mu = mu_base + A_mu*np.cos(u*2*np.pi/T_max)
        alpha = alpha_base + A_alpha*np.cos(u*2*np.pi/T_max)
        beta = beta_base + A_beta*np.cos(u*2*np.pi/T_max)

        U = np.random.uniform(0, 1)

        if k == 0:
            Sk = 1  # S(1): excitation carried by the first event

        # Newton solve for the next event time (same scheme as simulate_hawkes)
        u = t[k] - np.log(U)/mu  # initial guess: baseline-only waiting time
        for _ in range(neuton_iter_num):
            dt = u - t[k]
            fu = np.log(U) + mu*dt + alpha/beta*Sk*(1 - np.exp(-beta*dt))
            fu_dash = mu + alpha*Sk*np.exp(-beta*dt)
            step = fu/fu_dash
            u -= step
            if abs(step) < 1e-12:  # converged early; skip remaining iterations
                break

        t.append(u)
        Sk = np.exp(-beta*(t[k+1]-t[k]))*Sk + 1  # recursive update to S(k+1)
        k += 1

    t = np.array(t)

    # uniform plotting grid over the realised horizon
    n_grid = 100000
    time = np.linspace(0, 1, n_grid)*max(t)

    if not find_intensity:
        return t, time

    # Superpose one base-parameter kernel per event; the array is padded to
    # 2*n_grid so every shifted copy fits (see NOTE in the docstring).
    increase = alpha_base*np.exp(-beta_base*time)
    intensity = np.zeros(n_grid*2)
    for timepoint in t:
        start = np.searchsorted(time, timepoint)  # first grid index >= event
        intensity[start:start + n_grid] += increase

    return t, time, intensity
In [12]:
# simulate one horizon (T_max=1000) with sinusoidally varying parameters
t_sim, time_sim, intensity_sim = time_varying_simulate_hawkes(mu_base=0.8, alpha_base=11, beta_base=30,
                             A_mu=0.5, A_alpha=4, A_beta=10,
                             T_max=1000)
In [13]:
# raster plot of the time-varying simulation: one red tick per event
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

for tp in t_sim:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.7)

plt.xlabel('\n$t$')

# hide both axes' ticks: only the clustering pattern matters here
plt.tick_params(labelleft=False, labelbottom=False)
plt.tick_params(left=False, bottom=False)

plt.savefig('./tvsim.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

↑It captures intraday seasonality: there are fewer trades in the middle of the day.

In [14]:
# zoom on the first 20 events: red stars = events, blue = reconstructed intensity
fig, ax1 = plt.subplots(figsize=(20, 5))
ax2 = ax1.twinx()
ax1.scatter(t_sim[:20], np.ones(len(t_sim[:20])), facecolors='none', edgecolors='red', marker='*', s=150)
ax2.plot(time_sim[time_sim<t_sim[20]], intensity_sim[:len(time_sim[time_sim<t_sim[20]])], c='b', lw=1)
plt.show()

Time-Varying Maximum Likelihood Estimation with Chopping

In [15]:
# create larger data
# longer horizon so each MLE window below contains enough events
t_sim, time_sim, intensity_sim = time_varying_simulate_hawkes(mu_base=0.8, alpha_base=11, beta_base=30,
                             A_mu=0.5, A_alpha=4, A_beta=10,
                             T_max=20000)
In [16]:
# param:[mu, alpha, beta]

@jit
def cost_MLE(param, t):
    """Negative log-likelihood of a Hawkes process with exponential kernel.

    (Redefinition of the earlier cost_MLE; kept so the notebook cells stay
    runnable in order.)  param : (mu, alpha, beta); t : sorted event times.
    A(i) = sum_{j<i} exp(-beta*(t_i - t_j)) is computed with the recursion
    A(i) = exp(-beta*(t_i - t_{i-1})) * (1 + A(i-1)), replacing the original
    O(n^2) double loop with a single O(n) pass.  Returns -log L.
    """
    mu, alpha, beta = param[0], param[1], param[2]
    likelihood = 0.0

    # first term: baseline compensator -mu*T
    likelihood -= mu * t[-1]

    Ai = 0.0  # A(0) = 0
    for i in range(len(t)):
        # 2nd term: kernel compensator contribution of event i
        likelihood += alpha / beta * (np.exp(-beta * (t[-1] - t[i])) - 1)

        # 3rd term: log-intensity at t_i, A(i) updated recursively
        if i > 0:
            Ai = np.exp(-beta * (t[i] - t[i - 1])) * (1 + Ai)
        likelihood += np.log(mu + alpha * Ai)

    return -likelihood

# initial guess and boundary
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]

# maximum likelihood
# number of equal-width time windows for the piecewise ("chopped") MLE
num_split = 16


def chopped_mle(param0, bound, num_split, t):
    """Piecewise-constant Hawkes MLE: split [0, t[-1]] into `num_split`
    equal windows and fit (mu, alpha, beta) independently in each.

    Parameters
    ----------
    param0 : sequence of 3 floats
        Initial guess reused for every window.
    bound : sequence of (low, high) pairs
        Kept for interface compatibility; NOTE(review): not passed to the
        solver, so each Nelder-Mead fit is effectively unconstrained.
    num_split : int
        Number of windows.  Assumes every window contains at least one event.
    t : ndarray
        Sorted event times.

    Returns
    -------
    Four lists of length num_split:
    window [start, end] pairs, and the fitted mu, alpha, beta per window.
    """
    chopped_t_start_end = []
    chopped_mus = []
    chopped_alphas = []
    chopped_betas = []
    width = t[-1] / num_split  # hoisted: identical for every window
    for i in range(num_split):
        # events in window i (+0.001 epsilon so the last event is not lost
        # to floating-point rounding), re-based so the window starts at 0
        chopped_t = t[np.where((t > i * width) & (t <= (i + 1) * width + 0.001), True, False)]
        chopped_t = chopped_t - chopped_t[0]
        chopped_t = chopped_t[1:]  # drop the (now zero) first event
        chopped_t_start_end.append([width * i, width * (i + 1)])

        # Nelder-Mead's function tolerance is 'fatol'; the original passed
        # 'ftol', which this method does not recognise and ignored.
        params_MLE = optimize.minimize(cost_MLE, param0, args=(chopped_t,), method='Nelder-Mead',
                                       jac=None, tol=None, callback=None,
                                       options={'disp': None, 'fatol': 2.220446049250313e-09})
        chopped_mus.append(params_MLE['x'][0])
        chopped_alphas.append(params_MLE['x'][1])
        chopped_betas.append(params_MLE['x'][2])
    return chopped_t_start_end, chopped_mus, chopped_alphas, chopped_betas
In [17]:
# initial guess and boundary
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
num_split = 16

# fit (mu, alpha, beta) separately in each of the 16 windows of the simulation
chopped_t_stard_end_sim, chopped_mus_sim, chopped_alphas_sim, chopped_betas_sim = chopped_mle(param0, bound, num_split, t_sim)
In [18]:
# ground-truth parameter curves evaluated at the event times, for plot overlays
_t = np.linspace(0,chopped_t_stard_end_sim[-1][1],100)
_T = np.max(_t)  # total horizon = end of the last window
cos = np.cos(t_sim*2*np.pi/_T)
mu_star = mu_base + A_mu * cos
alpha_star  = alpha_base + A_alpha * cos
beta_star = beta_base + A_beta * cos
In [19]:
# spline function
def spline3(x, y, point, deg):
    """Interpolating parametric B-spline through the points (x, y).

    Fits an exact (smoothing s=0) spline of degree `deg` and samples it at
    `point` evenly spaced parameter values, returning the sampled x- and
    y-coordinates as two arrays.
    """
    tck_rep, _ = interpolate.splprep([x, y], k=deg, s=0)
    params = np.linspace(0, 1, num=point, endpoint=True)
    xs, ys = interpolate.splev(params, tck_rep)
    return xs, ys
In [20]:
# mu: spline through the 16 windowed estimates (red dots at window centres)
# overlaid on the true cosine curve
x, y = [], []
for start_end, mu in zip(chopped_t_stard_end_sim, chopped_mus_sim):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(mu)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(t_sim, mu_star, 'green', linestyle = ":", lw=4, label='true')
plt.plot(a4,b4,label="spline", linewidth=3)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$\mu$      ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./muest.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [21]:
# alpha: same windowed-estimate-vs-truth plot as for mu
x, y = [], []
for start_end, alpha in zip(chopped_t_stard_end_sim, chopped_alphas_sim):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(alpha)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(t_sim, alpha_star, 'green', linestyle = ":", lw=4, label='true')
plt.plot(a4,b4,label="spline", linewidth=3)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('α      ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./alphaest.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [22]:
# beta: same windowed-estimate-vs-truth plot as for mu and alpha
x, y = [], []
for start_end, beta in zip(chopped_t_stard_end_sim, chopped_betas_sim):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(beta)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(t_sim, beta_star, 'green', linestyle = ":", lw=4, label='true')
plt.plot(a4,b4,label="spline", linewidth=3)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='lower right')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('β      ', fontsize=28, rotation=0)
plt.tick_params(labelbottom=False)
plt.tick_params(bottom=False)
plt.savefig('./betaest.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

Application on Real Data

Toyota's stock data

In [23]:
# load Toyota tick data, one row per trade
# (assumes `duration` is the inter-trade waiting time in seconds — TODO confirm)
df = pd.read_csv("../shared_acd/data/toyota20170605to0609_Z.csv")
df.head()
Out[23]:
Unnamed: 0 date Unnamed: 2 Unnamed: 3 Unnamed: 4 Unnamed: 5 Unnamed: 6 zenba duration spread bid_quot_volume1 ask_quot_volume1 bid_over_ask
0 1 20170605 1 0 0 0 0 1 0.1447 2 100 100 0.000000
1 2 20170605 1 0 0 0 0 1 0.1595 3 200 300 0.500000
2 3 20170605 1 0 0 0 0 1 0.4670 3 300 200 -0.333333
3 4 20170605 1 0 0 0 0 1 0.3831 3 500 200 -0.600000
4 5 20170605 1 0 0 0 0 1 0.0330 1 200 100 -0.500000
In [24]:
# event times for 2017-06-06: cumulative sum of that day's inter-trade durations
t_toyota = np.array(np.cumsum(df[df['date']==20170606]['duration']))
pd.DataFrame(pd.Series(t_toyota.ravel()).describe()).transpose()
Out[24]:
count mean std min 25% 50% 75% max
0 9568.0 8586.535043 5977.556532 0.1207 3138.17875 7706.32505 14725.79155 17999.694
In [25]:
# raster plot of all Toyota trades on 2017-06-06
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

for tp in t_toyota:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)

plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./toyota.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [26]:
# morning session only
# (NOTE(review): split index 5447 presumably marks the session boundary — confirm)
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
t_toyota_morning = t_toyota[:5447]
for tp in t_toyota_morning:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)

plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./toyota_morning.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [27]:
# afternoon session: trades after the session-boundary index
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)
# renamed from t_toyota_morning (copy-paste leftover): this slice is the afternoon
t_toyota_afternoon = t_toyota[5447:]
for tp in t_toyota_afternoon:
    ax.plot([tp,tp], [0,1], 'r-', lw=0.18)

plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./toyota_afternoon.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [28]:
# initial guess and boundary
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
num_split = 16

# windowed Hawkes MLE on the Toyota event times
chopped_t_stard_end_toyota, chopped_mus_toyota, chopped_alphas_toyota, chopped_betas_toyota = chopped_mle(param0, bound, num_split, t_toyota)
In [29]:
# spline through the windowed mu estimates for Toyota (no true curve on real data)
x, y = [], []
for start_end, mu in zip(chopped_t_stard_end_toyota, chopped_mus_toyota):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(mu)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$\mu$      ', fontsize=28, rotation=0)
plt.savefig('./toyotamu.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [30]:
# spline through the windowed alpha estimates for Toyota
x, y = [], []
for start_end, alpha in zip(chopped_t_stard_end_toyota, chopped_alphas_toyota):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(alpha)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$α$      ', fontsize=28, rotation=0)
plt.savefig('./toyotaalpha.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [31]:
# spline through the windowed beta estimates for Toyota
x, y = [], []
for start_end, beta in zip(chopped_t_stard_end_toyota, chopped_betas_toyota):
    x.append(np.mean([start_end[0],start_end[1]]))  # window centre
    y.append(beta)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(x,y,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(x, y, 'ro',label="controlpoint", markersize=10)
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$β$      ', fontsize=28, rotation=0)
plt.savefig('./toyotabeta.png',bbox_inches='tight', pad_inches=0.2)
plt.show()

Nagatanien's stock data

In [32]:
# load Nagatanien tick data (far fewer trades than Toyota — a thinly traded stock)
nagatani = pd.read_csv("../shared_acd/data/nagatanien20170605to0609_Z.csv")
nagatani.head()
Out[32]:
Unnamed: 0 date 20170605 20170606 20170607 20170608 20170609 zenba duration spread bid_quot_volume1 ask_quot_volume1 bid_over_ask
0 1 20170605 1 0 0 0 0 1 3.6700 10 1000 1000 0.0
1 2 20170605 1 0 0 0 0 1 379.5934 9 1000 1000 0.0
2 3 20170605 1 0 0 0 0 1 0.0000 10 1000 1000 0.0
3 4 20170605 1 0 0 0 0 1 893.2745 8 1000 1000 0.0
4 5 20170605 1 0 0 0 0 1 24.2856 21 1000 1000 0.0
In [33]:
# event times for 2017-06-06 only: just 50 trades on this day
t_nagatani = np.array(np.cumsum(nagatani[nagatani['date']==20170606]['duration']))
pd.DataFrame(pd.Series(t_nagatani.ravel()).describe()).transpose()
Out[33]:
count mean std min 25% 50% 75% max
0 50.0 6416.877232 6606.813688 0.0631 977.2714 2562.35295 10590.805175 17999.6016
In [34]:
# too few events per day, so use the whole week: cumulate durations across all days
# (overwrites the single-day t_nagatani above)
t_nagatani = np.array(np.cumsum(nagatani['duration']))
pd.DataFrame(pd.Series(t_nagatani.ravel()).describe()).transpose()
Out[34]:
count mean std min 25% 50% 75% max
0 174.0 42229.013847 25189.418344 3.67 18912.365375 40206.24645 65871.468125 87577.2701
In [35]:
# raster plot of all Nagatanien trades for the week (thicker lines: only 174 events)
fig  = plt.figure(figsize=(25,4))
ax=fig.add_subplot(1,1,1)

for tp in t_nagatani:
    ax.plot([tp,tp], [0,1], 'r-', lw=3)

plt.xlabel('$t$', fontsize=28)
plt.tick_params(labelleft=False)
plt.tick_params(left=False)

plt.savefig('./nagatani.png',bbox_inches='tight', pad_inches=0.2)
plt.show()
In [36]:
# initial guess and boundary
param0 = [0.8, 11, 30]
bound = [(0, None),(0.0001, None),(0,None)]
# NOTE(review): 174 events over 15 windows is ~12 events each — estimates will be noisy
num_split = 15

chopped_t_stard_end_nagatani, chopped_mus_nagatani, chopped_alphas_nagatani, chopped_betas_nagatani = chopped_mle(param0, bound, num_split, t_nagatani)
In [37]:
# spline through the windowed mu estimates for Nagatanien, with date tick labels
nxmu, nymu = [], []
for start_end, mu in zip(chopped_t_stard_end_nagatani, chopped_mus_nagatani):
    nxmu.append(np.mean([start_end[0],start_end[1]]))  # window centre
    nymu.append(mu)

fig  = plt.figure(figsize=(25,6))
ax=fig.add_subplot(1,1,1)
a4, b4 = spline3(nxmu,nymu,100,3)
plt.plot(a4,b4,label="spline", linewidth=2)
plt.plot(nxmu, nymu, 'ro',label="controlpoint", markersize=10)
# mark each trading day on the x-axis (5 days over the ~90000 s-per-day horizon)
xaxis_ = ax.xaxis
xaxis_.set_major_locator(ticker.FixedLocator(np.linspace(0, 90000, 6)[:-1]))
xaxis_.set_ticklabels(["June-5", "June-6", "June-7", "June-8", "June-9"])
plt.legend(loc='upper center')
plt.xlabel('$t$', fontsize=28)
plt.ylabel('$\mu$      ', fontsize=28, rotation=0)
plt.savefig('./nagatanimu.png',bbox_inches='tight', pad_inches=0.2)
plt.show()