In this section, we model a predator-prey relationship with the classic Lotka-Volterra model, where \(N\) is the prey population and \(P\) is the predator population. The system is described by:
\[\begin{equation} \begin{cases} \frac{dN}{dt} = f(N,P) = N (a - bP) \\ \frac{dP}{dt} = g(N,P) = P (cN - d) \tag{1} \end{cases} \end{equation}\]
We first define the derivative function. Here, \(a\), \(b\), \(c\), and \(d\) are passed as four explicit parameters.
derivs_LV <- function(t, Xs, a, b, c, d) { # derivatives of the Lotka-Volterra model
  N = Xs[1]
  P = Xs[2]
  dNdt = N * (a - b*P)
  dPdt = P * (c*N - d)
  return(c(dNdt, dPdt))
}
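As a quick sanity check, we can evaluate the derivatives at a sample state and compare with a hand calculation (a minimal sketch with illustrative values):
derivs_LV(0, c(30, 10), a = 1, b = 0.03, c = 0.02, d = 1)
# expected: dN/dt = 30*(1 - 0.03*10) = 21 and dP/dt = 10*(0.02*30 - 1) = -4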
We will use the RK4 integrator to simulate the ODEs defined by Equation (1).
# 4th order Runge-Kutta (RK4) for a generic multi-variable system, see Part 3A
RK4_generic <- function(derivs, X0, t.total, dt, ...){
  # derivs: the function of the derivatives
  # X0: initial condition, a vector of multiple variables
  # t.total: total simulation time, assuming t starts from 0 at the beginning
  # dt: time step size
  t_all = seq(0, t.total, by=dt)
  n_all = length(t_all)
  nx = length(X0)
  X_all = matrix(0, nrow = n_all, ncol = nx)
  X_all[1,] = X0
  for (i in 1:(n_all-1)) {
    t_0 = t_all[i]
    t_0.5 = t_0 + 0.5*dt
    t_1 = t_0 + dt
    k1 = dt * derivs(t_0, X_all[i,], ...)
    k2 = dt * derivs(t_0.5, X_all[i,] + k1/2, ...)
    k3 = dt * derivs(t_0.5, X_all[i,] + k2/2, ...)
    k4 = dt * derivs(t_1, X_all[i,] + k3, ...)
    X_all[i+1,] = X_all[i,] + (k1 + 2*k2 + 2*k3 + k4)/6
  }
  return(cbind(t_all, X_all)) # the output is a matrix of t & X(t) for all time steps
}
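Because RK4's global error scales as \(O(\Delta t^4)\), halving the step size should leave the trajectory essentially unchanged. A quick convergence check (a sketch, using the parameter values adopted later in this section):
res_coarse = RK4_generic(derivs_LV, c(30, 10), t.total = 10, dt = 0.01, 1, 0.03, 0.02, 1)
res_fine = RK4_generic(derivs_LV, c(30, 10), t.total = 10, dt = 0.005, 1, 0.03, 0.02, 1)
max(abs(res_coarse[nrow(res_coarse), 2:3] - res_fine[nrow(res_fine), 2:3])) # should be tiny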
We set \(a = d = 1\), \(b = 0.03\), and \(c = 0.02\).
We first start the simulation from the initial condition \({X(t = 0)} = (2, 0)\):
a = 1; b = 0.03; c = 0.02; d = 1
X0_1 = c(2, 0) # start from N = 2, P = 0
results_LV_1 = RK4_generic(derivs = derivs_LV, X0 = X0_1, t.total = 10, dt = 0.01, a, b, c, d)
plot(results_LV_1[,1], results_LV_1[,2], type = "l", col=2,
     xlab="t", ylab="Levels", xlim=c(0,10), ylim=c(0,100))
lines(results_LV_1[,1], results_LV_1[,3], type = "l", col=3)
legend("topright", inset=0.02,
       legend = c("N", "P"),
       col=2:3, lty=1, cex=0.8)
With no predators, the prey population grows exponentially.
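We can verify this against the analytical solution: with \(P = 0\), Equation (1) reduces to \(\frac{dN}{dt} = aN\), so \(N(t) = N_0 e^{at}\). A quick comparison at \(t = 5\) (a sketch):
idx = which.min(abs(results_LV_1[,1] - 5)) # index of the time point closest to t = 5
c(exact = 2 * exp(a * 5), simulated = results_LV_1[idx, 2])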
Next, we start the simulation from the initial condition \({X(t = 0)} = (0, 100)\):
X0_2 = c(0, 100) # start from N = 0, P = 100
results_LV_2 = RK4_generic(derivs = derivs_LV, X0 = X0_2, t.total = 10, dt = 0.01, a, b, c, d)
plot(results_LV_2[,1], results_LV_2[,2], type = "l", col=2,
     xlab="t", ylab="Levels", xlim=c(0,10), ylim=c(0,100))
lines(results_LV_2[,1], results_LV_2[,3], type = "l", col=3)
legend("topright", inset=0.02,
       legend = c("N", "P"),
       col=2:3, lty=1, cex=0.8)
With no prey, the predator population decays exponentially.
Finally, we start the simulation from the initial condition \({X(t = 0)} = (30, 10)\):
X0_3 = c(30, 10) # start from N = 30, P = 10
results_LV_3 = RK4_generic(derivs = derivs_LV, X0 = X0_3, t.total = 50, dt = 0.01, a, b, c, d)
plot(results_LV_3[,1], results_LV_3[,2], type = "l", col=2,
     xlab="t", ylab="Levels", xlim=c(0,50), ylim=c(0,150))
lines(results_LV_3[,1], results_LV_3[,3], type = "l", col=3)
legend("topright", inset=0.02,
       legend = c("N", "P"),
       col=2:3, lty=1, cex=0.8)
When both predators and prey are present, the system exhibits sustained oscillations.
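We can estimate the oscillation period directly from the simulated trajectory by locating successive peaks of \(N\) (a minimal sketch; for small oscillations around the coexistence steady state, linearization predicts a period of \(2\pi/\sqrt{ad}\), while larger orbits take longer):
N_traj = results_LV_3[,2]
peaks = which(diff(sign(diff(N_traj))) == -2) + 1 # indices of local maxima of N
diff(results_LV_3[peaks, 1]) # time between successive peaks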
Here, we explore a few different initial conditions and plot the time trajectories in the phase plane of \(N\) and \(P\).
# Another three initial conditions
X0_4 = c(40, 20)
X0_5 = c(30, 25)
X0_6 = c(20, 40)
# A total of four initial conditions to consider
IC_all = rbind(X0_3, X0_4, X0_5, X0_6)
legend_IC = apply(IC_all, 1, paste, collapse = ",")
results_LV_4 = RK4_generic(derivs = derivs_LV, X0 = X0_4, t.total = 50, dt = 0.01, a, b, c, d)
results_LV_5 = RK4_generic(derivs = derivs_LV, X0 = X0_5, t.total = 50, dt = 0.01, a, b, c, d)
results_LV_6 = RK4_generic(derivs = derivs_LV, X0 = X0_6, t.total = 50, dt = 0.01, a, b, c, d)
colnames(results_LV_4) = c("t", "N", "P")
colnames(results_LV_5) = c("t", "N", "P")
colnames(results_LV_6) = c("t", "N", "P")
plot(IC_all[,1], IC_all[,2], type = "p", pch = 16, col = 1,
     xlab="N", ylab="P", xlim=c(0,150), ylim=c(0,100))
lines(results_LV_3[,2], results_LV_3[,3], type = "l", col=2)
lines(results_LV_4[,2], results_LV_4[,3], type = "l", col=3)
lines(results_LV_5[,2], results_LV_5[,3], type = "l", col=4)
lines(results_LV_6[,2], results_LV_6[,3], type = "l", col=5)
legend("topright", title = "Initial conditions", inset=0.02,
       legend = legend_IC,
       col=2:5, lty=1, cex=0.8)
The direction of change of an ODE system at any point can be represented by a vector in the phase plane:
\[\begin{equation} \begin{cases} V_X = \frac{dX}{dt} \\ V_Y = \frac{dY}{dt} \end{cases} \end{equation}\]
library(ggplot2)
N_all = seq(0, 150, by=10) # all N grids
P_all = seq(0, 100, by=10) # all P grids
NP_all = expand.grid(N_all, P_all) # all combinations of N and P
results = t(apply(NP_all, MARGIN=1, function(Xs) {
  return(c(Xs, derivs_LV(0, Xs, a, b, c, d)))
})) # generate all vector field data
colnames(results) = c("N", "P", "dN", "dP")
scale = 0.1
ggplot(data=as.data.frame(results), aes(x=N, y=P)) +
  geom_segment(aes(xend=N+scale*dN, yend=P+scale*dP), arrow = arrow(length = unit(0.05,"in")))
We can choose to plot the unit vectors of the vector field to highlight the directions.
results_unit = t(apply(NP_all, MARGIN=1, function(Xs) {
  v = derivs_LV(0, Xs, a, b, c, d)
  v_norm = v / sqrt(v[1]**2 + v[2]**2)
  return(c(Xs, v_norm))
})) # generate unit vectors
colnames(results_unit) = c("N", "P", "dN", "dP")
scale = 3
p1 = ggplot(data=as.data.frame(results_unit), aes(x=N, y=P)) +
  geom_segment(aes(xend=N+scale*dN, yend=P+scale*dP), arrow = arrow(length = unit(0.05,"in")))
colnames(results_LV_3) = c("t", "N", "P")
p1 + guides(color="none") +
  geom_path(data = as.data.frame(results_LV_3), aes(x=N, y=P, colour = 1), size=1)
Below, we overlay the time trajectories from the four initial conditions on the same phase plane.
p2 = p1 + guides(color=guide_legend("ICs")) +
  geom_path(data = as.data.frame(results_LV_3), aes(x=N, y=P, colour = legend_IC[1]), size=1) +
  geom_path(data = as.data.frame(results_LV_4), aes(x=N, y=P, colour = legend_IC[2]), size=1) +
  geom_path(data = as.data.frame(results_LV_5), aes(x=N, y=P, colour = legend_IC[3]), size=1) +
  geom_path(data = as.data.frame(results_LV_6), aes(x=N, y=P, colour = legend_IC[4]), size=1)
p2
# Nullclines
Another powerful method to evaluate the behavior of a nonlinear dynamical system is nullcline analysis. A nullcline corresponds to the steady-state condition of one of the two ODEs: (1) \(\frac{dX}{dt} = 0\) or (2) \(\frac{dY}{dt} = 0\). In the Lotka-Volterra model, setting \(\frac{dN}{dt} = N(a - bP) = 0\) shows that the N-nullcline satisfies
\(N = 0\) or \(P = \frac{a}{b}\);
Similarly, setting \(\frac{dP}{dt} = P(cN - d) = 0\) shows that the P-nullcline satisfies
\(P = 0\) or \(N = \frac{d}{c}\).
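As a quick numerical check (a sketch, using an arbitrary point on each line), the corresponding derivative vanishes on each nullcline:
derivs_LV(0, c(70, a/b), a, b, c, d)[1] # dN/dt on the N-nullcline P = a/b: exactly 0
derivs_LV(0, c(d/c, 70), a, b, c, d)[2] # dP/dt on the P-nullcline N = d/c: exactly 0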
We add the two nullclines to the phase plane. The intersections of the N-nullcline and P-nullcline are the steady states of the system.
\[\begin{equation} \begin{cases} \frac{dN}{dt} = 0 \\ \frac{dP}{dt} = 0 \end{cases} \end{equation}\]
In this case, since each nullcline consists of two line segments, note that only intersections between segments of different nullclines (drawn in different colors below) can be steady states. Here, the first steady state is \(X_1 = (0, 0)\) and the second is \(X_2 = (\frac{d}{c}, \frac{a}{b})\) (black dots in the following plot).
# Vector field + Nullclines + Steady states
p3 = p1 +
  geom_hline(aes(yintercept = a/b, color = "N-nullcline")) +
  geom_vline(aes(xintercept = 0, color = "N-nullcline")) +
  geom_vline(aes(xintercept = d/c, color = "P-nullcline")) +
  geom_hline(aes(yintercept = 0, color = "P-nullcline")) +
  geom_point(aes(x=0, y=0, colour="Steady states"), size = 3) +
  geom_point(aes(x=d/c, y=a/b, colour="Steady states"), size = 3)
# Plus a time trajectory
p3 + geom_path(data = as.data.frame(results_LV_3), aes(x=N, y=P, color = "Trajectory"), size=1) +
  scale_colour_manual(values = c("red", "blue", "black", "orange"))
For each steady state, we perturb the system slightly away from the steady state and simulate the resulting trajectory. Note that the state variables \(N\) and \(P\) must remain non-negative.
results_LV_ss1 = RK4_generic(derivs = derivs_LV, X0 = c(10, 12), t.total = 10, dt = 0.01, a, b, c, d)
colnames(results_LV_ss1) = c("t", "N", "P")
results_LV_ss2 = RK4_generic(derivs = derivs_LV, X0 = c(d/c + 3, a/b - 4), t.total = 10, dt = 0.01, a, b, c, d)
colnames(results_LV_ss2) = c("t", "N", "P")
# Plus the two time trajectories
p3 + geom_path(data = as.data.frame(results_LV_ss1), aes(x=N, y=P, color = "Trajectory"), size=1) +
  geom_path(data = as.data.frame(results_LV_ss2), aes(x=N, y=P, color = "Trajectory"), size=1) +
  scale_colour_manual(values = c("red", "blue", "black", "orange"))
## Warning: Removed 1 rows containing missing values (geom_segment).
(This warning arises because the vector field vanishes at the origin, so its unit vector is 0/0 = NaN and that one arrow is dropped.)
In the case of the steady state \(X_1\), a small perturbation leads to an oscillatory trajectory away from \(X_1\), so \(X_1\) is unstable. In the case of the steady state \(X_2\), a small perturbation leads to an orbit around \(X_2\), so \(X_2\) is a center.
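One way to see why the orbits around \(X_2\) close is that the Lotka-Volterra model conserves the quantity \(V(N, P) = cN - d\ln N + bP - a\ln P\) along trajectories (differentiating \(V\) and substituting Equation (1) gives \(\frac{dV}{dt} = 0\)). A quick numerical check on the earlier trajectory (a sketch):
V_traj = c*results_LV_3[,2] - d*log(results_LV_3[,2]) + b*results_LV_3[,3] - a*log(results_LV_3[,3])
range(V_traj) # nearly constant, up to RK4 integration error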
We can also compute the stability of a steady state without performing perturbation simulations. We will need to evaluate the Jacobian matrix of the ODEs:
\[\begin{equation} \mathbf{J} = \begin{pmatrix} \frac{\partial f}{\partial N} & \frac{\partial f}{\partial P} \\ \frac{\partial g}{\partial N} & \frac{\partial g}{\partial P} \end {pmatrix} \end{equation}\]
Instead of using the analytical form of the Jacobian matrix, we will estimate it numerically from the rate equations. The stability of a steady state can be determined from the eigenvalues of the Jacobian matrix according to the following table. Note that the eigenvalues of a Jacobian matrix are not necessarily real numbers: the first three rows apply to real eigenvalues, the last two to complex ones.
Conditions | Stability |
---|---|
\(\lambda_1 < 0\) & \(\lambda_2 < 0\) | Stable |
\(\lambda_1 \geq 0\) & \(\lambda_2 \geq 0\) | Unstable |
\(\lambda_1 \lambda_2 < 0\) | Saddle point |
\(Re(\lambda_1) < 0\) & \(Re(\lambda_2) < 0\) | Stable spiral |
\(Re(\lambda_1) \geq 0\) & \(Re(\lambda_2) \geq 0\) | Unstable spiral |
# A generic function to check stability for a 2D ODE system
stability_2D <- function(derivs, ss, ...) { # ss is a vector of steady state values X_S, Y_S
  delta = 0.001
  f_current = derivs(0, ss, ...) # f(x,y); computed just in case it is not exactly 0
  f_plus_dx = derivs(0, ss + c(delta, 0), ...) # f(x+dx, y)
  f_plus_dy = derivs(0, ss + c(0, delta), ...) # f(x, y+dy)
  # finite differences to approximate the Jacobian
  dfxdx = (f_plus_dx[1] - f_current[1])/delta
  dfxdy = (f_plus_dy[1] - f_current[1])/delta
  dfydx = (f_plus_dx[2] - f_current[2])/delta
  dfydy = (f_plus_dy[2] - f_current[2])/delta
  jacobian = array(c(dfxdx, dfydx, dfxdy, dfydy), c(2,2))
  lambda = eigen(jacobian)$values
  if(is.complex(lambda)) {
    if(Re(lambda[1]) < 0){
      stability = 4 # stable spiral
    } else {
      stability = 5 # unstable spiral
    }
  } else {
    if((lambda[1] < 0) & (lambda[2] < 0)){
      stability = 1 # stable
    } else if((lambda[1] >= 0) & (lambda[2] >= 0)){
      stability = 2 # unstable
    } else {
      stability = 3 # saddle
    }
  }
  return(stability)
}
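For reference, the analytic Jacobian of the Lotka-Volterra model is \(\begin{pmatrix} a - bP & -bN \\ cP & cN - d \end{pmatrix}\). As a cross-check of the numerical scheme (a sketch), its eigenvalues at the coexistence steady state \((\frac{d}{c}, \frac{a}{b})\) are purely imaginary:
J_analytic = matrix(c(a - b*(a/b), c*(a/b), -b*(d/c), c*(d/c) - d), nrow = 2) # filled column by column
eigen(J_analytic)$values # +/- i*sqrt(a*d), consistent with a center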
We apply this method to the current model.
ss_all = rbind(c(0, 0), c(d/c, a/b))
ss_with_stability = t(apply(ss_all, MARGIN = 1, function(ss) {
  return(c(ss, stability_2D(derivs = derivs_LV, ss = ss, a, b, c, d)))
}))
colnames(ss_with_stability) = c("N", "P", "Stability")
ss_with_stability
## N P Stability
## [1,] 0 0.00000 3
## [2,] 50 33.33333 5
Here, “3” means a saddle point (a type of unstable steady state), and “5” means an unstable spiral. A center falls into this category because its eigenvalues have zero real part, which the scheme above does not treat as a separate case.
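For readability, one could map the numeric codes back to labels (a hypothetical helper, not part of the original code):
stability_labels = c("stable", "unstable", "saddle", "stable spiral", "unstable spiral")
stability_labels[ss_with_stability[, "Stability"]] # "saddle", "unstable spiral"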
We now consider the following data set of lynx and hare populations from 1900 to 1920, as collected by the Hudson's Bay Company.
lvdata = data.frame(
  year = seq(1900,1920,by=1),
  hare = c(30, 47.2, 70.2, 77.4, 36.3, 20.6, 18.1, 21.4, 22, 25.4, 27.1, 40.3, 57, 76.6, 52.3, 19.5, 11.2, 7.6, 14.6, 16.2, 24.7),
  lynx = c(4, 6.1, 9.8, 35.2, 59.4, 41.7, 19, 13, 8.3, 9.1, 7.4, 8, 12.3, 19.5, 45.7, 51.1, 29.7, 15.8, 9.7, 10.1, 8.6)
)
knitr::kable(lvdata, col.names = c("Year", "Hare (x1000)", "Lynx (x1000)"))
Year | Hare (x1000) | Lynx (x1000) |
---|---|---|
1900 | 30.0 | 4.0 |
1901 | 47.2 | 6.1 |
1902 | 70.2 | 9.8 |
1903 | 77.4 | 35.2 |
1904 | 36.3 | 59.4 |
1905 | 20.6 | 41.7 |
1906 | 18.1 | 19.0 |
1907 | 21.4 | 13.0 |
1908 | 22.0 | 8.3 |
1909 | 25.4 | 9.1 |
1910 | 27.1 | 7.4 |
1911 | 40.3 | 8.0 |
1912 | 57.0 | 12.3 |
1913 | 76.6 | 19.5 |
1914 | 52.3 | 45.7 |
1915 | 19.5 | 51.1 |
1916 | 11.2 | 29.7 |
1917 | 7.6 | 15.8 |
1918 | 14.6 | 9.7 |
1919 | 16.2 | 10.1 |
1920 | 24.7 | 8.6 |
We plot the time trajectory for both species:
plot(lvdata$year, lvdata$hare, type = "b", pch=15, col=2,
     xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(lvdata$year, lvdata$lynx, type = "b", pch=19, col=3)
legend("topright", inset=0.02,
       legend = c("Hare", "Lynx"),
       col=2:3, lty=1, cex=0.8)
Or, in the phase plane:
plot(lvdata$hare, lvdata$lynx, type = "b", pch=15, col=2,
     xlab="Hare (x1000)", ylab="Lynx (x1000)", xlim=c(0,100), ylim=c(0,100))
Now, the goal is to find the optimal parameters \(a\), \(b\), \(c\), and \(d\) of the Lotka-Volterra model that best fit the data set. We consider two ways to do so: (1) error minimization; (2) linear regression.
We define the error function as the sum of squares of differences (SSD) between the simulated levels and the levels from the data set. From here on, we use a vector p to represent all parameters.
# Sum of squares of differences (SSD)
ssd <- function(array1, array2) {
  diff = array1 - array2
  return(sum(diff * diff))
}
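A quick usage example (a sketch):
ssd(c(1, 2, 3), c(1, 2, 5)) # (3-5)^2 = 4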
cal_error <- function(derivs, data_exp, func_error, nsteps = 10, p){
  # derivs: defines the ODEs
  # data_exp: provides the data set
  # func_error: computes the error; it must take two arrays to compare
  # nsteps: the number of time steps per unit time (integer)
  #   the ODE integrator uses dt = 1/nsteps, so the data are fitted every nsteps time steps
  #   (nsteps = 1, dt = 1: data fitting at every time step)
  #   (nsteps = 10, dt = 0.1: data fitting every 10 time steps)
  #   (nsteps = 100, dt = 0.01: data fitting every 100 time steps)
  # p: the vector of all parameters c(a, b, c, d)
  t.total = NROW(data_exp) - 1
  dt = 1/nsteps
  X0 = data_exp[1,] # the first data point as the initial condition
  sim = RK4_generic(derivs = derivs, X0 = X0, t.total = t.total, dt = dt,
                    a = p[1], b = p[2], c = p[3], d = p[4])
  data_sim = sim[seq(1, NROW(sim), by = nsteps), 2:3]
  return(func_error(data_exp[,1], data_sim[,1]) + func_error(data_exp[,2], data_sim[,2]))
}
Note that, in the error function, the ODE integration uses multiple time steps per unit time (one year). Compared to an implementation with a single step per unit time, this gives more accurate simulations at the cost of slower optimization.
Taking any initial guess of the four parameters, we can compute the error of the model:
data_LV = cbind(lvdata$hare, lvdata$lynx)
p0 = c(0.5, 0.02, 0.02, 0.5)
cal_error(derivs = derivs_LV, data_exp = data_LV, func_error = ssd, nsteps = 10, p = p0)
## [1] 38932.85
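To see the effect of the integration step size, one can recompute the error with the coarsest integrator, one RK4 step per year (a sketch; the resulting value is not shown here):
cal_error(derivs = derivs_LV, data_exp = data_LV, func_error = ssd, nsteps = 1, p = p0) # dt = 1, less accurate simulation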
We then perform nonlinear minimization of the error function with respect to the parameters \(a\), \(b\), \(c\), and \(d\). An easy way to do so is with the R function nlm. We need to wrap the original cal_error into a new function f, in such a way that the parameter vector is the only argument. Note that nonlinear minimization by nlm is based on Newton's method; it therefore finds a local minimum, not necessarily the global minimum, so we need a good initial guess to obtain a reasonable fit.
f <- function(p) {
  return(cal_error(derivs = derivs_LV, data_exp = data_LV, func_error = ssd, nsteps = 10, p))
}
p0 = c(0.5, 0.02, 0.02, 0.5)
f(p0)
## [1] 38932.85
fitted = nlm(f = f, p = p0)
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
p = fitted$estimate
f(p)
## [1] 753.7168
results_simu = RK4_generic(derivs = derivs_LV, X0 = data_LV[1,], t.total = 20, dt = 0.1,
                           a = p[1], b = p[2], c = p[3], d = p[4])
plot(lvdata$year, lvdata$hare, type = "b", pch=15, col=2,
     xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(results_simu[,1] + 1900, results_simu[,2], type = "l", col = 3)
legend("topright", inset=0.02,
       legend = c("Hare_data", "Hare_simu"),
       col=2:3, lty=1, cex=0.8)
plot(lvdata$year, lvdata$lynx, type = "b", pch=15, col=2,
     xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(results_simu[,1] + 1900, results_simu[,3], type = "l", col = 3)
legend("topright", inset=0.02,
       legend = c("Lynx_data", "Lynx_simu"),
       col=2:3, lty=1, cex=0.8)
plot(lvdata$hare, lvdata$lynx, type = "b", pch=15, col=2,
     xlab="Hare (x1000)", ylab="Lynx (x1000)", xlim=c(0,100), ylim=c(0,100))
lines(results_simu[,2], results_simu[,3], type = "l", col = 3)
In the Lotka-Volterra model, the right-hand sides of the ODEs are linear functions of the four parameters. Therefore, with a small approximation, one can convert the model-fitting problem into a linear regression problem.
Consider the ODEs:
\[ \frac{dx_i}{dt} = \sum_{j} {a_{ij}g_{ij}(x_1, ... , x_n)}\]
Replacing the left-hand side of the above equation with a central finite difference, we have
\[ \frac{x_i(t+\Delta t) - x_i(t-\Delta t)}{2\Delta t} = \sum_{j} {a_{ij}g_{ij}(x_1, ... , x_n)}\]
All \(x_i(t)\) are known from the data (albeit at widely spaced time points), and the functions \(g_{ij}\) are known from the Lotka-Volterra model. Thus, we can perform linear regression with respect to all parameters \(a_{ij}\), using the linear equations for all variables \(x_i\) at all time points \(t\). In this implementation, we omit the first and last time points to allow the above central-difference evaluation of the time derivative.
N_all = lvdata$hare
P_all = lvdata$lynx
tot = length(N_all)
#dndt = a * N + b * (-N*P)
term_dndt = (N_all[3:tot] - N_all[1:(tot-2)])/2
term_n = N_all[2:(tot-1)]
term_minus_np = -N_all[2:(tot-1)]*P_all[2:(tot-1)]
data_n = data.frame(term_dndt = term_dndt, term_n = term_n, term_minus_np = term_minus_np)
model_n = lm(term_dndt ~ term_n + term_minus_np - 1, data_n)
model_n$coefficients
## term_n term_minus_np
## 0.47097918 0.02175128
#dpdt = c* (N*P) - d * P
term_dpdt = (P_all[3:tot] - P_all[1:(tot-2)])/2
term_np = N_all[2:(tot-1)]*P_all[2:(tot-1)]
term_minus_p = -P_all[2:(tot-1)]
data_p = data.frame(term_dpdt = term_dpdt, term_np = term_np, term_minus_p = term_minus_p)
model_p = lm(term_dpdt ~ term_np + term_minus_p - 1, data_p)
model_p$coefficients
## term_np term_minus_p
## 0.01992846 0.70788702
p0 = c(model_n$coefficients, model_p$coefficients)
f(p0)
## [1] 31145.87
results_simu2 = RK4_generic(derivs = derivs_LV, X0 = data_LV[1,], t.total = 20, dt = 0.1,
                            a = p0[1], b = p0[2], c = p0[3], d = p0[4])
plot(lvdata$year, lvdata$hare, type = "b", pch=15, col=2,
     xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(results_simu2[,1] + 1900, results_simu2[,2], type = "l", col = 3)
legend("topright", inset=0.02,
       legend = c("Hare_data", "Hare_simu"),
       col=2:3, lty=1, cex=0.8)
plot(lvdata$year, lvdata$lynx, type = "b", pch=15, col=2,
     xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(results_simu2[,1] + 1900, results_simu2[,3], type = "l", col = 3)
legend("topright", inset=0.02,
       legend = c("Lynx_data", "Lynx_simu"),
       col=2:3, lty=1, cex=0.8)
Linear regression gives deterministic results. However, the obtained parameter set does not fit very well because of errors in the finite-difference evaluation of the derivatives. Still, this simple regression provides a good initial guess for nonlinear minimization.
fitted = nlm(f = f, p = p0)
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
p = fitted$estimate
f(p)
## [1] 753.7167
It is reasonable to hypothesize that the errors come from the finite-difference approximation. To test this, we generate a data set from the simulated trajectory, picking one point every three time steps (readers can choose different values).
set.seed(12) # set the random seed for the noise generated below
my_data <- results_simu[seq(1, NROW(results_simu),by = 3),] # data extracted from the simulations
nrow_my = NROW(my_data)
noise = 0
my_data[,2:3] = my_data[,2:3] + matrix(runif(nrow_my*2, -noise, noise), nrow = nrow_my, ncol = 2) # add noise
my_data[which(my_data < 0)] = 0 # negative numbers set to zeros
plot(my_data[,1], my_data[,2], type = "b", pch=15, col=2,
     xlab="t", ylab="Levels", xlim=c(0,20), ylim=c(0,100))
lines(my_data[,1], my_data[,3], type = "b", pch=15, col=3)
Note that we provide an option to add a random perturbation to each data point. However, in the analysis below, we turn the noise option off by setting the noise level to zero. Readers can try the analyses with different noise levels.
We now repeat the whole regression analysis with this simulated data.
N_all = my_data[,2]
P_all = my_data[,3]
tot = length(N_all)
#dndt = a * N + b * (-N*P)
term_dndt = (N_all[3:tot] - N_all[1:(tot-2)])/0.3/2 # note delta t = 0.3 here (one point every 3 steps of dt = 0.1)
term_n = N_all[2:(tot-1)]
term_minus_np = -N_all[2:(tot-1)]*P_all[2:(tot-1)]
data_n = data.frame(term_dndt = term_dndt, term_n = term_n, term_minus_np = term_minus_np)
model_n = lm(term_dndt ~ term_n + term_minus_np - 1, data_n)
model_n$coefficients
## term_n term_minus_np
## 0.53670257 0.02757445
#dpdt = c* (N*P) - d * P
term_dpdt = (P_all[3:tot] - P_all[1:(tot-2)])/0.3/2
term_np = N_all[2:(tot-1)]*P_all[2:(tot-1)]
term_minus_p = -P_all[2:(tot-1)]
data_p = data.frame(term_dpdt = term_dpdt, term_np = term_np, term_minus_p = term_minus_p)
model_p = lm(term_dpdt ~ term_np + term_minus_p - 1, data_p)
model_p$coefficients
## term_np term_minus_p
## 0.02597237 0.82460352
p1 = c(model_n$coefficients, model_p$coefficients)
f(p1)
## [1] 1209.514
results_simu3 = RK4_generic(derivs = derivs_LV, X0 = data_LV[1,], t.total = 20, dt = 0.1,
                            a = p1[1], b = p1[2], c = p1[3], d = p1[4])
plot(my_data[,1], my_data[,2], type = "b", pch=15, col=2,
     xlab="t", ylab="Levels", xlim=c(0,20), ylim=c(0,100))
lines(results_simu3[,1], results_simu3[,2], type = "l", col = 3)
legend("topright", inset=0.02,
       legend = c("Hare_data", "Hare_simu"),
       col=2:3, lty=1, cex=0.8)
plot(my_data[,1], my_data[,3], type = "b", pch=15, col=2,
     xlab="t", ylab="Levels", xlim=c(0,20), ylim=c(0,100))
lines(results_simu3[,1], results_simu3[,3], type = "l", col = 3)
legend("topright", inset=0.02,
       legend = c("Lynx_data", "Lynx_simu"),
       col=2:3, lty=1, cex=0.8)
As the results show, linear regression works much better here, even if not perfectly, supporting the hypothesis that the finite-difference approximation of the derivatives is the main source of error when fitting the yearly data.