In this section, we model a predator-prey relationship, described by the classic Lotka-Volterra model.

Lotka-Volterra model

The system is described by:

\[\begin{equation} \begin{cases} \frac{dN}{dt} = f(N,P) = N (a - bP) \\ \frac{dP}{dt} = g(N,P) = P (cN - d) \tag{1} \end{cases} \end{equation}\]

ODE simulation

We first define the derivative function. Here, we pass \(a\) (prey growth rate), \(b\) (predation rate), \(c\) (predator growth per prey consumed), and \(d\) (predator death rate) as four explicit parameters.

derivs_LV <- function(t, Xs, a, b, c, d) { # derivatives of the Lotka-Volterra model
  N = Xs[1]   # prey level
  P = Xs[2]   # predator level
  dNdt = N * (a - b*P)
  dPdt = P * (c*N - d)
  return(c(dNdt, dPdt))
}
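As a quick sanity check, we can evaluate the derivatives at one state, say \(N = 30\) and \(P = 10\), with the parameter values used below; the expected result follows from simple arithmetic.

derivs_LV(0, c(30, 10), a = 1, b = 0.03, c = 0.02, d = 1)
## expected: 21 -4, since 30*(1 - 0.03*10) = 21 and 10*(0.02*30 - 1) = -4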

We will use the RK4 integrator to simulate the ODEs defined by Equation (1).

# 4th order Runge-Kutta (RK4) for a generic multi-variable system, see Part 3A
RK4_generic <- function(derivs, X0, t.total, dt, ...){
  # derivs: the function of the derivatives 
  # X0: initial condition, a vector of multiple variables
  # t.total: total simulation time, assuming t starts from 0 at the beginning
  # dt: time step size 
  t_all = seq(0, t.total, by=dt)
  n_all = length(t_all)
  nx = length(X0)
  X_all = matrix(0, nrow = n_all, ncol = nx)
  X_all[1,] =  X0
  for (i in 1:(n_all-1)) {
    t_0= t_all[i]
    t_0.5 = t_0 + 0.5*dt
    t_1 = t_0 + dt
    k1 = dt * derivs(t_0,X_all[i,],...)
    k2 = dt * derivs(t_0.5,X_all[i,] + k1/2,...)
    k3 = dt * derivs(t_0.5,X_all[i,] + k2/2,...)
    k4 = dt * derivs(t_1,X_all[i,] + k3,...)
    X_all[i+1,] = X_all[i,] + (k1+2*k2+2*k3+k4)/6
  }
  return(cbind(t_all, X_all))   # the output is a matrix of t & X(t) for all time steps
}
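Before applying the integrator to the Lotka-Volterra model, here is a minimal validation on a one-variable system with a known solution (\(dx/dt = -x\), exact solution \(e^{-t}\)); derivs_decay is a throwaway helper defined only for this test.

derivs_decay <- function(t, Xs) { return(-Xs) }   # dx/dt = -x
test = RK4_generic(derivs = derivs_decay, X0 = c(1), t.total = 1, dt = 0.01)
test[nrow(test), 2] - exp(-1)   # should be very close to 0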

We set \(a = d = 1\), \(b = 0.03\), and \(c = 0.02\).

If we start the simulation from the initial condition \({X(t = 0)} = (2, 0)\),

a = 1; b = 0.03; c = 0.02; d = 1
X0_1 = c(2, 0)  # start from N = 2, P = 0
results_LV_1 = RK4_generic(derivs = derivs_LV, X0 = X0_1, t.total = 10, dt = 0.01, a, b, c, d)
plot(results_LV_1[,1], results_LV_1[,2], type = "l", col=2,
    xlab="t", ylab="Levels", xlim=c(0,10), ylim=c(0,100))
lines(results_LV_1[,1], results_LV_1[,3], type = "l", col=3)

legend("topright", inset=0.02, 
       legend = c("N", "P"),
       col=2:3, lty=1, cex=0.8)

With no predators, the prey population grows exponentially.
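Indeed, with \(P = 0\), Equation (1) reduces to \(dN/dt = aN\), whose exact solution is \(N(t) = N_0 e^{at}\). A minimal check against the simulation at the final time point:

t_end = results_LV_1[nrow(results_LV_1), 1]
results_LV_1[nrow(results_LV_1), 2] - 2 * exp(a * t_end)   # should be tiny relative to N(t_end)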

If we start the simulation from the initial condition \({X(t = 0)} = (0, 100)\),

X0_2 = c(0, 100)  # start from N = 0, P = 100
results_LV_2 = RK4_generic(derivs = derivs_LV, X0 = X0_2, t.total = 10, dt = 0.01, a, b, c, d)
plot(results_LV_2[,1], results_LV_2[,2], type = "l", col=2,
    xlab="t", ylab="Levels", xlim=c(0,10), ylim=c(0,100))
lines(results_LV_2[,1], results_LV_2[,3], type = "l", col=3)

legend("topright", inset=0.02, 
       legend = c("N", "P"),
       col=2:3, lty=1, cex=0.8)

With no prey, the predator population decays exponentially.
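Similarly, with \(N = 0\) the predator equation reduces to \(dP/dt = -dP\), with exact solution \(P(t) = P_0 e^{-dt}\):

results_LV_2[nrow(results_LV_2), 3] - 100 * exp(-d * 10)   # should be very close to 0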

If we start the simulation from the initial condition \({X(t = 0)} = (30, 10)\),

X0_3 = c(30, 10)  # start from N = 30, P = 10
results_LV_3 = RK4_generic(derivs = derivs_LV, X0 = X0_3, t.total = 50, dt = 0.01, a, b, c, d)
plot(results_LV_3[,1], results_LV_3[,2], type = "l", col=2,
    xlab="t", ylab="Levels", xlim=c(0,50), ylim=c(0,150))
lines(results_LV_3[,1], results_LV_3[,3], type = "l", col=3)

legend("topright", inset=0.02, 
       legend = c("N", "P"),
       col=2:3, lty=1, cex=0.8)

When both predators and prey are present, the system generates sustained oscillations.
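These closed orbits reflect a conserved quantity of the Lotka-Volterra model: \(V(N, P) = cN - d\ln N + bP - a\ln P\) stays constant along any trajectory (this can be verified by differentiating \(V\) and substituting Equation (1)). A minimal numerical check on the simulation above:

V = c*results_LV_3[,2] - d*log(results_LV_3[,2]) + b*results_LV_3[,3] - a*log(results_LV_3[,3])
range(V)   # a very narrow range indicates V is (numerically) conserved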

Here we explore a few different initial conditions and plot the time trajectories in the phase plane of \(N\) and \(P\).

# Another three initial conditions
X0_4 = c(40, 20)
X0_5 = c(30, 25)
X0_6 = c(20, 40)

# A total of four initial conditions to consider
IC_all = rbind(X0_3, X0_4, X0_5, X0_6)
legend_IC = apply(IC_all, 1, paste, collapse = ",")

results_LV_4 = RK4_generic(derivs = derivs_LV, X0 = X0_4, t.total = 50, dt = 0.01, a, b, c, d)
results_LV_5 = RK4_generic(derivs = derivs_LV, X0 = X0_5, t.total = 50, dt = 0.01, a, b, c, d)
results_LV_6 = RK4_generic(derivs = derivs_LV, X0 = X0_6, t.total = 50, dt = 0.01, a, b, c, d)
colnames(results_LV_4) = c("t", "N", "P")
colnames(results_LV_5) = c("t", "N", "P")
colnames(results_LV_6) = c("t", "N", "P")

plot(IC_all[,1], IC_all[,2], type = "p", pch = 16, col = 1,
    xlab="N", ylab="P", xlim=c(0,150), ylim=c(0,100))
lines(results_LV_3[,2], results_LV_3[,3], type = "l", col=2)
lines(results_LV_4[,2], results_LV_4[,3], type = "l", col=3)
lines(results_LV_5[,2], results_LV_5[,3], type = "l", col=4)
lines(results_LV_6[,2], results_LV_6[,3], type = "l", col=5)

legend("topright", title = "Initial conditions", inset=0.02, 
       legend = legend_IC,
       col=2:5, lty=1, cex=0.8)

Vector field

The directions of change of an ODE system can be represented by vectors in the phase plane.

\[\begin{equation} \begin{cases} V_X = \frac{dX}{dt} \\ V_Y = \frac{dY}{dt} \end{cases} \end{equation}\]

library(ggplot2)

N_all = seq(0, 150, by=10)   # all N grids
P_all = seq(0, 100, by=10)   # all P grids
NP_all = expand.grid(N_all, P_all)   # all combinations of N and P

results = t(apply(NP_all, MARGIN=1, function(Xs) {return(c(Xs, derivs_LV(0, Xs, a, b, c, d)))})) # generate all vector field data
colnames(results) = c("N", "P", "dN", "dP")

scale = 0.1
ggplot(data=as.data.frame(results), aes(x=N, y=P)) + 
  geom_segment(aes(xend=N+scale*dN, yend=P+scale*dP), arrow = arrow(length = unit(0.05,"in")))

We can instead plot the unit vectors of the vector field, to highlight the directions of change regardless of their magnitudes.

results_unit = t(apply(NP_all, MARGIN=1, function(Xs) {
  v = derivs_LV(0, Xs, a, b, c, d)
  v_norm = v / sqrt(v[1]**2 + v[2]**2)   # at a steady state (e.g., the grid point (0,0)) the norm is 0, so v_norm is NaN and that arrow is dropped
  return(c(Xs, v_norm))})) # generate unit vectors
colnames(results_unit) = c("N", "P", "dN", "dP")

scale = 3
p1 = ggplot(data=as.data.frame(results_unit), aes(x=N, y=P)) + 
  geom_segment(aes(xend=N+scale*dN, yend=P+scale*dP), arrow = arrow(length = unit(0.05,"in")))

colnames(results_LV_3) = c("t", "N", "P")
p1 + guides(color="none") + 
   geom_path(data = as.data.frame(results_LV_3), aes(x=N, y=P, colour = 1), size=1) 

Below we choose different initial conditions and, for each, simulate the time trajectory and plot it in the same phase plane.

p2 = p1 + guides(color=guide_legend("ICs")) +
   geom_path(data = as.data.frame(results_LV_3), aes(x=N, y=P, colour = legend_IC[1]), size=1) +
   geom_path(data = as.data.frame(results_LV_4), aes(x=N, y=P, colour = legend_IC[2]), size=1) +
   geom_path(data = as.data.frame(results_LV_5), aes(x=N, y=P, colour = legend_IC[3]), size=1) +
   geom_path(data = as.data.frame(results_LV_6), aes(x=N, y=P, colour = legend_IC[4]), size=1) 
p2

Nullclines

Another powerful method to evaluate the behavior of a nonlinear dynamical system is nullcline analysis. A nullcline corresponds to the steady-state condition of one of the two ODEs: (1) \(\frac{dX}{dt} = 0\) and (2) \(\frac{dY}{dt} = 0\). In the Lotka-Volterra model, the N-nullcline satisfies

\(N = 0\) or \(P = \frac{a}{b}\);

The P-nullcline satisfies

\(P = 0\) or \(N = \frac{d}{c}\).

We add the two nullclines to the phase plane. The intersections of the N-nullcline and the P-nullcline are the steady states of the system, satisfying

\[\begin{equation} \begin{cases} \frac{dN}{dt} = 0 \\ \frac{dP}{dt} = 0 \end{cases} \end{equation}\]

In this case, each nullcline consists of two straight lines, so note that only intersections between lines of different colors (one branch of each nullcline) can be steady states. Here, the first steady state is \(X_1 = (0, 0)\) and the second is \(X_2 = (\frac{d}{c}, \frac{a}{b})\) (black dots in the following plot).
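As a quick check, the derivatives should vanish at both steady states (simple substitution into the derivative function defined above):

derivs_LV(0, c(0, 0), a, b, c, d)       # expected: 0 0
derivs_LV(0, c(d/c, a/b), a, b, c, d)   # expected: 0 0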

# Vector field + Nullclines + Steady states
p3 = p1 + 
  geom_hline(aes(yintercept = a/b, color = "N-nullcline")) +
  geom_vline(aes(xintercept = 0, color = "N-nullcline")) +
  geom_vline(aes(xintercept = d/c, color = "P-nullcline")) +
  geom_hline(aes(yintercept = 0, color = "P-nullcline")) + 
  geom_point(aes(x=0, y=0, colour="Steady states"), size = 3) + 
  geom_point(aes(x=d/c, y=a/b, colour="Steady states"), size = 3) 

# Plus a time trajectory
p3 +  geom_path(data = as.data.frame(results_LV_3), aes(x=N, y=P, color = "Trajectory"), size=1) +
  scale_colour_manual(values = c("red", "blue", "black", "orange"))

Stability of a steady state

For each steady state, we perturb the state of the system slightly away from the steady state and simulate the dynamics. Note that the state variables \(N\) and \(P\) need to be non-negative.

results_LV_ss1 = RK4_generic(derivs = derivs_LV, X0 = c(10, 12), t.total = 10, dt = 0.01, a, b, c, d)
colnames(results_LV_ss1) = c("t", "N", "P")
results_LV_ss2 = RK4_generic(derivs = derivs_LV, X0 = c(d/c + 3, a/b - 4), t.total = 10, dt = 0.01, a, b, c, d)
colnames(results_LV_ss2) = c("t", "N", "P")
# Plus a time trajectory
p3 +  geom_path(data = as.data.frame(results_LV_ss1), aes(x=N, y=P, color = "Trajectory"), size=1) +
   geom_path(data = as.data.frame(results_LV_ss2), aes(x=N, y=P, color = "Trajectory"), size=1) +
  scale_colour_manual(values = c("red", "blue", "black", "orange"))
## Warning: Removed 1 rows containing missing values (geom_segment).

In the case of the steady state \(X_1\), a small perturbation leads to an oscillatory trajectory away from \(X_1\), so \(X_1\) is unstable. In the case of the steady state \(X_2\), a small perturbation leads to an orbit around \(X_2\), so \(X_2\) is a center.
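The behavior near \(X_2\) can also be anticipated analytically: linearizing Equation (1) at \(X_2 = (\frac{d}{c}, \frac{a}{b})\) gives a Jacobian (introduced in the next subsection) with purely imaginary eigenvalues \(\pm i\sqrt{ad}\), so small orbits around \(X_2\) have period close to \(2\pi/\sqrt{ad}\). A one-line check with the parameters used here:

2*pi/sqrt(a*d)   # approximately 6.28 for a = d = 1, consistent with the oscillations seen above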

Stability analysis using the Jacobian

We can also compute the stability of a steady state without performing perturbation simulations. We will need to evaluate the Jacobian matrix of the ODEs:

\[\begin{equation} \mathbf{J} = \begin{pmatrix} \frac{\partial f}{\partial N} & \frac{\partial f}{\partial P} \\ \frac{\partial g}{\partial N} & \frac{\partial g}{\partial P} \end{pmatrix} \end{equation}\]

Instead of using the analytical form of the Jacobian matrix, we will estimate it numerically from the rate equations. The stability of a steady state can then be determined from the eigenvalues of the Jacobian according to the following table. Note that the eigenvalues of a Jacobian matrix are not necessarily real numbers.

| Conditions | Stability |
|------------|-----------|
| \(\lambda_1 < 0\) & \(\lambda_2 < 0\) | Stable |
| \(\lambda_1 \geq 0\) & \(\lambda_2 \geq 0\) | Unstable |
| \(\lambda_1 \lambda_2 < 0\) | Saddle point |
| \(Re(\lambda_1) < 0\) & \(Re(\lambda_2) < 0\) | Stable spiral |
| \(Re(\lambda_1) \geq 0\) & \(Re(\lambda_2) \geq 0\) | Unstable spiral |

(The first three rows apply to real eigenvalues, the last two to complex eigenvalues.)

# A generic function to check the stability of a 2D ODE system
stability_2D <- function(derivs, ss, ...) { # ss is a vector of steady state values X_S, Y_S
  delta = 0.001
  f_current = derivs(0, ss, ...)   # f at the steady state, computed in case it is not exactly 0
  f_plus_dx = derivs(0, ss + c(delta, 0), ...) # f(x+dx, y)
  f_plus_dy = derivs(0, ss + c(0, delta), ...) # f(x, y+dy)
  
  # finite difference to approximate Jacobian
  dfxdx = (f_plus_dx[1] - f_current[1])/delta
  dfxdy = (f_plus_dy[1] - f_current[1])/delta
  dfydx = (f_plus_dx[2] - f_current[2])/delta
  dfydy = (f_plus_dy[2] - f_current[2])/delta
  
  jacobian = array(c(dfxdx, dfydx, dfxdy, dfydy), c(2,2))
  lambda = eigen(jacobian)$values
  if(is.complex(lambda[1])) {   # complex eigenvalues indicate a spiral
    if(Re(lambda[1]) < 0){
      stability = 4   # stable spiral
    }else{
      stability = 5   # unstable spiral
    }
  }
  else{
    if((lambda[1] < 0) & (lambda[2] < 0)){
      stability = 1   # stable 
    }else if((lambda[1] >= 0) & (lambda[2] >= 0)){
      stability = 2   # unstable 
    }else{
      stability = 3   # saddle
    }
  }
  return(stability)
}

We apply this method to the current model.

ss_all = rbind(c(0, 0), c(d/c, a/b))

ss_with_stability =  t(apply(ss_all, MARGIN = 1, function(ss) {
                          return(c(ss,stability_2D(derivs = derivs_LV, ss = ss, a, b, c, d)))}))
colnames(ss_with_stability) = c("N", "P", "Stability")
ss_with_stability
##       N        P Stability
## [1,]  0  0.00000         3
## [2,] 50 33.33333         5

Here, “3” means a saddle point (a type of unstable steady state), and “5” means an unstable spiral (a center falls into this category, since its eigenvalues have zero real part).

Model fitting

We consider the following data set of lynx and hare populations in 1900-1920, as collected by the Hudson's Bay Company.

lvdata = data.frame(
         year = seq(1900,1920,by=1),
         hare = c(30, 47.2, 70.2, 77.4, 36.3, 20.6, 18.1, 21.4, 22, 25.4, 27.1, 40.3, 57, 76.6, 52.3, 19.5, 11.2, 7.6, 14.6, 16.2, 24.7),
         lynx = c(4, 6.1, 9.8, 35.2, 59.4, 41.7, 19, 13, 8.3, 9.1, 7.4, 8, 12.3, 19.5, 45.7, 51.1, 29.7, 15.8, 9.7, 10.1, 8.6)
)
knitr::kable(lvdata, col.names = c("Year", "Hare (x1000)", "Lynx (x1000)"))
| Year | Hare (x1000) | Lynx (x1000) |
|------|--------------|--------------|
| 1900 | 30.0 | 4.0 |
| 1901 | 47.2 | 6.1 |
| 1902 | 70.2 | 9.8 |
| 1903 | 77.4 | 35.2 |
| 1904 | 36.3 | 59.4 |
| 1905 | 20.6 | 41.7 |
| 1906 | 18.1 | 19.0 |
| 1907 | 21.4 | 13.0 |
| 1908 | 22.0 | 8.3 |
| 1909 | 25.4 | 9.1 |
| 1910 | 27.1 | 7.4 |
| 1911 | 40.3 | 8.0 |
| 1912 | 57.0 | 12.3 |
| 1913 | 76.6 | 19.5 |
| 1914 | 52.3 | 45.7 |
| 1915 | 19.5 | 51.1 |
| 1916 | 11.2 | 29.7 |
| 1917 | 7.6 | 15.8 |
| 1918 | 14.6 | 9.7 |
| 1919 | 16.2 | 10.1 |
| 1920 | 24.7 | 8.6 |

We plot the time trajectory for both species:

plot(lvdata$year, lvdata$hare, type = "b", pch=15, col=2,
    xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(lvdata$year, lvdata$lynx, type = "b", pch=19, col=3)

legend("topright", inset=0.02, 
       legend = c("Hare", "Lynx"),
       col=2:3, lty=1, cex=0.8)

Or, in the phase plane:

plot(lvdata$hare, lvdata$lynx, type = "b", pch=15, col=2,
    xlab="Hare (x1000)", ylab="Lynx (x1000)", xlim=c(0,100), ylim=c(0,100))

Now, the goal is to find the optimal parameters \(a\), \(b\), \(c\), \(d\) of the Lotka-Volterra model that best fit the data set. We consider two ways to do so: (1) error minimization; (2) linear regression.

Error minimization

We define the error function as the sum of squared differences (SSD) between the simulated levels and the levels in the data set. From here on, we use a vector p to represent all parameters.

# Sum of squared differences (SSD)
ssd <- function(array1, array2) {
  diff = array1 - array2
  return(sum(diff * diff))
}
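For example (simple arithmetic):

ssd(c(1, 2, 3), c(1, 2, 5))   # (0)^2 + (0)^2 + (-2)^2 = 4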

cal_error <- function(derivs, data_exp, func_error, nsteps = 10, p){  
    # derivs defines the ODEs 
    # data_exp provides the data set
    # func_error computes the error; it must take two arrays to compare
    # nsteps specifies the number of integration steps per unit time (integer);
    # the integrator step size is dt = 1/nsteps, and the simulation is compared
    # to the data every nsteps steps:
      #(nsteps = 1, dt = 1: one step per data point)
      #(nsteps = 10, dt = 0.1: ten steps per data point)
      #(nsteps = 100, dt = 0.01: one hundred steps per data point)
    # p is the vector of all parameters c(a, b, c, d)
    t.total = NROW(data_exp) - 1
    dt = 1/nsteps
    X0 = data_exp[1,]   # the first data point as the initial condition
    sim = RK4_generic(derivs = derivs, X0 = X0, t.total = t.total, dt = dt, 
                         a = p[1], b = p[2], c = p[3], d = p[4])
    data_sim = sim[seq(1, NROW(sim), by = nsteps), 2:3]   # keep only the time points matching the data
    
    return(func_error(data_exp[,1], data_sim[,1]) + func_error(data_exp[,2], data_sim[,2]))
}

Note that, in this error function, the ODE is integrated with multiple time steps per unit time (one year). Compared to an implementation with one step per unit time, this gives more accurate simulations at the cost of slower optimization.

Taking any initial guess of the four parameters, we can compute the error of the model:

data_LV = cbind(lvdata$hare, lvdata$lynx)
p0 = c(0.5, 0.02, 0.02, 0.5)
cal_error(derivs = derivs_LV, data_exp = data_LV, func_error = ssd, nsteps = 10, p = p0)
## [1] 38932.85
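For comparison, the same error can be evaluated with a coarser integration (a sketch; nsteps = 1 uses dt = 1, so the value will differ because of larger integration error):

cal_error(derivs = derivs_LV, data_exp = data_LV, func_error = ssd, nsteps = 1, p = p0)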

We then perform nonlinear minimization of the error function with respect to the parameters \(a\), \(b\), \(c\), and \(d\). An easy way to do so is with the R function nlm. We need to wrap the original cal_error in a new function f, in such a way that the parameter vector is the only argument. Note that nonlinear minimization by nlm is based on Newton’s method; it therefore finds a local minimum, not necessarily the global minimum, so we need a good initial guess to obtain a reasonable fit.

f <- function(p) {
  return(cal_error(derivs = derivs_LV, data_exp = data_LV, func_error = ssd, nsteps = 10, p = p))
}

p0 = c(0.5, 0.02, 0.02, 0.5)
f(p0)
## [1] 38932.85
fitted = nlm(f = f, p = p0)
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## (the same warning is repeated several times during the optimization; repeats omitted)
p = fitted$estimate
f(p)
## [1] 753.7168
results_simu = RK4_generic(derivs = derivs_LV, X0 = data_LV[1,], t.total = 20, dt = 0.1, 
                              a = p[1], b = p[2], c = p[3], d = p[4])

plot(lvdata$year, lvdata$hare, type = "b", pch=15, col=2,
    xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(results_simu[,1] + 1900, results_simu[,2], type = "l", col = 3)

legend("topright", inset=0.02, 
       legend = c("Hare_data", "Hare_simu"),
       col=2:3, lty=1, cex=0.8)

plot(lvdata$year, lvdata$lynx, type = "b", pch=15, col=2,
    xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(results_simu[,1] + 1900, results_simu[,3], type = "l", col = 3)

legend("topright", inset=0.02, 
       legend = c("Lynx_data", "Lynx_simu"),
       col=2:3, lty=1, cex=0.8)

plot(lvdata$hare, lvdata$lynx, type = "b", pch=15, col=2,
    xlab="Hare (x1000)", ylab="Lynx (x1000)", xlim=c(0,100), ylim=c(0,100))
lines(results_simu[,2], results_simu[,3], type = "l", col = 3)

Linear regression

In the Lotka-Volterra model, the right-hand sides of the ODEs are linear functions of the four parameters. Therefore, with a small approximation, one can convert the model-fitting problem into a linear regression problem.

Consider the ODEs:

\[ \frac{dx_i}{dt} = \sum_{j} {a_{ij}g_{ij}(x_1, ... , x_n)}\]

Replacing the left-hand side of the above equation with a central finite difference, we have

\[ \frac{x_i(t+\Delta t) - x_i(t-\Delta t)}{2\Delta t} = \sum_{j} {a_{ij}g_{ij}(x_1, ... , x_n)}\]

All \(x_i(t)\) are known from the data (albeit at rather sparse time points), and the functions \(g_{ij}\) are specified by the Lotka-Volterra model. Thus, we can perform linear regression with respect to the parameters \(a_{ij}\), using the linear equations for all variables \(x_i\) at all time points \(t\). In this implementation, we omit the first and the last time points to allow the above central-difference evaluation of the time derivatives.
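For the Lotka-Volterra model specifically, this yields two regression problems, one per equation (a worked form of the general expression above, with \(\Delta t = 1\) year and no intercept):

\[\begin{equation} \begin{cases} \frac{N_{t+1} - N_{t-1}}{2} = a N_t + b (-N_t P_t) \\ \frac{P_{t+1} - P_{t-1}}{2} = c (N_t P_t) + d (-P_t) \end{cases} \end{equation}\]

The regressors are exactly the term_* columns constructed in the code below.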

N_all = lvdata$hare
P_all = lvdata$lynx
tot = length(N_all)
#dndt = a * N + b * (-N*P)
term_dndt = (N_all[3:tot] - N_all[1:(tot-2)])/2   # central difference with delta t = 1 year
term_n = N_all[2:(tot-1)]
term_minus_np = -N_all[2:(tot-1)]*P_all[2:(tot-1)]

data_n = data.frame(term_dndt = term_dndt, term_n = term_n, term_minus_np = term_minus_np)
model_n = lm(term_dndt ~ term_n + term_minus_np - 1, data_n)
model_n$coefficients
##        term_n term_minus_np 
##    0.47097918    0.02175128
#dpdt = c* (N*P) - d * P
term_dpdt = (P_all[3:tot] - P_all[1:(tot-2)])/2
term_np = N_all[2:(tot-1)]*P_all[2:(tot-1)]
term_minus_p = -P_all[2:(tot-1)]

data_p = data.frame(term_dpdt = term_dpdt, term_np = term_np, term_minus_p = term_minus_p)
model_p = lm(term_dpdt ~ term_np + term_minus_p - 1, data_p)
model_p$coefficients
##      term_np term_minus_p 
##   0.01992846   0.70788702
p0 = c(model_n$coefficients, model_p$coefficients)
f(p0)
## [1] 31145.87
results_simu2 = RK4_generic(derivs = derivs_LV, X0 = data_LV[1,], t.total = 20, dt = 0.1, 
                              a = p0[1], b = p0[2], c = p0[3], d = p0[4])

plot(lvdata$year, lvdata$hare, type = "b", pch=15, col=2,
    xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(results_simu2[,1] + 1900, results_simu2[,2], type = "l", col = 3)

legend("topright", inset=0.02, 
       legend = c("Hare_data", "Hare_simu"),
       col=2:3, lty=1, cex=0.8)

plot(lvdata$year, lvdata$lynx, type = "b", pch=15, col=2,
    xlab="t", ylab="Levels", xlim=c(1900,1920), ylim=c(0,100))
lines(results_simu2[,1] + 1900, results_simu2[,3], type = "l", col = 3)

legend("topright", inset=0.02, 
       legend = c("Lynx_data", "Lynx_simu"),
       col=2:3, lty=1, cex=0.8)

Linear regression gives deterministic results. However, the obtained parameter set does not fit very well, because of errors in the finite-difference estimates of the derivatives. Still, this simple regression provides a good initial guess for the nonlinear minimization.

fitted = nlm(f = f, p = p0)
## Warning in nlm(f = f, p = p0): NA/Inf replaced by maximum positive value
## (repeats omitted)
p = fitted$estimate
f(p)
## [1] 753.7167

A test on simulated data with more time points

It is reasonable to hypothesize that the errors come from the finite-difference approximation. To test this, we generate a data set from the simulated trajectory, picking one point every three time steps, i.e., every 0.3 time units since dt = 0.1 (readers can choose a different spacing).

set.seed(12)   # fix the random number seed so the noise below is reproducible
my_data <- results_simu[seq(1, NROW(results_simu), by = 3),]   # data extracted from the simulations
nrow_my = NROW(my_data)
noise = 0   # noise amplitude; set to 0 for no noise
my_data[,2:3] = my_data[,2:3] + matrix(runif(nrow_my*2, -noise, noise), nrow = nrow_my, ncol = 2) # add noise
my_data[which(my_data < 0)] = 0   # set negative values to zero

plot(my_data[,1], my_data[,2], type = "b", pch=15, col=2,
    xlab="t", ylab="Levels", xlim=c(0,20), ylim=c(0,100))
lines(my_data[,1], my_data[,3], type = "b", pch=15, col=3)

Note that we provide an option to add a random perturbation to each data point. However, in the analysis below we turn the noise off by setting the noise level to zero. Readers can try the analyses with different noise levels.
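For instance, to test robustness to measurement noise, one could rerun the extraction step above with a nonzero amplitude before the regression (a sketch; the amplitude 2 is an arbitrary choice):

noise = 2   # uniform noise in [-2, 2] (x1000 animals)
my_data[,2:3] = my_data[,2:3] + matrix(runif(nrow_my*2, -noise, noise), nrow = nrow_my, ncol = 2)
my_data[which(my_data < 0)] = 0   # keep levels non-negative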

We now repeat the whole regression analysis with this simulated data set.

N_all = my_data[,2]
P_all = my_data[,3]
tot = length(N_all)
#dndt = a * N + b * (-N*P)
term_dndt = (N_all[3:tot] - N_all[1:(tot-2)])/0.3/2 # central difference: delta t = 0.3 here (one point every 3 steps of dt = 0.1)
term_n = N_all[2:(tot-1)]
term_minus_np = -N_all[2:(tot-1)]*P_all[2:(tot-1)]

data_n = data.frame(term_dndt = term_dndt, term_n = term_n, term_minus_np = term_minus_np)
model_n = lm(term_dndt ~ term_n + term_minus_np - 1, data_n)
model_n$coefficients
##        term_n term_minus_np 
##    0.53670257    0.02757445
#dpdt = c* (N*P) - d * P
term_dpdt = (P_all[3:tot] - P_all[1:(tot-2)])/0.3/2
term_np = N_all[2:(tot-1)]*P_all[2:(tot-1)]
term_minus_p = -P_all[2:(tot-1)]

data_p = data.frame(term_dpdt = term_dpdt, term_np = term_np, term_minus_p = term_minus_p)
model_p = lm(term_dpdt ~ term_np + term_minus_p - 1, data_p)
model_p$coefficients
##      term_np term_minus_p 
##   0.02597237   0.82460352
p1 = c(model_n$coefficients, model_p$coefficients)
f(p1)
## [1] 1209.514
results_simu3 = RK4_generic(derivs = derivs_LV, X0 = data_LV[1,], t.total = 20, dt = 0.1, 
                              a = p1[1], b = p1[2], c = p1[3], d = p1[4])

plot(my_data[,1], my_data[,2], type = "b", pch=15, col=2,
    xlab="t", ylab="Levels", xlim=c(0,20), ylim=c(0,100))
lines(results_simu3[,1], results_simu3[,2], type = "l", col = 3)

legend("topright", inset=0.02, 
       legend = c("Hare_data", "Hare_simu"),
       col=2:3, lty=1, cex=0.8)

plot(my_data[,1], my_data[,3], type = "b", pch=15, col=2,
    xlab="t", ylab="Levels", xlim=c(0,20), ylim=c(0,100))
lines(results_simu3[,1], results_simu3[,3], type = "l", col = 3)

legend("topright", inset=0.02, 
       legend = c("Lynx_data", "Lynx_simu"),
       col=2:3, lty=1, cex=0.8)

As the results show, linear regression works much better on this more densely sampled data, even if not perfectly. This supports the hypothesis that the finite-difference approximation was the main source of error.