library(readxl)
library(dplyr)
library(purrr)
library(tidyr)
library(bbmle)
Of mice and men
All the data and source code files are here.
You can ask for additional analysis here.
1 Introduction
Here we are working on the data from Zhang et al. “Short-course chemotherapy with TMC207 and rifapentine in a murine model of latent tuberculosis infection”. American Journal of Respiratory and Critical Care Medicine 184.6 (2011): 732-737. DOI: 10.1164/rccm.201103-0397OC. Specifically the data from Table 2 shown in Figure 1 below.
The data show the proportion of positive mice as a function of time (about 15 mice per time point and treatment) but what we are interested in is the CFU clearance dynamics at the individual level. Since the former is a consequence of the latter, we develop a simple model of CFU clearance at the individual level from which we compute the consequences at (small-size) population level. By confronting the model-predicted consequences at population level with actual experimental data, we are able to estimate the two parameters of the model. Once the model is calibrated, we can use it to explore what would be the effect of decreasing the initial value of CFU on the time at which the individual clear the infection.
The model itself is a simple exponential decay model with inter-individual variability on the rate of decay that is modelled by a Gamma distribution.
2 Packages
Loading the packages needed for the analysis:
3 Utilitary functions
Width of the lines:
<- 2 line_width
A function that adds proportions estimates and confidence intervals to a data frame from a column of successes and a column or trials:
<- function(df, x, n, p = "est", l = "lwr", u = "upr", ...) {
add_prop_est |>
df mutate(test = map2({{ x }}, {{ n }}, prop.test, ...),
"{p}" := map_dbl(test, ~ .x[["estimate"]]),
conf = map(test, ~ setNames(.x[["conf.int"]], c(l, u)))) |>
unnest_wider(conf) |>
select(- test)
}
A reparameterization of dgamma()
:
<- function(x, mu, sigma) {
dgamma2 dgamma(x, mu^2 / sigma^2, scale = sigma^2 / mu)
}
A reparameterization of qgamma()
:
<- function(p, mu, sigma) {
qgamma2 qgamma(p, mu^2 / sigma^2, scale = sigma^2 / mu)
}
A function that draw a plot frame:
<- function(t_max = 6 * 30, agg = 7, log = FALSE, ...) {
plot_frame if (log) {
plot(NA, xlim = c(0, t_max / agg), ylab = "CFU", axes = FALSE,
ylim = c(1, 1e4), log = "y", ...)
abline2(v = 0:30)
abline2(h = unlist(map(10^(0:3), ~ .x * 1:10)))
else {
} plot(NA, xlim = c(0, t_max / agg), ylab = "CFU", axes = FALSE,
ylim = c(0, 1e4), ...)
abline2(v = 0:30)
abline2(h = 0:10 * 1e3)
}axis(1); axis(2)
}
A function that adds legend:
<- function(where = "topright", ...) {
add_legend legend(where, legend = c("RIF", "RIF+INH", "RPT+INH"),
col = col_treatments[-1], lwd = line_width, ...)
}
Tuning legend()
:
<- function(...) legend(..., bty = "n") legend2
Tuning abline()
:
<- function(...) abline(..., col = "antiquewhite") abline2
Tuning point()
:
<- function(...) points(..., lwd = line_width) points2
Tuning arrows()
:
<- function(...) {
arrows2 arrows(..., length = 0.1, angle = 90, code = 3, lwd = line_width)
}
Tuning lines()
:
<- function(...) lines(..., lwd = line_width) lines2
Tuning polygon()
:
<- function(x, y1, y2, col, ...) {
polygon2 polygon(c(x, rev(x)), c(y1, rev(y2)), border = NA, col = adjustcolor(col, .2), ...)
}
Tuning seq()
:
<- function(...) seq(..., le = 100)
seq2 <- function(...) seq(..., le = 512) seq3
Tuning title()
:
<- function(...) title(..., line = .5)
title2 <- function(...) title(..., line = 0) title3
4 The data
Reading the data:
<- read_excel("table 2 Zhang et al 2011.xlsx") table2
The data look like this:
table2
# A tibble: 12 × 4
group duration positive total
<chr> <dbl> <dbl> <dbl>
1 INH 4 15 15
2 INH 6 15 15
3 RIF 2 15 15
4 RIF 3 13 15
5 RIF 4 6 13
6 RIF+INH 2 14 15
7 RIF+INH 3 7 13
8 RPT+INH 1 9 15
9 RPT+INH 2 0 15
10 TMC 2 13 15
11 TMC 3 2 14
12 TMC 4 4 14
Preparing the data for vizualization by:
- computing the proportions of positive mice with 95% confidence intervals
- jittering the duration variable slightly in order to avoid visual overlap
<- table2 |>
table2a add_prop_est(positive, total) |>
mutate_at("duration", jitter, factor = .2)
It looks like this:
table2a
# A tibble: 12 × 7
group duration positive total est lwr upr
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 INH 4.00 15 15 1 0.747 1
2 INH 6.02 15 15 1 0.747 1
3 RIF 2.02 15 15 1 0.747 1
4 RIF 3.02 13 15 0.867 0.584 0.977
5 RIF 3.99 6 13 0.462 0.204 0.739
6 RIF+INH 1.97 14 15 0.933 0.660 0.997
7 RIF+INH 3.03 7 13 0.538 0.261 0.796
8 RPT+INH 0.982 9 15 0.6 0.329 0.825
9 RPT+INH 2.00 0 15 0 0 0.253
10 TMC 2.02 13 15 0.867 0.584 0.977
11 TMC 2.98 2 14 0.143 0.0251 0.438
12 TMC 3.98 4 14 0.286 0.0958 0.580
The treatments:
<- unique(table2$group) treatments
The colors of the treatments:
<- setNames(seq_along(treatments), treatments) col_treatments
A function that plots proportion estimates and confidence intervals for a given treatment x
:
<- function(x, col, polyg = TRUE, connect = TRUE) {
plot_prop_est with(x, {
if (polyg) polygon2(duration, lwr, upr, col)
points2(duration, est, col = col, type = ifelse(connect, "o", "p"))
arrows2(duration, lwr, duration, upr, col = col)
if (connect) {
lines2(duration, lwr, lty = 2, col = col)
lines2(duration, upr, lty = 2, col = col)
}
}) }
A function that plots the proportions estimates from the data:
<- function(polyg = TRUE, connect = TRUE) {
plot_data_prop_est plot(NA, xlim = c(0, 6), ylim = 0:1,
xlab = "duration of treatment (months)",
ylab = "proportion of positive mice")
|>
table2a group_by(group) |>
group_split() |>
walk2(col_treatments, plot_prop_est, polyg, connect)
}
The plot showing points estimates with confidence intervals from the data:
plot_data_prop_est()
legend2("bottomright", legend = treatments, col = col_treatments, lwd = line_width)
5 A model of CFU decay
5.1 Exponential decay
Let’s assume that the CFU in an individual decays at a constant rate \(\lambda\). Then, the CFU as a function of time \(t\) reads:
\[ \mbox{CFU}(t) = \mbox{CFU}_0 e^{-\lambda t} \]
A function that gives CFU as a function of initial CFU\(_0\), decay rate and time:
<- function(cfu0, lambda, t) {
cfu * exp(- lambda * t)
cfu0 }
Let’s try it:
cfu(1e4, .115, 30)
[1] 317.4564
5.2 Individual variability
The next step is to consider that the rate of exponential decay varies slightly from individual to individual. Let’s consider that this inter-individual variability follows a Gamma distribution:
\[ \lambda \sim \mbox{Gamma}(\mu,\sigma) \] where \(\mu\) and \(\sigma\) are the mean and standard deviation of the Gamma distribution. Next, we want to work out how these \(\mu\) and \(\sigma\) parameters translate into a proportion of positive individuals in a population of an infinite number of individuals at time \(t\) (positivity being defined as CFU \(\ge 1\)). This is done by looking at the cumulative probability of the Gamma distribution of the decay rate that leads to the quantile of the inter-individual CFU distribution that is the closest to 1. To do so, we need a function that expresses the distance of the quantile of the inter-individual CFU distribution to 1:
<- function(p, mu, sigma, N0, t) {
dist_to_1 abs(1 - cfu(N0, qgamma2(p, mu, sigma), t))
}
We then use this function in the following function that converts values for \(\mu\) and \(\sigma\) of the Gamma distribution of the inter-individual variability of exponential decay rate into the probability parameter of a binomial distribution that corresponds to the expected proportion of positive individuals in the population:
<- function(mu, sigma, N0 = 1e4, t, epsilon = 1e-16) {
probability <- cfu(N0, qgamma2(epsilon, mu, sigma), t)
upr if (upr < 1) {
return(0)
else {
} <- cfu(N0, qgamma2(1 - epsilon, mu, sigma), t)
lwr if (lwr > 1) {
return(1)
}else {
optimize(dist_to_1, 0:1, mu = mu, sigma = sigma, N0 = N0, t = t)$minimum
}
} }
Let’s try it with this function:
<- function(mu_val, sigma_val, t_val, col_val, leg) {
plot_cfu plot(NA, xlim = c(0, .5), ylim = 0:1,
xlab = expression(paste("within-host mean decay rate ", mu, " (/day)")),
ylab = "in infinite population",
main = paste("t =", t_val))
mtext("proportion positive", 2, 2.5)
walk2(sigma_val, col_val,
~ lines2(mu_val,
map_dbl(mu_val, probability, sigma = .x, t = t_val),
col = .y))
if (leg) {
legend2("bottomleft", col = col_val, lwd = line_width,
legend = sapply(sigma_val,
function(x)
as.expression(substitute(sigma == A,
list(A = as.name(x))))),
title = expression(paste("ind. variability ", sigma, " (/day):")))
} }
Let’s look at 2 time points:
<- par(mfrow = 1:2, cex = 1, plt = c(.2, .95, .2, .8))
opar
walk2(c(25, 45),
c(TRUE, FALSE),
~ plot_cfu(mu_val = seq3(.001, .5), # (/day)
sigma_val = c(.01, .05, .11), # (/day)
t_val = .x, # (day)
col_val = 2:4,
leg = .y))
par(opar)
6 ML estimation
6.1 Likelihood function
The first step consists in expressing a function that computes minus log likelihood:
<- - log(1e-16)
threshold
<- function(mu, sigma, N0, data, epsilon = 1e-16) {
mLL min(threshold,
- sum(dbinom(data$positive,
$total,
datamap_dbl(data$duration,
~ probability(mu, sigma, N0, .x * 30, epsilon)),
TRUE)))
}
Let’s try it:
mLL(mu = .3, sigma = .01, N0 = 1e4, data = filter(table2, group == "RIF"))
[1] 36.84136
6.2 Likelihood surfaces
Since we have only 2 parameters, we can easily have a look at the likelihood surface. A function that calculates the minus log-likelihood values:
<- function(drug, mu_val, sigma_val, N0 = 1e4, epsilon = 1e-16) {
calc_mLL <- expand.grid(mu_val, sigma_val) |>
mLL_val with(map2_dbl(Var1, Var2, mLL, N0 = N0, data = filter(table2, group == drug),
epsilon = epsilon)) |>
matrix(length(mu_val))
list(mu_val = mu_val, sigma_val = sigma_val, mLL_val = mLL_val)
}
Calculating the minus log-likelihood values:
<- calc_mLL("INH", seq2(.001, .05), seq2(.0001, .012))
mLL_INH <- calc_mLL("RIF", seq2(.001, .2), seq2(.0001, .1))
mLL_RIF <- calc_mLL("RIF+INH", seq2(.05, .15), seq2(.0001, .15))
mLL_RIFINH <- calc_mLL("RPT+INH", seq2(.001, .5), seq2(.0001, .1))
mLL_RPTINH <- calc_mLL("TMC", seq2(.08, .2), seq2(.0001, .15)) mLL_TMC
A function that draws the minus log-likelihood surfaces:
<- function(x, ...) {
show_mLL with(x, {
image(mu_val, sigma_val, mLL_val,
xlab = expression(paste("within-host mean decay rate ", mu, " (/day)")),
ylab = expression(paste("individual variability ", sigma, " (/day)")))
contour(mu_val, sigma_val, mLL_val, add = TRUE,
levels = c(.002, .01, .1, 1:10), ...)
box(bty = "o")
}) }
Let’s try it:
<- par(mfrow = c(3, 2), cex = 1, plt = c(.17, .93, .18, .89))
opar
walk2(list(mLL_INH, mLL_RIF, mLL_RIFINH, mLL_RPTINH, mLL_TMC),
c("INH", "RIF", "RIF+INH", "RPT+INH", "TMC"),
~ {show_mLL(.x); title2(.y)})
par(opar)
6.3 MLE by BFGS
A function that performs estimations using the BFGS quasi-Newton algorithm:
<- function(mu, sigma, lower, upper, drug, N0 = 1e4, epsilon = 1e-16) {
estimations <- filter(table2, group == drug)
data mle2(function(mu, sigma) mLL(mu, sigma, N0, data, epsilon),
list(mu = mu, sigma = sigma), "L-BFGS-B",
lower = lower, upper = upper)
}
Let’s use it:
<- list(
est RIF = estimations( mu = .08, sigma = .02,
lower = c(mu = .06, sigma = .01),
upper = c(mu = .09, sigma = .05),
drug = "RIF"),
RIF_INH = estimations( mu = .11, sigma = .04,
lower = c(mu = .09, sigma = .02),
upper = c(mu = .11, sigma = .05),
drug = "RIF+INH"),
RPT_INH = estimations( mu = .3 , sigma = .04,
lower = c(mu = .28, sigma = .01),
upper = c(mu = .32, sigma = .08),
drug = "RPT+INH"),
TMC = estimations( mu = .12, sigma = .04,
lower = c(mu = .11, sigma = .03),
upper = c(mu = .13, sigma = .06),
drug = "TMC"))
Which gives:
map_dfr(est, coef, .id = "group")
# A tibble: 4 × 3
group mu sigma
<chr> <dbl> <dbl>
1 RIF 0.0802 0.0192
2 RIF_INH 0.103 0.0317
3 RPT_INH 0.299 0.0383
4 TMC 0.121 0.0448
6.4 Model predictions
The following figure shows the estimated inter-individual variability of the exponential decay rate:
<- 512
nb_pts <- .45
x_max <- seq(0, x_max, le = nb_pts)
x_val <- rep(0, nb_pts)
y2
<- function(x, col) {
add_area <- dgamma2(x_val, x["mu"], x["sigma"])
y1 polygon2(x_val, y1, y2, col = col)
lines2(x_val, y1, col = col)
}
plot(NA, xlim = c(0, x_max), ylim = c(0, 25),
xlab = expression(paste("decay rate ", lambda, " (/day)")),
ylab = "density of probability")
|>
est map(coef) |>
walk2(col_treatments[-1], add_area)
legend2("topright", legend = treatments[-1], col = col_treatments[-1], lwd = 2)
A function that adds model predictions to a plot:
<- function(x, col, t_max, cfu0 = 1e4, alpha = .2, aggregation = 7) {
add_pred <- seq3(0, t_max)
t_val <- t_val / aggregation
t_val2 polygon2(t_val2,
cfu(cfu0, qgamma2(.025, x["mu"], x["sigma"]), t_val),
cfu(cfu0, qgamma2(.975, x["mu"], x["sigma"]), t_val), col = col)
lines2(t_val2, cfu(cfu0, qgamma2(.500, x["mu"], x["sigma"]), t_val), col = col)
}
The following function shows the model-predicted expected proportions of positive individuals in the population for a given treatment:
<- function(est, col, t_max = 6 * 30, agg = 7, log = FALSE,
predicted_cfu2 cfu0 = 1e4, ...) {
plot_frame(t_max = t_max, agg = agg, log = log, ...)
add_pred(coef(est), col, t_max, cfu0, aggregation = agg)
}
This function is a wrapper around predicted_cfu2()
that calls predicted_cfu2()
in 3 different ways:
<- function(est, col, ttl, ...) {
plot1drug_pred predicted_cfu2(est, col, xlab = "time (weeks)", ...)
predicted_cfu2(est, col, 30, 1, xlab = "time (days)", ...)
title3(ttl)
predicted_cfu2(est, col, xlab = "time (weeks)", log = TRUE, ...)
}
Let’s show the model prediction for all the estimated treatment effects:
<- par(mfrow = c(4, 3), cex = 1, plt = c(.24, .99, .2383, .9147))
opar pwalk(list(est, col_treatments[-1], treatments[-1]),
mgp = c(1.25, 0, 0))
plot1drug_pred, par(opar)
We can compare this model predictions at individual level with Figure 3 of 10.1164/rccm.201103-0397OC that shows CFU after 1 month of treatment, see Figure 7 below.
A function that adds model-predicted proportions of individual with CFU > 1 in a population of 15 individuals:
<- function(x, col, alpha = .2) {
add_mod_pred <- seq3(0, 6 * 30)
t_val <- map_dbl(t_val, ~ probability(x["mu"], x["sigma"],
p_val N0 = 1e4, t = .x, epsilon = 1e-16))
<- t_val / 30
t_val polygon2(t_val,
qbinom(.025, 15, p_val) / 15,
qbinom(.975, 15, p_val) / 15, col = col)
lines2(t_val, qbinom(.5, 15, p_val) / 15, col = col)
}
Another version of the plot_data_prop_est()
function that plot only 1 treatment:
<- function(ttmt, col, polyg = TRUE, connect = TRUE) {
plot_data_prop_est2 plot(NA, xlim = c(0, 6), ylim = 0:1,
xlab = "duration of treatment (months)",
ylab = "proportion of positive mice")
|>
table2a filter(group == ttmt) |>
plot_prop_est(col, polyg, connect)
}
Model-predicted proportion of positive individuals as a function of time in a population of 15 individuals and in comparison with experimental data:
<- par(mfrow = c(2, 2), cex = 1, plt = c(.17, .93, .2, .87))
opar for (i in 2:5) {
plot_data_prop_est2(treatments[i], col_treatments[i], FALSE, FALSE)
add_mod_pred(coef(est[[i - 1]]), col_treatments[i])
title2(treatments[i])
}par(opar)
7 Speculations in humans
Another version of the predicted_cfu()
function:
<- function(t_max = 6 * 30, agg = 7, log = FALSE,
predicted_cfu2 cfu0 = 10^(2:4), est_drug, col, ...) {
plot_frame(t_max = t_max, agg = agg, log = log, ...)
walk(cfu0, add_pred,
x = coef(est_drug), col = col, t_max = t_max, alpha = .2, aggregation = agg)
}
Let’s see what happens if we change the initial CFU:
<- par(mfrow = c(2, 2), cex = 1, plt = c(.17, .93, .2, .87))
opar
for (i in 2:5) {
predicted_cfu2(est_drug = est[[i - 1]], col = col_treatments[i],
log = TRUE, xlab = "time (weeks)")
title2(treatments[i])
}
par(opar)
The following function calculates the time of sterilization:
<- function(x, cfu0 = 1e4, p = .5) {
cfu1_time optimize(
function(t) abs(log(cfu(cfu0, qgamma2(p, x["mu"], x["sigma"]), t))),
c(0, 200)
$minimum
) }
The following function calculates the time of sterilization together with confidence interval:
<- function(x, cfu0) {
CFU0_effect_cfu1_time map_dfc(c(upper = .025, median = .5, lower = .975), cfu1_time, x = x, cfu0 = cfu0)
}
<- function(est, col, alpha = .2,
add_speculation v_val = unlist(map(10^(0:3), ~ .x * 1:10)),
h_val = 10 * 0:20) {
abline2(v = v_val); abline2(h = h_val)
axis(1); axis(2)
<- 10^seq3(0, 4)
cfu0_val <- map_dfr(cfu0_val, CFU0_effect_cfu1_time, x = coef(est))
out polygon2(cfu0_val, out$upper, out$lower, col = col)
lines2(cfu0_val, out$median, col = col)
}
<- par(mfrow = c(4, 3), cex = 1, plt = c(.24, .99, .2383, .9147))
opar
<- function(...) {
plot_tmp plot(..., axes = FALSE, xlab = "initial CFU", ylab = "time of clearance (days)")
}
for (i in 2:5) {
plot_tmp(NA, xlim = 10^c(0, 4), ylim = c(0, 200), log = "x")
add_speculation(est[[i - 1]], col_treatments[i])
plot_tmp(NA, xlim = c(1, 50), ylim = c(0, 100))
add_speculation(est[[i - 1]], col_treatments[i], v_val = seq(0, 50, 5))
title3(treatments[i])
plot_tmp(NA, xlim = c(1, 10), ylim = c(0, 50))
add_speculation(est[[i - 1]], col_treatments[i], h_val = seq(0, 50, 5))
}
par(opar)