Avoid them as much as possible by using intermediate variables. Example:
# 3 multiplications:
sir_equations1 <- function(time, variables, parameters) {
with(as.list(c(variables, parameters)), {
incidence <- beta * I * S
recovery <- gamma * I
dS <- -incidence
dI <- incidence - recovery
dR <- recovery
list(c(dS, dI, dR))
})
}
# 6 multiplications:
sir_equations2 <- function(time, variables, parameters) {
with(as.list(c(variables, parameters)), {
dS <- -beta * I * S
dI <- beta * I * S - gamma * I
dR <- gamma * I
list(c(dS, dI, dR))
})
}
initial_values <- c(S = 999, I = 1, R = 0)
time_values <- seq(0, 30, .1)
parameters_values <- c(beta = .004, gamma = .5)
bench::mark(
three_multiplications =
ode(y = initial_values, times = time_values, func = sir_equations1, parms = parameters_values),
six_multiplications =
ode(y = initial_values, times = time_values, func = sir_equations2, parms = parameters_values)
)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 three_multiplications 6.5ms 6.9ms 136. 694.9KB 21.3
## 2 six_multiplications 6.52ms 6.73ms 147. 96.4KB 18.7
return()
Function calls are computationally expensive. Do not use any that is
not necessary, for example return()
at the end of a
function:
# without return():
sir_equations1 <- function(time, variables, parameters) {
with(as.list(c(variables, parameters)), {
incidence <- beta * I * S
recovery <- gamma * I
dS <- -incidence
dI <- incidence - recovery
dR <- recovery
list(c(dS, dI, dR))
})
}
# with return():
sir_equations2 <- function(time, variables, parameters) {
with(as.list(c(variables, parameters)), {
incidence <- beta * I * S
recovery <- gamma * I
dS <- -incidence
dI <- incidence - recovery
dR <- recovery
return(list(c(dS, dI, dR)))
})
}
initial_values <- c(S = 999, I = 1, R = 0)
time_values <- seq(0, 30, .1)
parameters_values <- c(beta = .004, gamma = .5)
bench::mark(
without_return =
ode(y = initial_values, times = time_values, func = sir_equations1, parms = parameters_values),
with_return =
ode(y = initial_values, times = time_values, func = sir_equations2, parms = parameters_values)
)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 without_return 6.49ms 6.65ms 150. 85.5KB 18.4
## 2 with_return 6.69ms 6.92ms 144. 88.4KB 18.6
Built-in functions comes with a number of internal checks that are useful for interactive use. But if you know what you’re doing, you are better off rewriting some of these funtions from scractch, without the internal checks:
x <- runif(1000)
bench::mark(
from_scratch = sum(x) / length(x),
built_in = mean(x)
)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 from_scratch 2.04µs 2.13µs 447183. 0B 0
## 2 built_in 4.35µs 4.63µs 208619. 0B 20.9
If you’re generating a sequence with a step of 1, there are multiple ways to do so:
bench::mark(
seq.int(0, 1000000),
seq(0, 1000000),
0:1000000
)
## # A tibble: 3 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 seq.int(0, 1e+06) 237ns 268ns 3498610. 0B 0
## 2 seq(0, 1e+06) 3.69µs 4.09µs 231589. 0B 0
## 3 0:1e+06 128ns 145ns 5214732. 0B 0
How does this perform at different sizes (looking at
total_time
):
purrr::map(
10^(0:7),
function(n) {
bench::mark(
seq.int(0, n),
seq(0, n),
0:n
) %>%
select(expression, total_time) %>%
mutate(across(-expression, as.double)) %>%
mutate(expression = as.character(expression)) %>%
mutate(n = n)
}
) %>%
purrr::list_rbind() %>%
ggplot() +
geom_line(aes(x = log10(n), y = total_time, group = expression, color = expression)) +
scale_x_continuous("Size (log 10)", breaks = 0:7)
With a step lower than 1:
bench::mark(seq(1, 1000, by = .01),
(100:100000) / 100)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 seq(1, 1000, by = 0.01) 465µs 560µs 1373. 1.91MB 34.1
## 2 (100:1e+05)/100 123µs 176µs 4578. 1.14MB 46.1
With a step higher than 1:
bench::mark(seq(1, 1000, by = 3),
1 + 3 * 0:(1000 / 3))
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 seq(1, 1000, by = 3) 15.3µs 16.68µs 54796. 6.66KB 11.0
## 2 1 + 3 * 0:(1000/3) 1.1µs 2.61µs 396288. 4.01KB 0
What is a good and efficient way to create a polynomial function that gives Y from X, given known coefficients? The idea stems from trying to reproduce the temperature-dependent oviposition rate per female mosquito from a table of polynomial coefficients, from this paper.
coefs <- c(-1.51e-4, 1.02e-2, -2.12e01, 1.80, -5.40)
f1 <- function(x, coefs) {
degrees <- length(coefs) - 1
powers <- outer(x, seq(degrees, 0), "^")
multiplied <- t(apply(powers, 1, \(x) x * coefs))
apply(multiplied, 1, sum)
}
f2 <- function(x, coefs) {
powers <- (length(coefs) - 1):0
sapply(x, \(y) sum(coefs * y^powers))
}
bench::mark(
f1(22, coefs),
f2(22, coefs)
)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 f1(22, coefs) 50.8µs 53.7µs 18090. 32.5KB 14.6
## 2 f2(22, coefs) 12.9µs 13.9µs 69310. 0B 20.8
Many ways to access column values in a dataframe/tibble (result must be a vector):
df <- tibble(a = 1:1000, b = 1:1000)
bench::mark(
df$a,
pull(df, a),
df[["a"]],
)
## # A tibble: 3 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 "df$a" 1.38µs 1.56µs 610626. 0B 61.1
## 2 "pull(df, a)" 260.19µs 270.34µs 3659. 199KB 12.4
## 3 "df[[\"a\"]]" 4.87µs 5.22µs 183010. 0B 18.3