Advanced R - Notes (part2)
Advanced R (Hadley Wickham).
knitr::opts_chunk$set(error = TRUE)
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(rlang))
suppressPackageStartupMessages(library(lobstr))
suppressPackageStartupMessages(library(withr))
suppressPackageStartupMessages(library(glue))
# devtools::install_github("openpharma/simaerep@v0.3.1")
suppressPackageStartupMessages(library(simaerep))
Metaprogramming
Basics
We are ignoring environments for now.
Capturing/Quoting an Expression or many expression
ex1 <- rlang::expr(mean(mtcars$disp))
ex1
## mean(mtcars$disp)
ex2 <- quote(mean(mtcars$disp))
ex2
## mean(mtcars$disp)
exs1 <- rlang::exprs(mean(mtcars$disp), median(mtcars$disp))
exs1
## [[1]]
## mean(mtcars$disp)
##
## [[2]]
## median(mtcars$disp)
exs2 <- alist(mean(mtcars$disp), median(mtcars$disp))
exs2
## [[1]]
## mean(mtcars$disp)
##
## [[2]]
## median(mtcars$disp)
Evaluating/Unquoting an Expression
eval(ex1)
## [1] 230.7219
lapply(exs1, eval)
## [[1]]
## [1] 230.7219
##
## [[2]]
## [1] 196.3
Capturing an Expression as a String - Deparsing
str_ex1 <- deparse(ex1)
str_ex1
## [1] "mean(mtcars$disp)"
rlang::expr_text(ex1)
## [1] "mean(mtcars$disp)"
Parsing Strings and Evaluating Code from Strings
parse(text = str_ex1)
## expression(mean(mtcars$disp))
eval(parse(text = str_ex1))
## [1] 230.7219
rlang::parse_expr(str_ex1)
## mean(mtcars$disp)
eval(rlang::parse_expr(str_ex1))
## [1] 230.7219
Converting Strings to Symbols
rlang::sym("foo")
## foo
as.name("foo")
## foo
Symbols to Strings
deparse(rlang::sym("foo"))
## [1] "foo"
rlang::expr_text(as.name("foo"))
## [1] "foo"
Code is a tree
Code can be displayed as an abstract synthax tree (AST) with functions/expressions/symbols as nodes and literals/constants as leaves.
lobstr::ast(f(a, "b"))
## █─f
## ├─a
## └─"b"
lobstr::ast(f1(f2(a, b), f3(1, f4(2))))
## █─f1
## ├─█─f2
## │ ├─a
## │ └─b
## └─█─f3
## ├─1
## └─█─f4
## └─2
lobstr::ast(1 + 2 * 3)
## █─`+`
## ├─1
## └─█─`*`
## ├─2
## └─3
Subsetting and modifiyng Expressions
length(ex1)
## [1] 2
ex1[[1]]
## mean
ex1[[2]]
## mtcars$disp
ex1[[1]] <- quote(median)
ex1[[1]]
## median
ex1
## median(mtcars$disp)
eval(ex1)
## [1] 196.3
Quoting/Unquoting in Functions
here we want to capture the user-supplied expression
capture1 <- function(x) {
quote(x)
}
capture1(mean(mtcars$disp))
## x
capture2 <- function(x) {
rlang::enexpr(x)
}
capture2(mean(mtcars$disp))
## mean(mtcars$disp)
capture3 <- function(x) {
substitute(x)
}
capture3(mean(mtcars$disp))
## mean(mtcars$disp)
substitute() can also be used to alter expressions
substitute(mean(mtcars$disp), list(mean = quote(median), disp = quote(vs)))
## median(mtcars$vs)
bang bang !!
In rlang
when constructing expression we can chose to selectively unquote parts of the expression using !!
bang bang.
x <- rlang::expr(-1)
rlang::expr(f(!!x, y))
## f(-1, y)
a <- rlang::sym("foo")
b <- 1
rlang::expr(f(!!a, !!b))
## f(foo, 1)
bang bang bang !!!
selectively unquote lists of expressions using !!!
bang bang bang.
xs <- rlang::exprs(1, a, -b)
rlang::expr(f(!!!xs, y))
## f(1, a, -b, y)
# Or with names
ys <- rlang::set_names(xs, c("a", "b", "c"))
rlang::expr(f(!!!ys, d = 4))
## f(a = 1, b = a, c = -b, d = 4)
Dynamic Dots …
rlang::list2()
allows a function to use !!!
and :=
with its ...
argument, which unpacks the list.
f <- function(...) {
out <- rlang::list2(...)
rev(out)
}
x <- list(alpha = "first", omega = "last")
f(!!!x)
## $omega
## [1] "last"
##
## $alpha
## [1] "first"
f(x)
## [[1]]
## [[1]]$alpha
## [1] "first"
##
## [[1]]$omega
## [1] "last"
nm <- "foo"
f(!!nm := "bar")
## $foo
## [1] "bar"
Provide lists as function arguments
use rlang::exec()
which uses dynamic dots
# Directly
exec(mean, x = 1:10, na.rm = TRUE, trim = 0.1)
## [1] 5.5
# Indirectly
args <- list(x = 1:10, na.rm = TRUE, trim = 0.1)
exec(mean, !!!args)
## [1] 5.5
# Mixed
params <- list(na.rm = TRUE, trim = 0.1)
exec(mean, x = 1:10, !!!params)
## [1] 5.5
- use
do.Call
do.call(mean, list(x = 1:10, na.rm = TRUE, trim = 0.1))
## [1] 5.5
Quosures
When evaluating an expression we can control the environment. Quosures consist of an expression and an environment.
They are similar to formulas in base R
Formulas
we can extract environment and formula expressions using rlang
functions and evaluate both.
construct_formula <- function() {
env_x <- 3
~runif(env_x)
}
f <- construct_formula()
f
## ~runif(env_x)
## <environment: 0x7fe60b693388>
eval(rlang::f_rhs(f), envir = rlang::f_env(f))
## [1] 0.4454028 0.3837579 0.3728023
rlang
rlang::quo
andrlang::quos
matchrlang::expr()
andrlang::exprs()
rlang::eval_tidy()
evaluates quosures
foo <- function(x) enquo(x)
foo(a + b)
## <quosure>
## expr: ^a + b
## env: global
q1 <- new_quosure(expr(x + y), env(x = 1, y = 10))
eval_tidy(q1)
## [1] 11
Data Masks
Call variables from a data frame using expressions. eval_tidy()
excepts a dataframe in addition to the environment in the closure which it unpacks and makes available to the evaluation of the expression.
eval()
excepts a dataframe as an environment
with2 <- function(data, expr) {
expr <- enquo(expr)
eval_tidy(expr, data = data)
}
df <- data.frame(y = 1:10)
x <- 100
with2(df, x * y)
## [1] 100 200 300 400 500 600 700 800 900 1000
with3 <- function(data, expr) {
expr <- substitute(expr)
eval(expr, envir = data)
}
with3(df, x * y)
## [1] 100 200 300 400 500 600 700 800 900 1000
for rlang
quosures we can solve ambiguity between the dataframe and the environment using .data
and .env
df <- data.frame(y = 1:10, x = 11:20)
x <- 100
with2(df, .data$x + .data$y)
## [1] 12 14 16 18 20 22 24 26 28 30
with2(df, .env$x + .data$y)
## [1] 101 102 103 104 105 106 107 108 109 110
# does not work with the base version
with3(df, .data$x)
## NULL
Base Function Using Datamasks
subset
similar to dplyr::filter
subset2 <- function(df, expr) {
qu <- enquo(expr)
bool <- eval_tidy(qu, df)
browser
stopifnot(is.logical(bool))
df[bool,]
}
subset2(mtcars, cyl == 6)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
transform
similar to dplyr::mutate
transform2 <- function(df, ...) {
dots <- enquos(...)
for (i in seq_along(dots)) {
col <- names(dots)[i]
df[[col]] <- eval_tidy(dots[[i]], df)
}
return(df)
}
df <- data.frame(x = 1:10, y = 11: 10)
transform2(df, z = x * y, z0 = z + 1)
## x y z z0
## 1 1 11 11 12
## 2 2 10 20 21
## 3 3 11 33 34
## 4 4 10 40 41
## 5 5 11 55 56
## 6 6 10 60 61
## 7 7 11 77 78
## 8 8 10 80 81
## 9 9 11 99 100
## 10 10 10 100 101
select
rewrite of dplyr::select()
select2 <- function(data, ...) {
dots <- enquos(...)
vars <- as.list(set_names(seq_along(data), names(data)))
cols <- unlist(map(dots, eval_tidy, vars))
data[, cols, drop = FALSE]
}
df <- data.frame(a = 1, b = 2, c = 3, d = 4, e = 5)
select2(df, b:d, a)
## b c d a
## 1 2 3 4 1
Programming with rlang/dplyr
Pass expressions onto functions using expression
We cannot pass them directly. We need to quote and selectively unquote
We can use quosures and bangs
sample_col <- function(df, col, n = 5) {
qu_col <- enquo(col)
head(select2(df, !! qu_col), n)
}
sample_col(mtcars, disp)
## disp
## Mazda RX4 160
## Mazda RX4 Wag 160
## Datsun 710 108
## Hornet 4 Drive 258
## Hornet Sportabout 360
sample_cols <- function(df, ..., n = 5) {
qu_cols <- enquos(...)
head(select2(df, !!! qu_cols), n)
}
sample_cols(mtcars, disp, cyl)
## disp cyl
## Mazda RX4 160 6
## Mazda RX4 Wag 160 6
## Datsun 710 108 4
## Hornet 4 Drive 258 6
## Hornet Sportabout 360 8
Or expressions
sample_col <- function(df, col, n = 5) {
qu_col <- enexpr(col)
head(select(df, !! qu_col), n)
}
sample_col(mtcars, disp)
## disp
## Mazda RX4 160
## Mazda RX4 Wag 160
## Datsun 710 108
## Hornet 4 Drive 258
## Hornet Sportabout 360
sample_cols <- function(df, ..., n = 5) {
qu_cols <- enexprs(...)
head(select2(df, !!! qu_cols), n)
}
sample_cols(mtcars, disp, cyl)
## disp cyl
## Mazda RX4 160 6
## Mazda RX4 Wag 160 6
## Datsun 710 108 4
## Hornet 4 Drive 258 6
## Hornet Sportabout 360 8
Or Curly Curl
**new standard as of rlang 0.4.0
sample_col <- function(df, col, n = 5) {
head(select2(df, {{col}}), n)
}
sample_col(mtcars, disp)
## disp
## Mazda RX4 160
## Mazda RX4 Wag 160
## Datsun 710 108
## Hornet 4 Drive 258
## Hornet Sportabout 360
sample_cols <- function(df, ..., n = 5) {
head(select2(df, ...), n)
}
sample_cols(mtcars, disp, cyl)
## disp cyl
## Mazda RX4 160 6
## Mazda RX4 Wag 160 6
## Datsun 710 108 4
## Hornet 4 Drive 258 6
## Hornet Sportabout 360 8
Pass strings onto functions using expression
use .data
Applications
Fetching Model Training Data from Global Environment
save the call using match.call()
slope_model <- function(data, form) {
m <- lm(form, data)
structure(
list(
slope = m$coefficients[[2]],
intercept = m$coefficients[[1]],
call = match.call()
),
class = "slope_model"
)
}
plot.slope_model <- function(m) {
stopifnot(inherits(m, "slope_model"))
data_expr <- m$call[["data"]]
stopifnot(exists(deparse(data_expr)))
cols <- colnames(eval(data_expr))
form <- m$call[["form"]]
y_expr <- rlang::f_lhs(form)
x_expr <- rlang::f_rhs(form)
y_str <- deparse(y_expr)
x_str <- deparse(x_expr)
stopifnot(c(y_str, y_str) %in% cols)
eval(data_expr) %>%
select({{y_expr}}, {{x_expr}}) %>%
ggplot(aes({{x_expr}}, {{y_expr}})) +
geom_point() +
geom_abline(slope = m$slope, intercept = m$intercept) +
theme_minimal()
}
m <- slope_model(mtcars, disp ~ hp)
m
## $slope
## [1] 1.42977
##
## $intercept
## [1] 20.99248
##
## $call
## slope_model(data = mtcars, form = disp ~ hp)
##
## attr(,"class")
## [1] "slope_model"
plot(m)
pryr::object_size(m)
## 1,520 B
pryr::object_size({m$data <- mtcars})
## 7,208 B
Correcting Calls for Wrapped Modelling Functions
when we write a wrapper the saved call cannot be used to reconstruct the actual call. And methods relying on it will not work
wr_slope <- function(data, form) {
slope_model(data, form)
}
m <- wr_slope(mtcars, disp ~ hp)
m
## $slope
## [1] 1.42977
##
## $intercept
## [1] 20.99248
##
## $call
## slope_model(data = data, form = form)
##
## attr(,"class")
## [1] "slope_model"
plot(m)
## Error: `x` must be a formula
we need to reconstruct a new call inside the wrapper and pass it to the modelling function.
wr_slope <- function(data, form) {
data <- enexpr(data)
form <- enexpr(form)
new_call <- expr(slope_model(!!data, !!form))
eval(new_call)
}
m <- wr_slope(mtcars, disp ~ hp)
m
## $slope
## [1] 1.42977
##
## $intercept
## [1] 20.99248
##
## $call
## slope_model(data = mtcars, form = disp ~ hp)
##
## attr(,"class")
## [1] "slope_model"
plot(m)
attaching the call to the model object is risky because if the modeling function is used by do.call
or purr::map
we risk attaching the entire data to the call.
do.call(slope_model, list(mtcars, disp ~ hp))
## $slope
## [1] 1.42977
##
## $intercept
## [1] 20.99248
##
## $call
## (function(data, form) {
## m <- lm(form, data)
## structure(
## list(
## slope = m$coefficients[[2]],
## intercept = m$coefficients[[1]],
## call = match.call()
## ),
## class = "slope_model"
## )
## })(data = list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3,
## 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4,
## 30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8,
## 19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8,
## 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4),
## disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8,
## 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7,
## 71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145,
## 301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95,
## 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150,
## 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9,
## 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,
## 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76,
## 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
## ), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19,
## 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2,
## 1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14,
## 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61,
## 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6,
## 18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87,
## 17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
## ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
## 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1,
## 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
## 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3,
## 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3,
## 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4,
## 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1,
## 2, 2, 4, 6, 8, 2)), form = disp ~ hp)
##
## attr(,"class")
## [1] "slope_model"
do.call(wr_slope, list(mtcars, disp ~ hp))
## $slope
## [1] 1.42977
##
## $intercept
## [1] 20.99248
##
## $call
## slope_model(data = list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1,
## 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7,
## 32.4, 30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4,
## 15.8, 19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6,
## 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8,
## 4), disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8,
## 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7,
## 71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145, 301,
## 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123,
## 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, 150, 245,
## 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, 3.9, 3.85,
## 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 3.07, 3.07, 3.07,
## 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, 3.15, 3.73, 3.08,
## 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11), wt = c(2.62, 2.875,
## 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, 3.15, 3.44, 3.44, 4.07,
## 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, 1.615, 1.835, 2.465, 3.52,
## 3.435, 3.84, 3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 3.57, 2.78
## ), qsec = c(16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84,
## 20, 22.9, 18.3, 18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47,
## 18.52, 19.9, 20.01, 16.87, 17.3, 15.41, 17.05, 18.9, 16.7, 16.9,
## 14.5, 15.5, 14.6, 18.6), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
## 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
## 1), am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
## 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4,
## 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3,
## 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, 2,
## 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 2, 2, 4,
## 6, 8, 2)), form = disp ~ hp)
##
## attr(,"class")
## [1] "slope_model"
do.call(lm, list(disp ~ hp, mtcars))
##
## Call:
## (function (formula, data, subset, weights, na.action, method = "qr",
## model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
## contrasts = NULL, offset, ...)
## {
## ret.x <- x
## ret.y <- y
## cl <- match.call()
## mf <- match.call(expand.dots = FALSE)
## m <- match(c("formula", "data", "subset", "weights", "na.action",
## "offset"), names(mf), 0L)
## mf <- mf[c(1L, m)]
## mf$drop.unused.levels <- TRUE
## mf[[1L]] <- quote(stats::model.frame)
## mf <- eval(mf, parent.frame())
## if (method == "model.frame")
## return(mf)
## else if (method != "qr")
## warning(gettextf("method = '%s' is not supported. Using 'qr'",
## method), domain = NA)
## mt <- attr(mf, "terms")
## y <- model.response(mf, "numeric")
## w <- as.vector(model.weights(mf))
## if (!is.null(w) && !is.numeric(w))
## stop("'weights' must be a numeric vector")
## offset <- model.offset(mf)
## mlm <- is.matrix(y)
## ny <- if (mlm)
## nrow(y)
## else length(y)
## if (!is.null(offset)) {
## if (!mlm)
## offset <- as.vector(offset)
## if (NROW(offset) != ny)
## stop(gettextf("number of offsets is %d, should equal %d (number of observations)",
## NROW(offset), ny), domain = NA)
## }
## if (is.empty.model(mt)) {
## x <- NULL
## z <- list(coefficients = if (mlm) matrix(NA_real_, 0,
## ncol(y)) else numeric(), residuals = y, fitted.values = 0 *
## y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w !=
## 0) else ny)
## if (!is.null(offset)) {
## z$fitted.values <- offset
## z$residuals <- y - offset
## }
## }
## else {
## x <- model.matrix(mt, mf, contrasts)
## z <- if (is.null(w))
## lm.fit(x, y, offset = offset, singular.ok = singular.ok,
## ...)
## else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
## ...)
## }
## class(z) <- c(if (mlm) "mlm", "lm")
## z$na.action <- attr(mf, "na.action")
## z$offset <- offset
## z$contrasts <- attr(x, "contrasts")
## z$xlevels <- .getXlevels(mt, mf)
## z$call <- cl
## z$terms <- mt
## if (model)
## z$model <- mf
## if (ret.x)
## z$x <- x
## if (ret.y)
## z$y <- y
## if (!qr)
## z$qr <- NULL
## z
## })(formula = disp ~ hp, data = structure(list(mpg = c(21, 21,
## 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3,
## 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5, 15.2, 13.3,
## 19.2, 27.3, 26, 30.4, 15.8, 19.7, 15, 21.4), cyl = c(6, 6, 4,
## 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8,
## 8, 4, 4, 4, 8, 6, 8, 4), disp = c(160, 160, 108, 258, 360, 225,
## 360, 146.7, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460,
## 440, 78.7, 75.7, 71.1, 120.1, 318, 304, 350, 400, 79, 120.3,
## 95.1, 351, 145, 301, 121), hp = c(110, 110, 93, 110, 175, 105,
## 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52,
## 65, 97, 150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109),
## drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92,
## 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93,
## 4.22, 3.7, 2.76, 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22,
## 3.62, 3.54, 4.11), wt = c(2.62, 2.875, 2.32, 3.215, 3.44,
## 3.46, 3.57, 3.19, 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25,
## 5.424, 5.345, 2.2, 1.615, 1.835, 2.465, 3.52, 3.435, 3.84,
## 3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46,
## 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3,
## 18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47, 18.52,
## 19.9, 20.01, 16.87, 17.3, 15.41, 17.05, 18.9, 16.7, 16.9,
## 14.5, 15.5, 14.6, 18.6), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1,
## 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1,
## 0, 0, 0, 1), am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
## 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1),
## gear = c(4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3,
## 3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4,
## 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,
## 1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2)), row.names = c("Mazda RX4",
## "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout",
## "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
## "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
## "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
## "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
## "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
## "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
## "Volvo 142E"), class = "data.frame"))
##
## Coefficients:
## (Intercept) hp
## 20.99 1.43
This is why a size check for the saving the call is required in the tidymodels convention guide
df_m <- mtcars %>%
group_by(cyl) %>%
nest() %>%
mutate(m = map(data, wr_slope, disp ~ hp))
map(df_m$m, ~ .$call)
## [[1]]
## slope_model(data = list(mpg = c(21, 21, 21.4, 18.1, 19.2, 17.8,
## 19.7), disp = c(160, 160, 258, 225, 167.6, 167.6, 145), hp = c(110,
## 110, 110, 105, 123, 123, 175), drat = c(3.9, 3.9, 3.08, 2.76,
## 3.92, 3.92, 3.62), wt = c(2.62, 2.875, 3.215, 3.46, 3.44, 3.44,
## 2.77), qsec = c(16.46, 17.02, 19.44, 20.22, 18.3, 18.9, 15.5),
## vs = c(0, 0, 1, 1, 1, 1, 0), am = c(1, 1, 0, 0, 0, 0, 1),
## gear = c(4, 4, 3, 3, 4, 4, 5), carb = c(4, 4, 1, 1, 4, 4,
## 6)), form = disp ~ hp)
##
## [[2]]
## slope_model(data = list(mpg = c(22.8, 24.4, 22.8, 32.4, 30.4,
## 33.9, 21.5, 27.3, 26, 30.4, 21.4), disp = c(108, 146.7, 140.8,
## 78.7, 75.7, 71.1, 120.1, 79, 120.3, 95.1, 121), hp = c(93, 62,
## 95, 66, 52, 65, 97, 66, 91, 113, 109), drat = c(3.85, 3.69, 3.92,
## 4.08, 4.93, 4.22, 3.7, 4.08, 4.43, 3.77, 4.11), wt = c(2.32,
## 3.19, 3.15, 2.2, 1.615, 1.835, 2.465, 1.935, 2.14, 1.513, 2.78
## ), qsec = c(18.61, 20, 22.9, 19.47, 18.52, 19.9, 20.01, 18.9,
## 16.7, 16.9, 18.6), vs = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1), am = c(1,
## 0, 0, 1, 1, 1, 0, 1, 1, 1, 1), gear = c(4, 4, 4, 4, 4, 4, 3,
## 4, 5, 5, 4), carb = c(1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 2)), form = disp ~
## hp)
##
## [[3]]
## slope_model(data = list(mpg = c(18.7, 14.3, 16.4, 17.3, 15.2,
## 10.4, 10.4, 14.7, 15.5, 15.2, 13.3, 19.2, 15.8, 15), disp = c(360,
## 360, 275.8, 275.8, 275.8, 472, 460, 440, 318, 304, 350, 400,
## 351, 301), hp = c(175, 245, 180, 180, 180, 205, 215, 230, 150,
## 150, 245, 175, 264, 335), drat = c(3.15, 3.21, 3.07, 3.07, 3.07,
## 2.93, 3, 3.23, 2.76, 3.15, 3.73, 3.08, 4.22, 3.54), wt = c(3.44,
## 3.57, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 3.52, 3.435, 3.84,
## 3.845, 3.17, 3.57), qsec = c(17.02, 15.84, 17.4, 17.6, 18, 17.98,
## 17.82, 17.42, 16.87, 17.3, 15.41, 17.05, 14.5, 14.6), vs = c(0,
## 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), am = c(0, 0, 0, 0, 0,
## 0, 0, 0, 0, 0, 0, 0, 1, 1), gear = c(3, 3, 3, 3, 3, 3, 3, 3,
## 3, 3, 3, 3, 5, 5), carb = c(2, 4, 3, 3, 3, 4, 4, 4, 2, 2, 4,
## 2, 4, 8)), form = disp ~ hp)
map(df_m$m, plot)
## Error in exists(deparse(data_expr)): first argument has length > 1
instead attach arguments individually
training_data <- function(name, df) {
structure(
list(
name = stringr::str_trunc(name, width = 120)[[1]],
cols = colnames(df),
dim = dim(df)
),
class = "train_data"
)
}
match_training_data <- function(df, train_data) {
stopifnot(is.data.frame(df))
stopifnot(all(train_data$cols %in% colnames(df)))
df <- df[, train_data$cols]
stopifnot(all(dim(df) == train_data$dim))
return(TRUE)
}
slope_model2 <- function(data, form) {
y <- deparse(rlang::f_lhs(form))
x <- deparse(rlang::f_rhs(form))
data_name <- deparse(rlang::enexpr(data))
train_data <- training_data(data_name, data[, c(x, y)])
m <- lm(form, data)
structure(
list(
slope = m$coefficients[[2]],
intercept = m$coefficients[[1]],
train_data = train_data,
x = x,
y = y
),
class = "slope_model"
)
}
plot.slope_model <- function(m, data = NULL) {
stopifnot(inherits(m, "slope_model"))
if (is.null(data) & exists(m$train_data$name)) {
df_train <- eval(parse(text = m$train_data$name))
match_training_data(df_train, m$train_data)
} else if (! is.null(data)) {
df_train <- data
match_training_data(df_train, m$train_data)
} else {
stop("training data not found")
}
ggplot(df_train, aes_string(m$x, m$y)) +
geom_point() +
geom_abline(slope = m$slope, intercept = m$intercept) +
theme_minimal()
}
m <- slope_model2(mtcars, disp ~ hp)
str(m)
## List of 5
## $ slope : num 1.43
## $ intercept : num 21
## $ train_data:List of 3
## ..$ name: chr "mtcars"
## ..$ cols: chr [1:2] "hp" "disp"
## ..$ dim : int [1:2] 32 2
## ..- attr(*, "class")= chr "train_data"
## $ x : chr "hp"
## $ y : chr "disp"
## - attr(*, "class")= chr "slope_model"
plot(m)
check wrapper compatibility
wr_slope <- function(data, form) {
slope_model2(data, form)
}
m <- wr_slope(mtcars, disp ~ hp)
str(m)
## List of 5
## $ slope : num 1.43
## $ intercept : num 21
## $ train_data:List of 3
## ..$ name: chr "data"
## ..$ cols: chr [1:2] "hp" "disp"
## ..$ dim : int [1:2] 32 2
## ..- attr(*, "class")= chr "train_data"
## $ x : chr "hp"
## $ y : chr "disp"
## - attr(*, "class")= chr "slope_model"
plot(m)
## Error in match_training_data(df_train, m$train_data): is.data.frame(df) is not TRUE
plot(m, mtcars)
still need it
wr_slope2 <- function(data, form) {
data <- enexpr(data)
form <- enexpr(form)
new_call <- expr(slope_model2(!!data, !!form))
eval(new_call)
}
m <- wr_slope2(mtcars, disp ~ hp)
str(m)
## List of 5
## $ slope : num 1.43
## $ intercept : num 21
## $ train_data:List of 3
## ..$ name: chr "mtcars"
## ..$ cols: chr [1:2] "hp" "disp"
## ..$ dim : int [1:2] 32 2
## ..- attr(*, "class")= chr "train_data"
## $ x : chr "hp"
## $ y : chr "disp"
## - attr(*, "class")= chr "slope_model"
plot(m)
do.call
m <- do.call(slope_model2, list(mtcars, disp ~ hp))
str(m)
## List of 5
## $ slope : num 1.43
## $ intercept : num 21
## $ train_data:List of 3
## ..$ name: chr "structure(list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, "
## ..$ cols: chr [1:2] "hp" "disp"
## ..$ dim : int [1:2] 32 2
## ..- attr(*, "class")= chr "train_data"
## $ x : chr "hp"
## $ y : chr "disp"
## - attr(*, "class")= chr "slope_model"
plot(m)
## Error in plot.slope_model(m): training data not found
plot(m, mtcars)
Printing R Code for ggplots Plots
we want to display the r code necessary to render a plot
plot_vars <- function(df, x, y, smooth = FALSE) {
df <- enexpr(df)
x <- enexpr(x)
y <- enexpr(y)
ex <- expr(
ggplot(!!df, aes(!! x, !!y)) +
geom_point()
)
if(smooth){
ex <- expr(!! ex + geom_smooth())
}
return(ex)
}
ex <- plot_vars(mtcars, disp, hp)
cat(deparse(ex))
## ggplot(mtcars, aes(disp, hp)) + geom_point()
eval(plot_vars(mtcars, disp, hp))
ex_smooth <- plot_vars(mtcars, disp, hp, smooth = TRUE)
cat(deparse(ex_smooth))
## ggplot(mtcars, aes(disp, hp)) + geom_point() + geom_smooth()
eval(ex_smooth)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'