Introduction

Overview

Package provides pipe-style interface for data.table package. It preserves all data.table features without significant impact on performance. let and take functions are simplified interfaces for most common data manipulation tasks.

  • To select rows from data: rows(mtcars, am==0)
  • To select columns from data: columns(mtcars, mpg, vs:carb)
  • To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
  • To aggregate all non-grouping columns: take_all(mtcars, mean, by = am)
  • To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)
  • To get total summary skip by argument: take_all(mtcars, mean)
  • Use magrittr pipe %>% to chain several operations:
     mtcars %>%
        let(mpg_hp = mpg/hp) %>%
        take(mean(mpg_hp), by = am)
  • To modify variables or add new variables:
      mtcars %>%
         let(new_var = 42,
             new_var2 = new_var*hp) %>%
         head()
  • To drop variable assign NULL: let(mtcars, am = NULL) %>% head()
  • To modify all non-grouping variables:
    iris %>%
      let_all(
          scaled = (.x - mean(.x))/sd(.x),
          by = Species) %>%
       head()
  • To aggregate all variables conditionally on name:
    iris %>%
      take_all(
          mean = if(startsWith(.name, "Sepal")) mean(.x),
          median = if(startsWith(.name, "Petal")) median(.x),
          by = Species
      )
  • For parametric assignment use :=:
    new_var = "my_var"
    old_var = "mpg"
    mtcars %>%
        let((new_var) := get(old_var)*2) %>%
        head()
     
    # or,  
    expr = quote(mean(cyl))
    mtcars %>% 
        let((new_var) := eval(expr)) %>% 
        head()
    
    # the same with `take` 
    by_var = "vs,am"
    take(mtcars, (new_var) := eval(expr), by = by_var)

query_if function translates its arguments one-to-one to [.data.table method. Additionally there are some conveniences such as automatic data.frame conversion to data.table.

vlookup & xlookup

Let’s make datasets for lookups:

workers = fread("
    name company
    Nick Acme
    John Ajax
    Daniela Ajax
")

positions = fread("
    name position
    John designer
    Daniela engineer
    Cathie manager
")

# xlookup
workers = let(workers,
  position = xlookup(name, positions$name, positions$position)
)

# vlookup
# by default we search in the first column and return values from second column
workers = let(workers,
  position = vlookup(name, positions, no_match = "Not found")
)

# the same 
workers = let(workers,
  position = vlookup(name, positions, 
                     result_column = "position", 
                     no_match = "Not found") # or, result_column = 2 
)

head(workers)
##       name company  position
##     <char>  <char>    <char>
## 1:    Nick    Acme Not found
## 2:    John    Ajax  designer
## 3: Daniela    Ajax  engineer

More examples

We will use for demonstartion well-known mtcars dataset and some examples from dplyr package.

library(maditr)
data(mtcars)

# Newly created variables are available immediately
mtcars %>%
    let(
        cyl2 = cyl * 2,
        cyl4 = cyl2 * 2
    ) %>% head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4    12
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4    12
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1     8
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1    12
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2    16
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1    12
##     cyl4
##    <num>
## 1:    24
## 2:    24
## 3:    16
## 4:    24
## 5:    32
## 6:    24
# You can also use let() to remove variables and
# modify existing variables
mtcars %>%
    let(
        mpg = NULL,
        disp = disp * 0.0163871 # convert to litres
    ) %>% head()
##      cyl     disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num>    <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:     6 2.621936   110  3.90 2.620 16.46     0     1     4     4
## 2:     6 2.621936   110  3.90 2.875 17.02     0     1     4     4
## 3:     4 1.769807    93  3.85 2.320 18.61     1     1     4     1
## 4:     6 4.227872   110  3.08 3.215 19.44     1     0     3     1
## 5:     8 5.899356   175  3.15 3.440 17.02     0     0     3     2
## 6:     6 3.687098   105  2.76 3.460 20.22     1     0     3     1
# window functions are useful for grouped computations
mtcars %>%
    let(rank = rank(-mpg, ties.method = "min"),
        by = cyl) %>%
    head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  rank
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <int>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4     2
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4     2
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1     8
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1     1
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2     2
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1     6
# You can drop variables by setting them to NULL
mtcars %>%
    let(cyl = NULL) %>%
    head()
##      mpg  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.0   160   110  3.90 2.620 16.46     0     1     4     4
## 2:  21.0   160   110  3.90 2.875 17.02     0     1     4     4
## 3:  22.8   108    93  3.85 2.320 18.61     1     1     4     1
## 4:  21.4   258   110  3.08 3.215 19.44     1     0     3     1
## 5:  18.7   360   175  3.15 3.440 17.02     0     0     3     2
## 6:  18.1   225   105  2.76 3.460 20.22     1     0     3     1
# keeps all existing variables
mtcars %>%
    let(displ_l = disp / 61.0237) %>%
    head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  displ_l
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>    <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4 2.621932
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4 2.621932
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1 1.769804
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1 4.227866
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2 5.899347
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1 3.687092
# keeps only the variables you create
mtcars %>%
    take(displ_l = disp / 61.0237) %>% 
    head()
##     displ_l
##       <num>
## 1: 2.621932
## 2: 2.621932
## 3: 1.769804
## 4: 4.227866
## 5: 5.899347
## 6: 3.687092
# can refer to both contextual variables and variable names:
var = 100
mtcars %>%
    let(cyl = cyl * var) %>%
    head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.0   600   160   110  3.90 2.620 16.46     0     1     4     4
## 2:  21.0   600   160   110  3.90 2.875 17.02     0     1     4     4
## 3:  22.8   400   108    93  3.85 2.320 18.61     1     1     4     1
## 4:  21.4   600   258   110  3.08 3.215 19.44     1     0     3     1
## 5:  18.7   800   360   175  3.15 3.440 17.02     0     0     3     2
## 6:  18.1   600   225   105  2.76 3.460 20.22     1     0     3     1
# select rows
mtcars %>%
    rows(am==0) %>% 
    head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
## 2:  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2
## 3:  18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1
## 4:  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
## 5:  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
## 6:  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2
# select rows with compound condition
mtcars %>%
    rows(am==0 & mpg>mean(mpg))
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1:  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
## 2:  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
## 3:  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2
## 4:  21.5     4 120.1    97  3.70 2.465 20.01     1     0     3     1
# select columns
mtcars %>% 
    columns(vs:carb, cyl)
##                     vs am gear carb cyl
## Mazda RX4            0  1    4    4   6
## Mazda RX4 Wag        0  1    4    4   6
## Datsun 710           1  1    4    1   4
## Hornet 4 Drive       1  0    3    1   6
## Hornet Sportabout    0  0    3    2   8
## Valiant              1  0    3    1   6
## Duster 360           0  0    3    4   8
## Merc 240D            1  0    4    2   4
## Merc 230             1  0    4    2   4
## Merc 280             1  0    4    4   6
## Merc 280C            1  0    4    4   6
## Merc 450SE           0  0    3    3   8
## Merc 450SL           0  0    3    3   8
## Merc 450SLC          0  0    3    3   8
## Cadillac Fleetwood   0  0    3    4   8
## Lincoln Continental  0  0    3    4   8
## Chrysler Imperial    0  0    3    4   8
## Fiat 128             1  1    4    1   4
## Honda Civic          1  1    4    2   4
## Toyota Corolla       1  1    4    1   4
## Toyota Corona        1  0    3    1   4
## Dodge Challenger     0  0    3    2   8
## AMC Javelin          0  0    3    2   8
## Camaro Z28           0  0    3    4   8
## Pontiac Firebird     0  0    3    2   8
## Fiat X1-9            1  1    4    1   4
## Porsche 914-2        0  1    5    2   4
## Lotus Europa         1  1    5    2   4
## Ford Pantera L       0  1    5    4   8
## Ferrari Dino         0  1    5    6   6
## Maserati Bora        0  1    5    8   8
## Volvo 142E           1  1    4    2   4
mtcars %>% 
    columns(-am, -cyl)    
##                      mpg  disp  hp drat    wt  qsec vs gear carb
## Mazda RX4           21.0 160.0 110 3.90 2.620 16.46  0    4    4
## Mazda RX4 Wag       21.0 160.0 110 3.90 2.875 17.02  0    4    4
## Datsun 710          22.8 108.0  93 3.85 2.320 18.61  1    4    1
## Hornet 4 Drive      21.4 258.0 110 3.08 3.215 19.44  1    3    1
## Hornet Sportabout   18.7 360.0 175 3.15 3.440 17.02  0    3    2
## Valiant             18.1 225.0 105 2.76 3.460 20.22  1    3    1
## Duster 360          14.3 360.0 245 3.21 3.570 15.84  0    3    4
## Merc 240D           24.4 146.7  62 3.69 3.190 20.00  1    4    2
## Merc 230            22.8 140.8  95 3.92 3.150 22.90  1    4    2
## Merc 280            19.2 167.6 123 3.92 3.440 18.30  1    4    4
## Merc 280C           17.8 167.6 123 3.92 3.440 18.90  1    4    4
## Merc 450SE          16.4 275.8 180 3.07 4.070 17.40  0    3    3
## Merc 450SL          17.3 275.8 180 3.07 3.730 17.60  0    3    3
## Merc 450SLC         15.2 275.8 180 3.07 3.780 18.00  0    3    3
## Cadillac Fleetwood  10.4 472.0 205 2.93 5.250 17.98  0    3    4
## Lincoln Continental 10.4 460.0 215 3.00 5.424 17.82  0    3    4
## Chrysler Imperial   14.7 440.0 230 3.23 5.345 17.42  0    3    4
## Fiat 128            32.4  78.7  66 4.08 2.200 19.47  1    4    1
## Honda Civic         30.4  75.7  52 4.93 1.615 18.52  1    4    2
## Toyota Corolla      33.9  71.1  65 4.22 1.835 19.90  1    4    1
## Toyota Corona       21.5 120.1  97 3.70 2.465 20.01  1    3    1
## Dodge Challenger    15.5 318.0 150 2.76 3.520 16.87  0    3    2
## AMC Javelin         15.2 304.0 150 3.15 3.435 17.30  0    3    2
## Camaro Z28          13.3 350.0 245 3.73 3.840 15.41  0    3    4
## Pontiac Firebird    19.2 400.0 175 3.08 3.845 17.05  0    3    2
## Fiat X1-9           27.3  79.0  66 4.08 1.935 18.90  1    4    1
## Porsche 914-2       26.0 120.3  91 4.43 2.140 16.70  0    5    2
## Lotus Europa        30.4  95.1 113 3.77 1.513 16.90  1    5    2
## Ford Pantera L      15.8 351.0 264 4.22 3.170 14.50  0    5    4
## Ferrari Dino        19.7 145.0 175 3.62 2.770 15.50  0    5    6
## Maserati Bora       15.0 301.0 335 3.54 3.570 14.60  0    5    8
## Volvo 142E          21.4 121.0 109 4.11 2.780 18.60  1    4    2
# regular expression pattern
columns(iris, "^Petal") %>% head() # variables which start from 'Petal'
##   Petal.Length Petal.Width
## 1          1.4         0.2
## 2          1.4         0.2
## 3          1.3         0.2
## 4          1.5         0.2
## 5          1.4         0.2
## 6          1.7         0.4
columns(iris, "Width$") %>% head() # variables which end with 'Width'
##   Sepal.Width Petal.Width
## 1         3.5         0.2
## 2         3.0         0.2
## 3         3.2         0.2
## 4         3.1         0.2
## 5         3.6         0.2
## 6         3.9         0.4
# move Species variable to the front
# pattern "^." matches all variables
columns(iris, Species, "^.") %>% head()
##   Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  setosa          5.1         3.5          1.4         0.2
## 2  setosa          4.9         3.0          1.4         0.2
## 3  setosa          4.7         3.2          1.3         0.2
## 4  setosa          4.6         3.1          1.5         0.2
## 5  setosa          5.0         3.6          1.4         0.2
## 6  setosa          5.4         3.9          1.7         0.4
# pattern "^.*al" means "contains 'al'"
columns(iris, "^.*al") %>% head()
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4
# numeric indexing - all variables except Species
columns(iris, 1:4) %>% head()
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4
# A 'take' with summary functions applied without 'by' argument returns an aggregated data
mtcars %>%
    take(mean = mean(disp), n = .N)
##        mean     n
##       <num> <int>
## 1: 230.7219    32
# Usually, you'll want to group first
mtcars %>%
    take(mean = mean(disp), n = .N, by = am)
##       am     mean     n
##    <num>    <num> <int>
## 1:     1 143.5308    13
## 2:     0 290.3789    19
# grouping by multiple variables
mtcars %>%
    take(mean = mean(disp), n = .N, by = list(am, vs))
##       am    vs     mean     n
##    <num> <num>    <num> <int>
## 1:     1     0 206.2167     6
## 2:     1     1  89.8000     7
## 3:     0     1 175.1143     7
## 4:     0     0 357.6167    12
# You can group by expressions:
mtcars %>%
    take_all(
        mean,
        by = list(vsam = vs + am)
    )
##     vsam      mpg      cyl     disp        hp     drat       wt     qsec
##    <num>    <num>    <num>    <num>     <num>    <num>    <num>    <num>
## 1:     1 20.28462 5.692308 189.4692 138.46154 3.738462 3.038846 18.04231
## 2:     2 28.37143 4.000000  89.8000  80.57143 4.148571 2.028286 18.70000
## 3:     0 15.05000 8.000000 357.6167 194.16667 3.120833 4.104083 17.14250
##        gear     carb
##       <num>    <num>
## 1: 4.076923 3.307692
## 2: 4.142857 1.428571
## 3: 3.000000 3.083333
# modify all non-grouping variables in-place
mtcars %>%
    let_all((.x - mean(.x))/sd(.x), by = am) %>%
    head()
##           mpg        cyl       disp         hp       drat         wt       qsec
##         <num>      <num>      <num>      <num>      <num>      <num>      <num>
## 1: -0.5501185  0.5945745  0.1888587 -0.2004008 -0.4120299  0.3387459 -0.5021316
## 2: -0.5501185  0.5945745  0.1888587 -0.2004008 -0.4120299  0.7520483 -0.1896942
## 3: -0.2582189 -0.6936702 -0.4074443 -0.4026317 -0.5493732 -0.1474922  0.6974050
## 4:  1.1091990 -0.6133196 -0.2938955 -0.9323843 -0.5259081 -0.7124963  0.7176592
## 5:  0.4049674  0.6814663  0.6319326  0.2733692 -0.3474750 -0.4230701 -0.6641654
## 6:  0.2484716 -0.6133196 -0.5934281 -1.0251346 -1.3416023 -0.3973433  1.1630407
##            vs    am       gear       carb
##         <num> <num>      <num>      <num>
## 1: -1.0377490     1 -0.7595545  0.4944600
## 2: -1.0377490     1 -0.7595545  0.4944600
## 3:  0.8894992     1 -0.7595545 -0.8829642
## 4:  1.2743862     0 -0.5026247 -1.5141438
## 5: -0.7433919     0 -0.5026247 -0.6423641
## 6:  1.2743862     0 -0.5026247 -1.5141438
# modify all non-grouping variables to new variables
mtcars %>%
    let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
    head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb mpg_scaled
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>      <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4 -0.5501185
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4 -0.5501185
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1 -0.2582189
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1  1.1091990
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2  0.4049674
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1  0.2484716
##    cyl_scaled disp_scaled  hp_scaled drat_scaled  wt_scaled qsec_scaled
##         <num>       <num>      <num>       <num>      <num>       <num>
## 1:  0.5945745   0.1888587 -0.2004008  -0.4120299  0.3387459  -0.5021316
## 2:  0.5945745   0.1888587 -0.2004008  -0.4120299  0.7520483  -0.1896942
## 3: -0.6936702  -0.4074443 -0.4026317  -0.5493732 -0.1474922   0.6974050
## 4: -0.6133196  -0.2938955 -0.9323843  -0.5259081 -0.7124963   0.7176592
## 5:  0.6814663   0.6319326  0.2733692  -0.3474750 -0.4230701  -0.6641654
## 6: -0.6133196  -0.5934281 -1.0251346  -1.3416023 -0.3973433   1.1630407
##     vs_scaled gear_scaled carb_scaled
##         <num>       <num>       <num>
## 1: -1.0377490  -0.7595545   0.4944600
## 2: -1.0377490  -0.7595545   0.4944600
## 3:  0.8894992  -0.7595545  -0.8829642
## 4:  1.2743862  -0.5026247  -1.5141438
## 5: -0.7433919  -0.5026247  -0.6423641
## 6:  1.2743862  -0.5026247  -1.5141438
# conditionally modify all variables
iris %>%
    let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
    head()
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Length_mean
##           <num>       <num>        <num>       <num>  <fctr>             <num>
## 1:          5.1         3.5          1.4         0.2  setosa          5.843333
## 2:          4.9         3.0          1.4         0.2  setosa          5.843333
## 3:          4.7         3.2          1.3         0.2  setosa          5.843333
## 4:          4.6         3.1          1.5         0.2  setosa          5.843333
## 5:          5.0         3.6          1.4         0.2  setosa          5.843333
## 6:          5.4         3.9          1.7         0.4  setosa          5.843333
##    Sepal.Width_mean Petal.Length_mean Petal.Width_mean
##               <num>             <num>            <num>
## 1:         3.057333             3.758         1.199333
## 2:         3.057333             3.758         1.199333
## 3:         3.057333             3.758         1.199333
## 4:         3.057333             3.758         1.199333
## 5:         3.057333             3.758         1.199333
## 6:         3.057333             3.758         1.199333
# modify all variables conditionally on name
iris %>%
    let_all(
        mean = if(startsWith(.name, "Sepal")) mean(.x),
        median = if(startsWith(.name, "Petal")) median(.x),
        by = Species
    ) %>%
    head()
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Length_mean
##           <num>       <num>        <num>       <num>  <fctr>             <num>
## 1:          5.1         3.5          1.4         0.2  setosa             5.006
## 2:          4.9         3.0          1.4         0.2  setosa             5.006
## 3:          4.7         3.2          1.3         0.2  setosa             5.006
## 4:          4.6         3.1          1.5         0.2  setosa             5.006
## 5:          5.0         3.6          1.4         0.2  setosa             5.006
## 6:          5.4         3.9          1.7         0.4  setosa             5.006
##    Sepal.Width_mean Petal.Length_median Petal.Width_median
##               <num>               <num>              <num>
## 1:            3.428                 1.5                0.2
## 2:            3.428                 1.5                0.2
## 3:            3.428                 1.5                0.2
## 4:            3.428                 1.5                0.2
## 5:            3.428                 1.5                0.2
## 6:            3.428                 1.5                0.2
# aggregation with 'take_all'
mtcars %>%
    take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)
##       am mpg_mean cyl_mean disp_mean  hp_mean drat_mean  wt_mean qsec_mean
##    <num>    <num>    <num>     <num>    <num>     <num>    <num>     <num>
## 1:     1 24.39231 5.076923  143.5308 126.8462  4.050000 2.411000  17.36000
## 2:     0 17.14737 6.947368  290.3789 160.2632  3.286316 3.768895  18.18316
##      vs_mean gear_mean carb_mean   mpg_sd   cyl_sd   disp_sd    hp_sd   drat_sd
##        <num>     <num>     <num>    <num>    <num>     <num>    <num>     <num>
## 1: 0.5384615  4.384615  2.923077 6.166504 1.552500  87.20399 84.06232 0.3640513
## 2: 0.3684211  3.210526  2.736842 3.833966 1.544657 110.17165 53.90820 0.3923039
##        wt_sd  qsec_sd     vs_sd   gear_sd  carb_sd mpg_n cyl_n disp_n  hp_n
##        <num>    <num>     <num>     <num>    <num> <int> <int>  <int> <int>
## 1: 0.6169816 1.792359 0.5188745 0.5063697 2.177978    13    13     13    13
## 2: 0.7774001 1.751308 0.4955946 0.4188539 1.147079    19    19     19    19
##    drat_n  wt_n qsec_n  vs_n gear_n carb_n
##     <int> <int>  <int> <int>  <int>  <int>
## 1:     13    13     13    13     13     13
## 2:     19    19     19    19     19     19
# conditionally aggregate all variables
iris %>%
    take_all(mean = if(is.numeric(.x)) mean(.x))
##    Sepal.Length_mean Sepal.Width_mean Petal.Length_mean Petal.Width_mean
##                <num>            <num>             <num>            <num>
## 1:          5.843333         3.057333             3.758         1.199333
# aggregate all variables conditionally on name
iris %>%
    take_all(
        mean = if(startsWith(.name, "Sepal")) mean(.x),
        median = if(startsWith(.name, "Petal")) median(.x),
        by = Species
    )
##       Species Sepal.Length_mean Sepal.Width_mean Petal.Length_median
##        <fctr>             <num>            <num>               <num>
## 1:     setosa             5.006            3.428                1.50
## 2: versicolor             5.936            2.770                4.35
## 3:  virginica             6.588            2.974                5.55
##    Petal.Width_median
##                 <num>
## 1:                0.2
## 2:                1.3
## 3:                2.0
# parametric evaluation:
var = quote(mean(cyl))
mtcars %>% 
    let(mean_cyl = eval(var)) %>% 
    head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb mean_cyl
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>    <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4   6.1875
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4   6.1875
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1   6.1875
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1   6.1875
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2   6.1875
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1   6.1875
take(mtcars, eval(var))
##    eval(var)
##        <num>
## 1:    6.1875
# all together
new_var = "mean_cyl"
mtcars %>% 
    let((new_var) := eval(var)) %>% 
    head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb mean_cyl
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>    <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4   6.1875
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4   6.1875
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1   6.1875
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1   6.1875
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2   6.1875
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1   6.1875
take(mtcars, (new_var) := eval(var))
##    mean_cyl
##       <num>
## 1:   6.1875

Variable selection in the expressions

You can use ‘columns’ inside expression in the ‘take’/‘let’. ‘columns’ will be replaced with data.table with selected columns. In ‘let’ in the expressions with ‘:=’, ‘cols’ or ‘%to%’ can be placed in the left part of the expression. It is usefull for multiple assignment. There are four ways of column selection:

  1. Simply by column names
  2. By variable ranges, e. g. vs:carb. Alternatively, you can use ‘%to%’ instead of colon: ‘vs %to% carb’.
  3. With regular expressions. Characters which start with ‘^’ or end with $ considered as Perl-style regular expression patterns. For example, ‘^Petal’ returns all variables started with ‘Petal’. ‘Width$’ returns all variables which end with ‘Width’. Pattern ‘^.’ matches all variables and pattern ’^.*my_str’ is equivalent to contains “my_str”’.
  4. By character variables with interpolated parts. Expression in the curly brackets inside characters will be evaluated in the parent frame with ‘text_expand’ function. For example, a{1:3} will be transformed to the names ‘a1’, ‘a2’, ‘a3’. ‘cols’ is just a shortcut for ‘columns’.
# range selection
iris %>% 
    let(
        avg = rowMeans(Sepal.Length %to% Petal.Width)
    ) %>% 
    head()
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species   avg
##           <num>       <num>        <num>       <num>  <fctr> <num>
## 1:          5.1         3.5          1.4         0.2  setosa 2.550
## 2:          4.9         3.0          1.4         0.2  setosa 2.375
## 3:          4.7         3.2          1.3         0.2  setosa 2.350
## 4:          4.6         3.1          1.5         0.2  setosa 2.350
## 5:          5.0         3.6          1.4         0.2  setosa 2.550
## 6:          5.4         3.9          1.7         0.4  setosa 2.850
# multiassignment
iris %>% 
    let(
        # starts with Sepal or Petal
        multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
    ) %>% 
    head()
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species multipled1
##           <num>       <num>        <num>       <num>  <fctr>      <num>
## 1:          5.1         3.5          1.4         0.2  setosa       10.2
## 2:          4.9         3.0          1.4         0.2  setosa        9.8
## 3:          4.7         3.2          1.3         0.2  setosa        9.4
## 4:          4.6         3.1          1.5         0.2  setosa        9.2
## 5:          5.0         3.6          1.4         0.2  setosa       10.0
## 6:          5.4         3.9          1.7         0.4  setosa       10.8
##    multipled2 multipled3 multipled4
##         <num>      <num>      <num>
## 1:        7.0        2.8        0.4
## 2:        6.0        2.8        0.4
## 3:        6.4        2.6        0.4
## 4:        6.2        3.0        0.4
## 5:        7.2        2.8        0.4
## 6:        7.8        3.4        0.8
mtcars %>% 
    let(
        # text expansion
        cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
    ) %>% 
    head()
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb scaled_mpg
##    <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>      <num>
## 1:  21.0     6   160   110  3.90 2.620 16.46     0     1     4     4  0.1508848
## 2:  21.0     6   160   110  3.90 2.875 17.02     0     1     4     4  0.1508848
## 3:  22.8     4   108    93  3.85 2.320 18.61     1     1     4     1  0.4495434
## 4:  21.4     6   258   110  3.08 3.215 19.44     1     0     3     1  0.2172534
## 5:  18.7     8   360   175  3.15 3.440 17.02     0     0     3     2 -0.2307345
## 6:  18.1     6   225   105  2.76 3.460 20.22     1     0     3     1 -0.3302874
##    scaled_cyl scaled_disp  scaled_hp scaled_drat    scaled_wt scaled_qsec
##         <num>       <num>      <num>       <num>        <num>       <num>
## 1: -0.1049878 -0.57061982 -0.5350928   0.5675137 -0.610399567  -0.7771651
## 2: -0.1049878 -0.57061982 -0.5350928   0.5675137 -0.349785269  -0.4637808
## 3: -1.2248578 -0.99018209 -0.7830405   0.4739996 -0.917004624   0.4260068
## 4: -0.1049878  0.22009369 -0.5350928  -0.9661175 -0.002299538   0.8904872
## 5:  1.0148821  1.04308123  0.4129422  -0.8351978  0.227654255  -0.4637808
## 6: -0.1049878 -0.04616698 -0.6080186  -1.5646078  0.248094592   1.3269868
##     scaled_vs  scaled_am scaled_gear scaled_carb
##         <num>      <num>       <num>       <num>
## 1: -0.8680278  1.1899014   0.4235542   0.7352031
## 2: -0.8680278  1.1899014   0.4235542   0.7352031
## 3:  1.1160357  1.1899014   0.4235542  -1.1221521
## 4:  1.1160357 -0.8141431  -0.9318192  -1.1221521
## 5: -0.8680278 -0.8141431  -0.9318192  -0.5030337
## 6:  1.1160357 -0.8141431  -0.9318192  -1.1221521
# range selection in 'by'
# selection of range + additional column
mtcars %>% 
    take(
        res = sum(cols(mpg, disp %to% drat)),
        by = vs %to% gear
    )
##       vs    am  gear     res
##    <num> <num> <num>   <num>
## 1:     0     1     4  589.80
## 2:     1     1     4 1177.97
## 3:     1     0     3  985.64
## 4:     0     0     3 6839.45
## 5:     1     0     4 1125.35
## 6:     0     1     5 1874.61
## 7:     1     1     5  242.27

Joins

Here we use the same datasets as with lookups:

workers = fread("
    name company
    Nick Acme
    John Ajax
    Daniela Ajax
")

positions = fread("
    name position
    John designer
    Daniela engineer
    Cathie manager
")

workers
##       name company
##     <char>  <char>
## 1:    Nick    Acme
## 2:    John    Ajax
## 3: Daniela    Ajax
positions
##       name position
##     <char>   <char>
## 1:    John designer
## 2: Daniela engineer
## 3:  Cathie  manager

Different kinds of joins:

workers %>% dt_inner_join(positions)
## dt_inner_join: joining, by = "name"
##       name company position
##     <char>  <char>   <char>
## 1:    John    Ajax designer
## 2: Daniela    Ajax engineer
workers %>% dt_left_join(positions)
## dt_left_join: joining, by = "name"
##       name company position
##     <char>  <char>   <char>
## 1:    Nick    Acme     <NA>
## 2:    John    Ajax designer
## 3: Daniela    Ajax engineer
workers %>% dt_right_join(positions)
## dt_right_join: joining, by = "name"
##       name company position
##     <char>  <char>   <char>
## 1:    John    Ajax designer
## 2: Daniela    Ajax engineer
## 3:  Cathie    <NA>  manager
workers %>% dt_full_join(positions)
## dt_full_join: joining, by = "name"
##       name company position
##     <char>  <char>   <char>
## 1:    Nick    Acme     <NA>
## 2:    John    Ajax designer
## 3: Daniela    Ajax engineer
## 4:  Cathie    <NA>  manager
# filtering joins
workers %>% dt_anti_join(positions)
## dt_anti_join: joining, by = "name"
##      name company
##    <char>  <char>
## 1:   Nick    Acme
workers %>% dt_semi_join(positions)
## dt_semi_join: joining, by = "name"
##       name company
##     <char>  <char>
## 1:    John    Ajax
## 2: Daniela    Ajax

To suppress the message, supply by argument:

workers %>% dt_left_join(positions, by = "name")

Use a named by if the join variables have different names:

positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
workers %>% dt_inner_join(positions2, by = c("name" = "worker"))