Skip to content

Performance regression with .N and := #5424

@svraka

Description

@svraka

On the latest Github master dt[, N = .N, by = id] became much slower than on the latest CRAN version. The output below is from an M1 Mac but the issue can be reproduced on x64 Linux and Windows as well. I included tests for all optimization levels. I haven't found similar issues with other optimized functions.

CRAN (1.14.2)

library(data.table)
options(datatable.verbose = TRUE)

N <- 1e8
n <- 1e6

set.seed(1L)
dt <- data.table(g = sample(seq_len(n), N, TRUE),
                 x = runif(N),
                 key = "g")
#> forder.c received 100000000 rows and 2 columns
#> forder took 2.256 sec
#> reorder took 1.934s elapsed (4.338s cpu)

options(datatable.optimize = 0L)
dt_mod <- copy(dt)
dt_mod[, N := .N, by = g]
#> Detected that j uses these columns: <none> 
#> Finding groups using uniqlist on key ... 0.115s elapsed (0.097s cpu) 
#> Finding group sizes from the positions (can be avoided to save RAM) ... 0.004s elapsed (0.003s cpu) 
#> All optimizations are turned off
#> Making each group and running j (GForce FALSE) ... 
#>   memcpy contiguous groups took 0.310s for 1000000 groups
#>   eval(j) took 0.300s for 1000000 calls
#> 1.412s elapsed (0.630s cpu)

options(datatable.optimize = 1L)
dt_mod <- copy(dt)
dt_mod[, N := .N, by = g]
#> Detected that j uses these columns: <none> 
#> Finding groups using uniqlist on key ... 0.112s elapsed (0.099s cpu) 
#> Finding group sizes from the positions (can be avoided to save RAM) ... 0.003s elapsed (0.003s cpu) 
#> lapply optimization is on, j unchanged as '.N'
#> Making each group and running j (GForce FALSE) ... 
#>   memcpy contiguous groups took 0.312s for 1000000 groups
#>   eval(j) took 0.300s for 1000000 calls
#> 1.767s elapsed (0.610s cpu)

options(datatable.optimize = 2L)
dt_mod <- copy(dt)
dt_mod[, N := .N, by = g]
#> Detected that j uses these columns: <none> 
#> Finding groups using uniqlist on key ... 0.104s elapsed (0.096s cpu) 
#> Finding group sizes from the positions (can be avoided to save RAM) ... 0.004s elapsed (0.003s cpu) 
#> lapply optimization is on, j unchanged as '.N'
#> Making each group and running j (GForce FALSE) ... 
#>   memcpy contiguous groups took 0.311s for 1000000 groups
#>   eval(j) took 0.298s for 1000000 calls
#> 1.442s elapsed (0.613s cpu)

options(datatable.optimize = 3L)
dt_mod <- copy(dt)
dt_mod[, N := .N, by = g]
#> Detected that j uses these columns: <none> 
#> Finding groups using uniqlist on key ... 0.114s elapsed (0.098s cpu) 
#> Finding group sizes from the positions (can be avoided to save RAM) ... 0.003s elapsed (0.003s cpu) 
#> lapply optimization is on, j unchanged as '.N'
#> Making each group and running j (GForce FALSE) ... 
#>   memcpy contiguous groups took 0.308s for 1000000 groups
#>   eval(j) took 0.296s for 1000000 calls
#> 1.364s elapsed (0.595s cpu)

Created on 2022-07-25 by the reprex package (v2.0.1)

Session info
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Monterey 12.5
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] data.table_1.14.2
#> 
#> loaded via a namespace (and not attached):
#>  [1] digest_0.6.29   withr_2.5.0     magrittr_2.0.3  reprex_2.0.1   
#>  [5] evaluate_0.15   highr_0.9       stringi_1.7.8   rlang_1.0.4    
#>  [9] cli_3.3.0       fs_1.5.2        rmarkdown_2.14  tools_4.2.1    
#> [13] stringr_1.4.0   glue_1.6.2      xfun_0.31       yaml_2.3.5     
#> [17] fastmap_1.1.0   compiler_4.2.1  htmltools_0.5.3 knitr_1.39

Github (master)

library(data.table)
options(datatable.verbose = TRUE)

N <- 1e8
n <- 1e6

set.seed(1L)
dt <- data.table(g = sample(seq_len(n), N, TRUE),
                 x = runif(N),
                 key = "g")
#> forder.c received 100000000 rows and 2 columns
#> forder took 2.672 sec
#> reorder took 1.965s elapsed (4.403s cpu)

options(datatable.optimize = 0L)
dt_mod <- copy(dt)
dt_mod[, N := .N, by = g]
#> Argument 'by' after substitute: g
#> Detected that j uses these columns: <none>
#> Finding groups using uniqlist on key ... 0.174s elapsed (0.105s cpu) 
#> Finding group sizes from the positions (can be avoided to save RAM) ... 0.014s elapsed (0.003s cpu) 
#> All optimizations are turned off
#> Making each group and running j (GForce FALSE) ... 
#>   memcpy contiguous groups took 0.260s for 1000000 groups
#>   eval(j) took 0.249s for 1000000 calls
#> 2.103s elapsed (1.489s cpu)

options(datatable.optimize = 1L)
dt_mod <- copy(dt)
dt_mod[, N := .N, by = g]
#> Argument 'by' after substitute: g
#> Detected that j uses these columns: <none>
#> Finding groups using uniqlist on key ... 0.116s elapsed (0.099s cpu) 
#> Finding group sizes from the positions (can be avoided to save RAM) ... 0.004s elapsed (0.003s cpu) 
#> lapply optimization is on, j unchanged as '.N'
#> Making each group and running j (GForce FALSE) ... 
#>   memcpy contiguous groups took 0.263s for 1000000 groups
#>   eval(j) took 0.249s for 1000000 calls
#> 2.294s elapsed (1.479s cpu)

options(datatable.optimize = 2L)
dt_mod <- copy(dt)
dt_mod[, N := .N, by = g]
#> Argument 'by' after substitute: g
#> Detected that j uses these columns: <none>
#> Finding groups using uniqlist on key ... 0.096s elapsed (0.094s cpu) 
#> Finding group sizes from the positions (can be avoided to save RAM) ... 0.004s elapsed (0.003s cpu) 
#> lapply optimization is on, j unchanged as '.N'
#> Making each group and running j (GForce FALSE) ... 
#>   memcpy contiguous groups took 0.262s for 1000000 groups
#>   eval(j) took 0.249s for 1000000 calls
#> 2.096s elapsed (1.468s cpu)

options(datatable.optimize = 3L)
dt_mod <- copy(dt)
dt_mod[, N := .N, by = g]
#> Argument 'by' after substitute: g
#> Detected that j uses these columns: <none>
#> Finding groups using uniqlist on key ... 0.114s elapsed (0.098s cpu) 
#> Finding group sizes from the positions (can be avoided to save RAM) ... 0.003s elapsed (0.004s cpu) 
#> lapply optimization is on, j unchanged as '.N'
#> Making each group and running j (GForce FALSE) ... 
#>   memcpy contiguous groups took 0.261s for 1000000 groups
#>   eval(j) took 0.247s for 1000000 calls
#> 2.048s elapsed (1.464s cpu)

Created on 2022-07-26 by the reprex package (v2.0.1)

Session info
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Monterey 12.5
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] data.table_1.14.3
#> 
#> loaded via a namespace (and not attached):
#>  [1] digest_0.6.29   withr_2.5.0     magrittr_2.0.3  reprex_2.0.1   
#>  [5] evaluate_0.15   highr_0.9       stringi_1.7.8   rlang_1.0.4    
#>  [9] cli_3.3.0       fs_1.5.2        rmarkdown_2.14  tools_4.2.1    
#> [13] stringr_1.4.0   glue_1.6.2      xfun_0.31       yaml_2.3.5     
#> [17] fastmap_1.1.0   compiler_4.2.1  htmltools_0.5.3 knitr_1.39

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions