1 Working with data.table

library(data.table)

# Open the help page for data.table (an enhanced data.frame).
help("data.table")

# Build a 12-row example table:
#   x - draws from rnorm(),
#   y - the letters "a", "b", "c", each repeated four times,
#   z - draws from rpois() with lambda = 2.
# No seed is set, so the values printed below come from one particular run.
myDT <- data.table(
  x = rnorm(12),
  y = rep(letters[1:3], each = 4),
  z = rpois(12, 2)
)
# Show the first five rows.
head(myDT, n = 5)
##             x y z
## 1: -0.7300522 a 2
## 2:  0.1245984 a 1
## 3:  0.1459543 a 3
## 4: -1.7862813 a 1
## 5:  0.6491136 b 0

Use tables() to list all the data tables currently in memory

# Print a summary of the data.tables in memory; the recorded output below
# shows name, row/column counts, size in MB, column names and key.
tables()
##      NAME NROW NCOL MB COLS  KEY
## [1,] myDT   12    3  1 x,y,z    
## Total: 1MB

2 Subsetting the rows in a data table

# Select a single row by position.
myDT[10, ]
##            x y z
## 1: 0.6980136 c 1
# Filter rows with a logical expression. Column names can be used directly
# inside i, so there is no need to repeat the table name (myDT$y).
myDT[y < "c"]
##             x y z
## 1: -0.7300522 a 2
## 2:  0.1245984 a 1
## 3:  0.1459543 a 3
## 4: -1.7862813 a 1
## 5:  0.6491136 b 0
## 6: -0.4203274 b 5
## 7: -1.1054911 b 3
## 8: -2.6015095 b 4
# Select several rows by position.
myDT[c(2, 3), ]
##            x y z
## 1: 0.1245984 a 1
## 2: 0.1459543 a 3

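The data.frame-style column subsetting approach does not work as expected: in data.table versions before 1.9.8, j is evaluated as an expression, so c(2, 3) simply returns the vector 2 3 instead of columns 2 and 3 (in those versions, add with = FALSE to select columns by position; newer versions select the columns directly). Use $ to extract a single column as a vector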

# Attempt data.frame-style column selection by position. In the recorded run
# below, j was evaluated as a plain expression and the vector itself came back.
# NOTE(review): data.table >= 1.9.8 is documented to return columns 2 and 3
# here instead - confirm against the installed version.
myDT[, c(2, 3)]
## [1] 2 3
# Extract a single column as a plain vector.
myDT[["x"]]
##  [1] -0.7300522  0.1245984  0.1459543 -1.7862813  0.6491136 -0.4203274
##  [7] -1.1054911 -2.6015095  0.8397754  0.6980136 -0.7980801  0.4900866

3 Calculating values for variables with expressions

# Compute summaries in j; .() is data.table's alias for list(), and each
# named element becomes a column of the one-row result.
myDT[, .(Mean = mean(x), Sum = sum(z))]
##          Mean Sum
## 1: -0.3745166  27
# Any expression can be evaluated in j, here a frequency table of y.
myDT[, table(y)]
## y
## a b c 
## 4 4 4

Don’t you find it very easy to work with data table variables (columns)?

Adding new columns in a data table

Note the := operator. With a data table, unlike a data frame, a new copy is not created when you add a column; the table is modified in place. So, if you need the original, back up the older data table first.

# Add column w (the square of z) by reference; myDT itself is modified,
# so no assignment back to myDT is needed.
myDT[, w := z^2]
##              x y z  w
##  1: -0.7300522 a 2  4
##  2:  0.1245984 a 1  1
##  3:  0.1459543 a 3  9
##  4: -1.7862813 a 1  1
##  5:  0.6491136 b 0  0
##  6: -0.4203274 b 5 25
##  7: -1.1054911 b 3  9
##  8: -2.6015095 b 4 16
##  9:  0.8397754 c 3  9
## 10:  0.6980136 c 1  1
## 11: -0.7980801 c 3  9
## 12:  0.4900866 c 1  1
# Confirm that the new column is now part of myDT.
head(myDT)
##             x y z  w
## 1: -0.7300522 a 2  4
## 2:  0.1245984 a 1  1
## 3:  0.1459543 a 3  9
## 4: -1.7862813 a 1  1
## 5:  0.6491136 b 0  0
## 6: -0.4203274 b 5 25

Use the copy() function to create a copy of any data table

# Open the help page for copy().
?copy
# Take an independent copy so that later := updates do not touch myDT.
myDT.copy <- copy(myDT)
# Overwrite y in the copy. y is a character column, so assign the string "3"
# rather than the number 3: a numeric right-hand side would be coerced to
# character with a warning, giving the same values less cleanly.
myDT.copy[, y := "3"]
##              x y z  w
##  1: -0.7300522 3 2  4
##  2:  0.1245984 3 1  1
##  3:  0.1459543 3 3  9
##  4: -1.7862813 3 1  1
##  5:  0.6491136 3 0  0
##  6: -0.4203274 3 5 25
##  7: -1.1054911 3 3  9
##  8: -2.6015095 3 4 16
##  9:  0.8397754 3 3  9
## 10:  0.6980136 3 1  1
## 11: -0.7980801 3 3  9
## 12:  0.4900866 3 1  1
# The copy now holds the new y values ...
head(myDT.copy)
##             x y z  w
## 1: -0.7300522 3 2  4
## 2:  0.1245984 3 1  1
## 3:  0.1459543 3 3  9
## 4: -1.7862813 3 1  1
## 5:  0.6491136 3 0  0
## 6: -0.4203274 3 5 25
# ... while the original table still has its letters in y.
head(myDT)
##             x y z  w
## 1: -0.7300522 a 2  4
## 2:  0.1245984 a 1  1
## 3:  0.1459543 a 3  9
## 4: -1.7862813 a 1  1
## 5:  0.6491136 b 0  0
## 6: -0.4203274 b 5 25

You can see that myDT and myDT.copy are different now.

You can perform multiple operations within curly brackets; the value of the last expression is what gets assigned

# Several statements can run inside braces in j; the value of the last
# expression is assigned to newCol. Rows where w is 0 yield -Inf, since
# log10(0) is -Inf.
myDT.copy[, newCol := {
  w_squared <- w * w
  log10(w_squared)
}]
##              x y z  w   newCol
##  1: -0.7300522 3 2  4 1.204120
##  2:  0.1245984 3 1  1 0.000000
##  3:  0.1459543 3 3  9 1.908485
##  4: -1.7862813 3 1  1 0.000000
##  5:  0.6491136 3 0  0     -Inf
##  6: -0.4203274 3 5 25 2.795880
##  7: -1.1054911 3 3  9 1.908485
##  8: -2.6015095 3 4 16 2.408240
##  9:  0.8397754 3 3  9 1.908485
## 10:  0.6980136 3 1  1 0.000000
## 11: -0.7980801 3 3  9 1.908485
## 12:  0.4900866 3 1  1 0.000000
# Inspect the first rows with the new column in place.
head(myDT.copy)
##             x y z  w   newCol
## 1: -0.7300522 3 2  4 1.204120
## 2:  0.1245984 3 1  1 0.000000
## 3:  0.1459543 3 3  9 1.908485
## 4: -1.7862813 3 1  1 0.000000
## 5:  0.6491136 3 0  0     -Inf
## 6: -0.4203274 3 5 25 2.795880

You can even perform plyr-like operations. First, add a logical column b that flags the rows where z is greater than 2

# Add a logical column b by reference: TRUE where z exceeds 2, FALSE otherwise.
myDT.copy[, b := (z > 2)]
##              x y z  w   newCol     b
##  1: -0.7300522 3 2  4 1.204120 FALSE
##  2:  0.1245984 3 1  1 0.000000 FALSE
##  3:  0.1459543 3 3  9 1.908485  TRUE
##  4: -1.7862813 3 1  1 0.000000 FALSE
##  5:  0.6491136 3 0  0     -Inf FALSE
##  6: -0.4203274 3 5 25 2.795880  TRUE
##  7: -1.1054911 3 3  9 1.908485  TRUE
##  8: -2.6015095 3 4 16 2.408240  TRUE
##  9:  0.8397754 3 3  9 1.908485  TRUE
## 10:  0.6980136 3 1  1 0.000000 FALSE
## 11: -0.7980801 3 3  9 1.908485  TRUE
## 12:  0.4900866 3 1  1 0.000000 FALSE
# Inspect the first rows with the flag column added.
head(myDT.copy)
##             x y z  w   newCol     b
## 1: -0.7300522 3 2  4 1.204120 FALSE
## 2:  0.1245984 3 1  1 0.000000 FALSE
## 3:  0.1459543 3 3  9 1.908485  TRUE
## 4: -1.7862813 3 1  1 0.000000 FALSE
## 5:  0.6491136 3 0  0     -Inf FALSE
## 6: -0.4203274 3 5 25 2.795880  TRUE