library(data.table)
# Open the help page for data.table, an enhanced data.frame.
?data.table
# Build a 12-row table: x is standard-normal noise, y labels three groups of
# four rows ("a", "b", "c"), and z holds Poisson counts with mean 2.
myDT <- data.table(
  x = rnorm(12),
  y = rep(c("a", "b", "c"), each = 4),
  z = rpois(12, lambda = 2)
)
# Preview the first five rows.
head(myDT, n = 5)
## x y z
## 1: -0.7300522 a 2
## 2: 0.1245984 a 1
## 3: 0.1459543 a 3
## 4: -1.7862813 a 1
## 5: 0.6491136 b 0
See all the data tables in memory
# List every data.table currently in memory.
tables()
## NAME NROW NCOL MB COLS KEY
## [1,] myDT 12 3 1 x,y,z
## Total: 1MB
# Pick out row 10; a lone index inside [ ] addresses rows.
myDT[10]
## x y z
## 1: 0.6980136 c 1
# Keep the rows whose group label sorts before "c". Columns can be named
# directly inside [ ], so the myDT$ prefix is not needed.
myDT[y < "c"]
## x y z
## 1: -0.7300522 a 2
## 2: 0.1245984 a 1
## 3: 0.1459543 a 3
## 4: -1.7862813 a 1
## 5: 0.6491136 b 0
## 6: -0.4203274 b 5
## 7: -1.1054911 b 3
## 8: -2.6015095 b 4
# Rows 2 and 3.
myDT[2:3]
## x y z
## 1: 0.1245984 a 1
## 2: 0.1459543 a 3
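The column subsetting approach does not work as expected: in the output below, c(2, 3) in the column position is evaluated as an ordinary expression and returned as-is instead of selecting columns 2 and 3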
# A numeric vector in j. The recorded output shows it evaluated as a plain
# expression (returning c(2, 3)) rather than selecting columns.
# NOTE(review): data.table 1.9.8 and later are expected to select columns 2
# and 3 here instead -- confirm against the installed version.
myDT[, c(2, 3)]
## [1] 2 3
# Extract a single column as a plain vector.
myDT[["x"]]
## [1] -0.7300522 0.1245984 0.1459543 -1.7862813 0.6491136 -0.4203274
## [7] -1.1054911 -2.6015095 0.8397754 0.6980136 -0.7980801 0.4900866
# j accepts expressions over the columns; .() is shorthand for list().
myDT[, .(Mean = mean(x), Sum = sum(z))]
## Mean Sum
## 1: -0.3745166 27
# Tabulate the group labels held in y.
myDT[, table(y)]
## y
## a b c
## 4 4 4
Don’t you find it very easy to work with data table variables (columns)? Adding new columns in a data table: note the := operator. Unlike a data frame, a data table is not copied when you add a column, so back up the original data table first if you still need it.
# Add column w (z squared) with := ; myDT itself is modified, no copy is made.
myDT[, w := z^2]
## x y z w
## 1: -0.7300522 a 2 4
## 2: 0.1245984 a 1 1
## 3: 0.1459543 a 3 9
## 4: -1.7862813 a 1 1
## 5: 0.6491136 b 0 0
## 6: -0.4203274 b 5 25
## 7: -1.1054911 b 3 9
## 8: -2.6015095 b 4 16
## 9: 0.8397754 c 3 9
## 10: 0.6980136 c 1 1
## 11: -0.7980801 c 3 9
## 12: 0.4900866 c 1 1
# Confirm the new column is present in the first six rows.
head(myDT, n = 6)
## x y z w
## 1: -0.7300522 a 2 4
## 2: 0.1245984 a 1 1
## 3: 0.1459543 a 3 9
## 4: -1.7862813 a 1 1
## 5: 0.6491136 b 0 0
## 6: -0.4203274 b 5 25
Use the copy() function to create a copy of any data table
# Open the help page for copy().
?copy
# Take an independent copy so that := on myDT.copy leaves myDT untouched.
myDT.copy <- copy(myDT)
# Overwrite y in the copy. y is a character column, so assign the string "3";
# assigning the number 3 makes data.table coerce the value and emit a
# type-coercion warning.
myDT.copy[, y := "3"]
## x y z w
## 1: -0.7300522 3 2 4
## 2: 0.1245984 3 1 1
## 3: 0.1459543 3 3 9
## 4: -1.7862813 3 1 1
## 5: 0.6491136 3 0 0
## 6: -0.4203274 3 5 25
## 7: -1.1054911 3 3 9
## 8: -2.6015095 3 4 16
## 9: 0.8397754 3 3 9
## 10: 0.6980136 3 1 1
## 11: -0.7980801 3 3 9
## 12: 0.4900866 3 1 1
# The copy now carries the overwritten y ...
head(myDT.copy)
## x y z w
## 1: -0.7300522 3 2 4
## 2: 0.1245984 3 1 1
## 3: 0.1459543 3 3 9
## 4: -1.7862813 3 1 1
## 5: 0.6491136 3 0 0
## 6: -0.4203274 3 5 25
# ... while the original keeps its group labels.
head(myDT)
## x y z w
## 1: -0.7300522 a 2 4
## 2: 0.1245984 a 1 1
## 3: 0.1459543 a 3 9
## 4: -1.7862813 a 1 1
## 5: 0.6491136 b 0 0
## 6: -0.4203274 b 5 25
You can see that myDT and myDT.copy are different now. You can perform multiple operations within curly brackets.
# A braced j expression can run several statements; the value of the last one
# is what gets assigned. Here newCol = log10(w * w), so rows with w == 0
# yield -Inf.
myDT.copy[, newCol := {
  w_squared <- w * w
  log10(w_squared)
}]
## x y z w newCol
## 1: -0.7300522 3 2 4 1.204120
## 2: 0.1245984 3 1 1 0.000000
## 3: 0.1459543 3 3 9 1.908485
## 4: -1.7862813 3 1 1 0.000000
## 5: 0.6491136 3 0 0 -Inf
## 6: -0.4203274 3 5 25 2.795880
## 7: -1.1054911 3 3 9 1.908485
## 8: -2.6015095 3 4 16 2.408240
## 9: 0.8397754 3 3 9 1.908485
## 10: 0.6980136 3 1 1 0.000000
## 11: -0.7980801 3 3 9 1.908485
## 12: 0.4900866 3 1 1 0.000000
# First six rows, now including newCol.
head(myDT.copy, n = 6)
## x y z w newCol
## 1: -0.7300522 3 2 4 1.204120
## 2: 0.1245984 3 1 1 0.000000
## 3: 0.1459543 3 3 9 1.908485
## 4: -1.7862813 3 1 1 0.000000
## 5: 0.6491136 3 0 0 -Inf
## 6: -0.4203274 3 5 25 2.795880
You can even perform plyr-like operations.
# Add a logical column b that is TRUE where the count z exceeds 2.
myDT.copy[, b := (z > 2)]
## x y z w newCol b
## 1: -0.7300522 3 2 4 1.204120 FALSE
## 2: 0.1245984 3 1 1 0.000000 FALSE
## 3: 0.1459543 3 3 9 1.908485 TRUE
## 4: -1.7862813 3 1 1 0.000000 FALSE
## 5: 0.6491136 3 0 0 -Inf FALSE
## 6: -0.4203274 3 5 25 2.795880 TRUE
## 7: -1.1054911 3 3 9 1.908485 TRUE
## 8: -2.6015095 3 4 16 2.408240 TRUE
## 9: 0.8397754 3 3 9 1.908485 TRUE
## 10: 0.6980136 3 1 1 0.000000 FALSE
## 11: -0.7980801 3 3 9 1.908485 TRUE
## 12: 0.4900866 3 1 1 0.000000 FALSE
# First six rows, now including b.
head(myDT.copy, n = 6)
## x y z w newCol b
## 1: -0.7300522 3 2 4 1.204120 FALSE
## 2: 0.1245984 3 1 1 0.000000 FALSE
## 3: 0.1459543 3 3 9 1.908485 TRUE
## 4: -1.7862813 3 1 1 0.000000 FALSE
## 5: 0.6491136 3 0 0 -Inf FALSE
## 6: -0.4203274 3 5 25 2.795880 TRUE