install.packages(“RMySQL”)
URL - http://genome.ucsc.edu/goldenpath/help/mysql.html
library(RMySQL)
## Loading required package: DBI
ucscDB <- dbConnect(MySQL(), user = "genome", host = "genome-mysql.cse.ucsc.edu")
result <- dbGetQuery(ucscDB, "show databases;")
head(result)
## Database
## 1 information_schema
## 2 ailMel1
## 3 allMis1
## 4 anoCar1
## 5 anoCar2
## 6 anoGam1
It is important to disconnect from the server when you’re done.
dbDisconnect(ucscDB)
## [1] TRUE
I’m connecting to hg19 database
hg19 <- dbConnect(MySQL(), user = "genome", host = "genome-mysql.cse.ucsc.edu", db = "hg19")
allTables <- dbListTables(conn = hg19)
head(allTables)
## [1] "HInv" "HInvGeneMrna" "acembly" "acemblyClass"
## [5] "acemblyPep" "affyCytoScan"
length(allTables)
## [1] 11013
str(allTables)
## chr [1:11013] "HInv" "HInvGeneMrna" "acembly" "acemblyClass" ...
You can do further exploration as needed
allTables[1:100]
## [1] "HInv"
## [2] "HInvGeneMrna"
## [3] "acembly"
## [4] "acemblyClass"
## [5] "acemblyPep"
## [6] "affyCytoScan"
## [7] "affyExonProbeAmbiguous"
## [8] "affyExonProbeCore"
## [9] "affyExonProbeExtended"
## [10] "affyExonProbeFree"
## [11] "affyExonProbeFull"
## [12] "affyExonProbesetAmbiguous"
## [13] "affyExonProbesetCore"
## [14] "affyExonProbesetExtended"
## [15] "affyExonProbesetFree"
## [16] "affyExonProbesetFull"
## [17] "affyGnf1h"
## [18] "affyU133"
## [19] "affyU133Plus2"
## [20] "affyU95"
## [21] "agilentCgh1x1m"
## [22] "agilentCgh1x244k"
## [23] "agilentCgh2x105k"
## [24] "agilentCgh2x400k"
## [25] "agilentCgh4x180k"
## [26] "agilentCgh4x44k"
## [27] "agilentCgh8x60k"
## [28] "agilentCghSnp2x400k"
## [29] "agilentCghSnp4x180k"
## [30] "agilentCghSnpCancer4x180k"
## [31] "agilentHrd1x1m"
## [32] "allHg19RS_BW"
## [33] "all_bacends"
## [34] "all_est"
## [35] "all_fosends"
## [36] "all_mrna"
## [37] "all_sts_primer"
## [38] "all_sts_seq"
## [39] "allenBrainAli"
## [40] "allenBrainGene"
## [41] "allenBrainUrl"
## [42] "altSeqHaplotypesP10"
## [43] "altSeqHaplotypesP9"
## [44] "altSeqLiftOverPslP10"
## [45] "altSeqLiftOverPslP9"
## [46] "altSeqPatchesP10"
## [47] "author"
## [48] "bacEndPairs"
## [49] "bamAllNumtSSorted"
## [50] "bamSLFeld1"
## [51] "bamSLMez1"
## [52] "bamSLSid1253"
## [53] "bamSLVi33dot16"
## [54] "bamSLVi33dot25"
## [55] "bamSLVi33dot26"
## [56] "bigFiles"
## [57] "bioCycMapDesc"
## [58] "bioCycPathway"
## [59] "burgeRnaSeqGemMapperAlignAdipose"
## [60] "burgeRnaSeqGemMapperAlignAdiposeAllRawSignal"
## [61] "burgeRnaSeqGemMapperAlignBT474"
## [62] "burgeRnaSeqGemMapperAlignBT474AllRawSignal"
## [63] "burgeRnaSeqGemMapperAlignBrain"
## [64] "burgeRnaSeqGemMapperAlignBrainAllRawSignal"
## [65] "burgeRnaSeqGemMapperAlignBreast"
## [66] "burgeRnaSeqGemMapperAlignBreastAllRawSignal"
## [67] "burgeRnaSeqGemMapperAlignColon"
## [68] "burgeRnaSeqGemMapperAlignColonAllRawSignal"
## [69] "burgeRnaSeqGemMapperAlignHME"
## [70] "burgeRnaSeqGemMapperAlignHMEAllRawSignal"
## [71] "burgeRnaSeqGemMapperAlignHeart"
## [72] "burgeRnaSeqGemMapperAlignHeartAllRawSignal"
## [73] "burgeRnaSeqGemMapperAlignLiver"
## [74] "burgeRnaSeqGemMapperAlignLiverAllRawSignal"
## [75] "burgeRnaSeqGemMapperAlignLymphNode"
## [76] "burgeRnaSeqGemMapperAlignLymphNodeAllRawSignal"
## [77] "burgeRnaSeqGemMapperAlignMB435"
## [78] "burgeRnaSeqGemMapperAlignMB435AllRawSignal"
## [79] "burgeRnaSeqGemMapperAlignMCF7"
## [80] "burgeRnaSeqGemMapperAlignMCF7AllRawSignal"
## [81] "burgeRnaSeqGemMapperAlignSkelMuscle"
## [82] "burgeRnaSeqGemMapperAlignSkelMuscleAllRawSignal"
## [83] "burgeRnaSeqGemMapperAlignT47D"
## [84] "burgeRnaSeqGemMapperAlignT47DAllRawSignal"
## [85] "burgeRnaSeqGemMapperAlignTestes"
## [86] "burgeRnaSeqGemMapperAlignTestesAllRawSignal"
## [87] "ccdsGene"
## [88] "ccdsInfo"
## [89] "ccdsKgMap"
## [90] "ccdsNotes"
## [91] "cds"
## [92] "ceBlastTab"
## [93] "cell"
## [94] "cgapAlias"
## [95] "cgapBiocDesc"
## [96] "cgapBiocPathway"
## [97] "cgapSage"
## [98] "cgapSageLib"
## [99] "chainAilMel1"
## [100] "chainAilMel1Link"
dbListFields(conn = hg19, name = allTables[100])
## [1] "bin" "tName" "tStart" "tEnd" "qStart" "chainId"
dbGetQuery(conn = hg19, statement = paste("select count(*) from ", allTables[100], collapse = " "))
## count(*)
## 1 42313637
tabData <- dbReadTable(conn = hg19, name = "affyU133Plus2")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 2 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 4 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 6 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 11
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 12
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 13
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 15
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 16
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 17
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 18
## imported as numeric
warnings()
## NULL
head(tabData[1:5])
## bin matches misMatches repMatches nCount
## 1 585 530 4 0 23
## 2 585 3355 17 0 109
## 3 585 4156 14 0 83
## 4 585 4667 9 0 68
## 5 585 5180 14 0 167
## 6 585 468 5 0 14
nrow(tabData)
## [1] 58463
query <- dbSendQuery(conn = hg19, statement = "select * from affyU133Plus2 where misMatches between 1 and 3")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 2 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 4 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 6 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 11
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 12
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 13
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 15
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 16
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 17
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 18
## imported as numeric
subsetData <- fetch(query)
head(subsetData[1:5])
## bin matches misMatches repMatches nCount
## 1 585 723 3 0 6
## 2 586 740 2 0 26
## 3 73 986 3 0 17
## 4 587 741 1 0 26
## 5 587 985 1 0 17
## 6 587 938 1 0 0
nrow(subsetData)
## [1] 500
I’m interested in finding quantile of misMatches
quantile(subsetData$misMatches)
## 0% 25% 50% 75% 100%
## 1 1 2 2 3
I want to retrieve only 10 rows
subsubsetData <- fetch(res = query, n = 10)
dim(subsubsetData)
## [1] 10 22
I’m now going to clear the result query
dbClearResult(res = query)
## [1] TRUE
Don’t forget to close the connection
dbDisconnect(conn = hg19)
## [1] TRUE