1 Reading from MySQL

install.packages(“RMySQL”)
URL - http://genome.ucsc.edu/goldenpath/help/mysql.html

library(RMySQL)
## Loading required package: DBI

2 Connect to the database server

ucscDB <- dbConnect(MySQL(), user = "genome", host = "genome-mysql.cse.ucsc.edu")

3 Query the database server

result <- dbGetQuery(ucscDB, "show databases;")
head(result)
##             Database
## 1 information_schema
## 2            ailMel1
## 3            allMis1
## 4            anoCar1
## 5            anoCar2
## 6            anoGam1

It is important to disconnect from the server when you’re done.

dbDisconnect(ucscDB)
## [1] TRUE

4 Connecting to a database

I’m connecting to hg19 database

hg19 <- dbConnect(MySQL(), user = "genome", host = "genome-mysql.cse.ucsc.edu", db = "hg19")

allTables <- dbListTables(conn = hg19)
head(allTables)
## [1] "HInv"         "HInvGeneMrna" "acembly"      "acemblyClass"
## [5] "acemblyPep"   "affyCytoScan"
length(allTables)
## [1] 11013
str(allTables)
##  chr [1:11013] "HInv" "HInvGeneMrna" "acembly" "acemblyClass" ...

You can do further exploration as needed

allTables[1:100]
##   [1] "HInv"                                           
##   [2] "HInvGeneMrna"                                   
##   [3] "acembly"                                        
##   [4] "acemblyClass"                                   
##   [5] "acemblyPep"                                     
##   [6] "affyCytoScan"                                   
##   [7] "affyExonProbeAmbiguous"                         
##   [8] "affyExonProbeCore"                              
##   [9] "affyExonProbeExtended"                          
##  [10] "affyExonProbeFree"                              
##  [11] "affyExonProbeFull"                              
##  [12] "affyExonProbesetAmbiguous"                      
##  [13] "affyExonProbesetCore"                           
##  [14] "affyExonProbesetExtended"                       
##  [15] "affyExonProbesetFree"                           
##  [16] "affyExonProbesetFull"                           
##  [17] "affyGnf1h"                                      
##  [18] "affyU133"                                       
##  [19] "affyU133Plus2"                                  
##  [20] "affyU95"                                        
##  [21] "agilentCgh1x1m"                                 
##  [22] "agilentCgh1x244k"                               
##  [23] "agilentCgh2x105k"                               
##  [24] "agilentCgh2x400k"                               
##  [25] "agilentCgh4x180k"                               
##  [26] "agilentCgh4x44k"                                
##  [27] "agilentCgh8x60k"                                
##  [28] "agilentCghSnp2x400k"                            
##  [29] "agilentCghSnp4x180k"                            
##  [30] "agilentCghSnpCancer4x180k"                      
##  [31] "agilentHrd1x1m"                                 
##  [32] "allHg19RS_BW"                                   
##  [33] "all_bacends"                                    
##  [34] "all_est"                                        
##  [35] "all_fosends"                                    
##  [36] "all_mrna"                                       
##  [37] "all_sts_primer"                                 
##  [38] "all_sts_seq"                                    
##  [39] "allenBrainAli"                                  
##  [40] "allenBrainGene"                                 
##  [41] "allenBrainUrl"                                  
##  [42] "altSeqHaplotypesP10"                            
##  [43] "altSeqHaplotypesP9"                             
##  [44] "altSeqLiftOverPslP10"                           
##  [45] "altSeqLiftOverPslP9"                            
##  [46] "altSeqPatchesP10"                               
##  [47] "author"                                         
##  [48] "bacEndPairs"                                    
##  [49] "bamAllNumtSSorted"                              
##  [50] "bamSLFeld1"                                     
##  [51] "bamSLMez1"                                      
##  [52] "bamSLSid1253"                                   
##  [53] "bamSLVi33dot16"                                 
##  [54] "bamSLVi33dot25"                                 
##  [55] "bamSLVi33dot26"                                 
##  [56] "bigFiles"                                       
##  [57] "bioCycMapDesc"                                  
##  [58] "bioCycPathway"                                  
##  [59] "burgeRnaSeqGemMapperAlignAdipose"               
##  [60] "burgeRnaSeqGemMapperAlignAdiposeAllRawSignal"   
##  [61] "burgeRnaSeqGemMapperAlignBT474"                 
##  [62] "burgeRnaSeqGemMapperAlignBT474AllRawSignal"     
##  [63] "burgeRnaSeqGemMapperAlignBrain"                 
##  [64] "burgeRnaSeqGemMapperAlignBrainAllRawSignal"     
##  [65] "burgeRnaSeqGemMapperAlignBreast"                
##  [66] "burgeRnaSeqGemMapperAlignBreastAllRawSignal"    
##  [67] "burgeRnaSeqGemMapperAlignColon"                 
##  [68] "burgeRnaSeqGemMapperAlignColonAllRawSignal"     
##  [69] "burgeRnaSeqGemMapperAlignHME"                   
##  [70] "burgeRnaSeqGemMapperAlignHMEAllRawSignal"       
##  [71] "burgeRnaSeqGemMapperAlignHeart"                 
##  [72] "burgeRnaSeqGemMapperAlignHeartAllRawSignal"     
##  [73] "burgeRnaSeqGemMapperAlignLiver"                 
##  [74] "burgeRnaSeqGemMapperAlignLiverAllRawSignal"     
##  [75] "burgeRnaSeqGemMapperAlignLymphNode"             
##  [76] "burgeRnaSeqGemMapperAlignLymphNodeAllRawSignal" 
##  [77] "burgeRnaSeqGemMapperAlignMB435"                 
##  [78] "burgeRnaSeqGemMapperAlignMB435AllRawSignal"     
##  [79] "burgeRnaSeqGemMapperAlignMCF7"                  
##  [80] "burgeRnaSeqGemMapperAlignMCF7AllRawSignal"      
##  [81] "burgeRnaSeqGemMapperAlignSkelMuscle"            
##  [82] "burgeRnaSeqGemMapperAlignSkelMuscleAllRawSignal"
##  [83] "burgeRnaSeqGemMapperAlignT47D"                  
##  [84] "burgeRnaSeqGemMapperAlignT47DAllRawSignal"      
##  [85] "burgeRnaSeqGemMapperAlignTestes"                
##  [86] "burgeRnaSeqGemMapperAlignTestesAllRawSignal"    
##  [87] "ccdsGene"                                       
##  [88] "ccdsInfo"                                       
##  [89] "ccdsKgMap"                                      
##  [90] "ccdsNotes"                                      
##  [91] "cds"                                            
##  [92] "ceBlastTab"                                     
##  [93] "cell"                                           
##  [94] "cgapAlias"                                      
##  [95] "cgapBiocDesc"                                   
##  [96] "cgapBiocPathway"                                
##  [97] "cgapSage"                                       
##  [98] "cgapSageLib"                                    
##  [99] "chainAilMel1"                                   
## [100] "chainAilMel1Link"

5 Get dimensions of some specific table

dbListFields(conn = hg19, name = allTables[100])
## [1] "bin"     "tName"   "tStart"  "tEnd"    "qStart"  "chainId"
dbGetQuery(conn = hg19, statement = paste("select count(*) from ", allTables[100], collapse = " "))
##   count(*)
## 1 42313637

6 Read from a table

tabData <- dbReadTable(conn = hg19, name = "affyU133Plus2")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 2 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 4 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 6 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 11
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 12
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 13
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 15
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 16
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 17
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 18
## imported as numeric
warnings()
## NULL
head(tabData[1:5])
##   bin matches misMatches repMatches nCount
## 1 585     530          4          0     23
## 2 585    3355         17          0    109
## 3 585    4156         14          0     83
## 4 585    4667          9          0     68
## 5 585    5180         14          0    167
## 6 585     468          5          0     14
nrow(tabData)
## [1] 58463

7 Select a specific subset

query <- dbSendQuery(conn = hg19, statement = "select * from affyU133Plus2 where misMatches between 1 and 3")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 2 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 4 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 6 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 11
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 12
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 13
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 15
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 16
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 17
## imported as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 18
## imported as numeric
subsetData <- fetch(query)
head(subsetData[1:5])
##   bin matches misMatches repMatches nCount
## 1 585     723          3          0      6
## 2 586     740          2          0     26
## 3  73     986          3          0     17
## 4 587     741          1          0     26
## 5 587     985          1          0     17
## 6 587     938          1          0      0
nrow(subsetData)
## [1] 500

I’m interested in finding quantile of misMatches

quantile(subsetData$misMatches)
##   0%  25%  50%  75% 100% 
##    1    1    2    2    3

I want to retrieve only 10 rows

subsubsetData <- fetch(res = query, n = 10)

dim(subsubsetData)
## [1] 10 22

I’m now going to clear the result query

dbClearResult(res = query)
## [1] TRUE

Don’t forget to close the connection

dbDisconnect(conn = hg19)
## [1] TRUE