Start by creating a project so that all scripts and data files are kept together. Use getwd() to check the current working directory and setwd(…) to change it.

# Create the "Data" folder on first run so the downloads below have somewhere
# to be written. The original statement had no body and no closing brace,
# which left the rest of the script unparseable.
if (!file.exists("Data")) {
  dir.create("Data")
}

# Open the help page for download.file().
?download.file

URL of Baltimore Fixed Speed Camera Dataset -

# Location of the CSV export.
# NOTE(review): the URL is blank in this copy of the notes; fill it in before
# running.
fileURL <- ""

# Fetch the CSV with curl and store it in the Data folder. `extra` is passed
# through to the curl command line; "-k" is curl's --insecure switch
# (NOTE(review): confirm it is still needed for this host).
download.file(
  url = fileURL,
  destfile = "Data/cameras.csv",
  method = "curl",
  extra = "-k"
)

# Show what the Data folder now contains.
list.files("Data")
## [1] "cameras.csv"  "cameras.xlsx"

# Record when the download happened.
dateDownloaded <- date()
## [1] "Sun Jun 28 19:35:34 2015"

1 Reading Local Files

# Load the downloaded CSV into a data frame using read.csv()'s defaults.
# NOTE(review): the structure printed below shows factor columns; R >= 4.0
# defaults to stringsAsFactors = FALSE, so a current R would report chr.
cameraData <- read.csv("Data/cameras.csv")
## 'data.frame':    80 obs. of  6 variables:
##  $ address     : Factor w/ 71 levels "E 33RD ST & THE ALAMEDA",..: 49 49 70 57 1 14 14 31 5 35 ...
##  $ direction   : Factor w/ 4 levels "E/B","N/B","S/B",..: 2 3 1 3 1 1 4 3 4 1 ...
##  $ street      : Factor w/ 55 levels "Caton Ave","Charles",..: 1 1 54 50 4 8 8 2 26 33 ...
##  $ crossStreet : Factor w/ 66 levels "33rd St","4th St",..: 6 6 49 1 58 40 40 36 7 38 ...
##  $ intersection: Factor w/ 74 levels " &","Caton Ave & Benson Ave",..: 2 2 73 69 7 13 13 3 35 47 ...
##  $ Location.1  : Factor w/ 76 levels "(39.1999130165, -76.5559766825)",..: 7 6 8 49 48 35 36 74 32 29 ...
##                          address direction      street  crossStreet
## 1       S CATON AVE & BENSON AVE       N/B   Caton Ave   Benson Ave
## 2       S CATON AVE & BENSON AVE       S/B   Caton Ave   Benson Ave
## 3 WILKENS AVE & PINE HEIGHTS AVE       E/B Wilkens Ave Pine Heights
## 4        THE ALAMEDA & E 33RD ST       S/B The Alameda      33rd St
## 5        E 33RD ST & THE ALAMEDA       E/B      E 33rd  The Alameda
## 6        ERDMAN AVE & N MACON ST       E/B      Erdman     Macon St
##                 intersection                      Location.1
## 1     Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
## 2     Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
## 3 Wilkens Ave & Pine Heights  (39.2720252302, -76.676960806)
## 4     The Alameda  & 33rd St (39.3285013141, -76.5953545714)
## 5      E 33rd  & The Alameda (39.3283410623, -76.5953594625)
## 6         Erdman  & Macon St (39.3068045671, -76.5593167803)

2 Working with Excel Files

# Location of the Excel export.
# NOTE(review): the URL is blank in this copy of the notes; fill it in before
# running.
fileURLxlsx <- ""

# Fetch the workbook with curl and store it in the Data folder.
download.file(
  url = fileURLxlsx,
  destfile = "Data/cameras.xlsx",
  method = "curl",
  extra = "-k"
)

# Record when the Excel download happened.
dateDownloadedXLSX <- date()

# Attach the xlsx package, which provides read.xlsx(). Without this call the
# statement below stops with "could not find function".
library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars

# Read the first worksheet of the workbook, taking column names from its
# first row.
cameraDataXLSX <- read.xlsx(
  file = "Data/cameras.xlsx",
  sheetIndex = 1,
  header = TRUE
)
##                          address direction      street  crossStreet
## 1       S CATON AVE & BENSON AVE       N/B   Caton Ave   Benson Ave
## 2       S CATON AVE & BENSON AVE       S/B   Caton Ave   Benson Ave
## 3 WILKENS AVE & PINE HEIGHTS AVE       E/B Wilkens Ave Pine Heights
## 4        THE ALAMEDA & E 33RD ST       S/B The Alameda      33rd St
## 5        E 33RD ST & THE ALAMEDA       E/B      E 33rd  The Alameda
## 6        ERDMAN AVE & N MACON ST       E/B      Erdman     Macon St
##                 intersection                      Location.1
## 1     Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
## 2     Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
## 3 Wilkens Ave & Pine Heights  (39.2720252302, -76.676960806)
## 4     The Alameda  & 33rd St (39.3285013141, -76.5953545714)
## 5      E 33rd  & The Alameda (39.3283410623, -76.5953594625)
## 6         Erdman  & Macon St (39.3068045671, -76.5593167803)

3 Reading specific rows and columns from an excel file

# Read only worksheet rows 3-10 and columns 2-5. Because header = TRUE, the
# first selected row is consumed as the column names, which is why the result
# printed below has headers such as "S.B" and "Caton.Ave".
cameraDataXLSXsubset <- read.xlsx(
  file = "Data/cameras.xlsx",
  sheetIndex = 1,
  rowIndex = 3:10,
  colIndex = 2:5,
  header = TRUE
)
##   S.B   Caton.Ave   Benson.Ave     Caton.Ave...Benson.Ave
## 1 E/B Wilkens Ave Pine Heights Wilkens Ave & Pine Heights
## 2 S/B The Alameda      33rd St     The Alameda  & 33rd St
## 3 E/B      E 33rd  The Alameda      E 33rd  & The Alameda
## 4 E/B      Erdman     Macon St         Erdman  & Macon St
## 5 W/B      Erdman     Macon St         Erdman  & Macon St
## 6 S/B     Charles     Lake Ave         Charles & Lake Ave
## 7 W/B     Madison  Caroline St     Madison  & Caroline St

You may also try the XLConnect package, which offers more advanced options for working with Excel files.

Freeing my RAM

# Drop the camera data frames first, then the download bookkeeping variables.
rm(list = c("cameraData", "cameraDataXLSX", "cameraDataXLSXsubset"))
rm(list = c("dateDownloaded", "dateDownloadedXLSX", "fileURL", "fileURLxlsx"))

For details on XML, you may refer to #Working with XML files

# Attach the XML package, which provides xmlTreeParse() as well as the
# xmlRoot(), xmlSApply(), xpathSApply() and htmlTreeParse() calls in this file.
library(XML)

# Location of the XML document.
# NOTE(review): the URL is blank in this copy of the notes; fill it in before
# running.
fileURLxml <- ""

# Parse the document into an in-memory tree of internal nodes.
simpleDoc <- xmlTreeParse(file = fileURLxml, useInternalNodes = TRUE)

Notice that simpleDoc is held in RAM and that no .xml file exists in the Data folder. That is expected: the document was parsed directly from the URL, and no destination path was ever specified. I mention it only in case you assumed it would be handled like the .csv or .xlsx downloads.

# Take the top-level node of the parsed document; the queries in the rest of
# this section start from it.
simpleRootNode <- xmlRoot(x = simpleDoc)
## [1] "breakfast_menu"
##   food   food   food   food   food
## "food" "food" "food" "food" "food"
## [1] "XMLInternalElementNode" "XMLInternalNode"
## [3] "XMLAbstractNode"

4 Notice the differences carefully for the following 4 commands

Command 1: simpleRootNode[1]

## $food
## <food>
##   <name>Belgian Waffles</name>
##   <price>$5.95</price>
##   <description>Two of our famous Belgian Waffles with plenty of real maple syrup</description>
##   <calories>650</calories>
## </food>
## attr(,"class")
## [1] "XMLInternalNodeList" "XMLNodeList"

Command 2: simpleRootNode[[1]]

## <food>
##   <name>Belgian Waffles</name>
##   <price>$5.95</price>
##   <description>Two of our famous Belgian Waffles with plenty of real maple syrup</description>
##   <calories>650</calories>
## </food>

Command 3: simpleRootNode[[1]][1]

## $name
## <name>Belgian Waffles</name>
## attr(,"class")
## [1] "XMLInternalNodeList" "XMLNodeList"

Command 4: simpleRootNode[[1]][[1]]

## <name>Belgian Waffles</name>

The way the statements above behave shows that elements of simpleRootNode should be accessed the same way you would access the elements of a list. You can also extract different parts of the file programmatically.

# Apply xmlValue to each child of the root node; as printed below, each <food>
# entry comes back as one string with its fields run together.
xmlSApply(simpleRootNode, xmlValue)
##                                                                                                                     food
##                               "Belgian Waffles$5.95Two of our famous Belgian Waffles with plenty of real maple syrup650"
##                                                                                                                     food
##                    "Strawberry Belgian Waffles$7.95Light Belgian waffles covered with strawberries and whipped cream900"
##                                                                                                                     food
## "Berry-Berry Belgian Waffles$8.95Light Belgian waffles covered with an assortment of fresh berries and whipped cream900"
##                                                                                                                     food
##                                                "French Toast$4.50Thick slices made from our homemade sourdough bread600"
##                                                                                                                     food
##                         "Homestyle Breakfast$6.95Two eggs, bacon or sausage, toast, and our ever-popular hash browns950"

You may try XPath language for completely exploring any XML file

# Text of every <name> element found anywhere below the root node.
xpathSApply(simpleRootNode, "//name", xmlValue)
## [1] "Belgian Waffles"             "Strawberry Belgian Waffles"
## [3] "Berry-Berry Belgian Waffles" "French Toast"
## [5] "Homestyle Breakfast"

# Same query for every <price> element.
xpathSApply(simpleRootNode, "//price", xmlValue)
## [1] "$5.95" "$7.95" "$8.95" "$4.50" "$6.95"

5 Drilling into Baltimore-Ravens

# Location of the team page.
# NOTE(review): the URL is blank in this copy of the notes; fill it in before
# running.
brFileURL <- ""

# Parse the HTML page into an in-memory tree that XPath can query.
brDoc <- htmlTreeParse(brFileURL, useInternalNodes = TRUE)

# Text of every <li class="record"> element.
records <- xpathSApply(brDoc, "//li[@class = 'record']", xmlValue)
## [1] "10-6" "10-6"

# Text of every <li class="team-name"> element.
teams <- xpathSApply(brDoc, "//li[@class = 'team-name']", xmlValue)
## [1] "Baltimore RavensRavens" "Baltimore RavensRavens"

For details on JSON, visit - #Working with JSON

# Attach jsonlite, which provides the fromJSON() and toJSON() calls in this
# file. The attach messages echoed below were present in the notes, but the
# call that produces them was missing.
library(jsonlite)
## Attaching package: 'jsonlite'
## The following object is masked from 'package:utils':
##     View

# Parse JSON into an R object.
# NOTE(review): the txt argument is blank in this copy of the notes; supply
# the JSON source (URL, file path or string) before running.
jsonData <- fromJSON(txt = "")
##  [1] "id"                "name"              "full_name"
##  [4] "owner"             "private"           "html_url"
##  [7] "description"       "fork"              "url"
## [10] "forks_url"         "keys_url"          "collaborators_url"
## [13] "teams_url"         "hooks_url"         "issue_events_url"
## [16] "events_url"        "assignees_url"     "branches_url"
## [19] "tags_url"          "blobs_url"         "git_tags_url"
## [22] "git_refs_url"      "trees_url"         "statuses_url"
## [25] "languages_url"     "stargazers_url"    "contributors_url"
## [28] "subscribers_url"   "subscription_url"  "commits_url"
## [31] "git_commits_url"   "comments_url"      "issue_comment_url"
## [34] "contents_url"      "compare_url"       "merges_url"
## [37] "archive_url"       "downloads_url"     "issues_url"
## [40] "pulls_url"         "milestones_url"    "notifications_url"
## [43] "labels_url"        "releases_url"      "created_at"
## [46] "updated_at"        "pushed_at"         "git_url"
## [49] "ssh_url"           "clone_url"         "svn_url"
## [52] "homepage"          "size"              "stargazers_count"
## [55] "watchers_count"    "language"          "has_issues"
## [58] "has_downloads"     "has_wiki"          "has_pages"
## [61] "forks_count"       "mirror_url"        "open_issues_count"
## [64] "forks"             "open_issues"       "watchers"
## [67] "default_branch"
##  [1] "login"               "id"                  "avatar_url"
##  [4] "gravatar_id"         "url"                 "html_url"
##  [7] "followers_url"       "following_url"       "gists_url"
## [10] "starred_url"         "subscriptions_url"   "organizations_url"
## [13] "repos_url"           "events_url"          "received_events_url"
## [16] "type"                "site_admin"
##  [1] "login"               "id"                  "avatar_url"
##  [4] "gravatar_id"         "url"                 "html_url"
##  [7] "followers_url"       "following_url"       "gists_url"
## [10] "starred_url"         "subscriptions_url"   "organizations_url"
## [13] "repos_url"           "events_url"          "received_events_url"
## [16] "type"                "site_admin"
##  [1] ""
##  [2] ""
##  [3] ""
##  [4] ""
##  [5] ""
##  [6] ""
##  [7] ""
##  [8] ""
##  [9] ""
## [10] ""
## [11] ""
## [12] ""
## [13] ""
## [14] ""
## [15] ""
## [16] ""
## [17] ""
## [18] ""
## [19] ""
## [20] ""
## [21] ""
## [22] ""
## [23] ""
## [24] ""
## [25] ""
## [26] ""
## [27] ""
## [28] ""
## [29] ""
## [30] ""

6 Convert to JSON

# Help page: Edgar Anderson's Iris Data.
help("iris")

# Serialise the iris data frame to JSON text; pretty = TRUE will give you
# nice indentation.
iris.json <- toJSON(x = iris, pretty = TRUE)

# Help page: Concatenate and Print.
help("cat")
7 Convert back from JSON

# Parse the JSON text back into a data frame; as the structure printed below
# shows, Species comes back as a character column.
iris.df <- fromJSON(iris.json)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : chr  "setosa" "setosa" "setosa" "setosa" ...