openadds
openadds talks to Openaddresses.io. A rundown of what it does:
Install
devtools::install_github("sckott/openadds")
library("openadds")
List datasets
Scrapes links to datasets from the openaddresses site
dat <- oa_list()
dat[2:6]
#> [1] "https://data.openaddresses.io.s3.amazonaws.com/20150511/au-tas-launceston.csv"
#> [2] "https://s3.amazonaws.com/data.openaddresses.io/20141127/au-victoria.zip"
#> [3] "https://data.openaddresses.io.s3.amazonaws.com/20150511/be-flanders.zip"
#> [4] "https://data.openaddresses.io.s3.amazonaws.com/20150417/ca-ab-calgary.zip"
#> [5] "https://data.openaddresses.io.s3.amazonaws.com/20150511/ca-ab-grande_prairie.zip"
Search for datasets
Uses oa_list() internally, then searches through the requested columns.
oa_search(country = "us", state = "ca")
#> Source: local data frame [68 x 5]
#>
#> country state city ext
#> 1 us ca san_mateo_county .zip
#> 2 us ca alameda_county .zip
#> 3 us ca alameda_county .zip
#> 4 us ca amador .zip
#> 5 us ca amador .zip
#> 6 us ca bakersfield .zip
#> 7 us ca bakersfield .zip
#> 8 us ca berkeley .zip
#> 9 us ca berkeley .zip
#> 10 us ca butte_county .zip
#> .. ... ... ... ...
#> Variables not shown: url (chr)
Get data
Passing in a URL
(out1 <- oa_get(dat[5]))
#> <Openaddresses data> ~/.openadds/ca-ab-calgary.zip
#> Dimensions [350962, 13]
#>
#> OBJECTID ADDRESS_TY ADDRESS STREET_NAM STREET_TYP
#> 0 757023 Parcel 249 SAGE MEADOWS CI NW SAGE MEADOWS CI
#> 1 757022 Parcel 2506 17 ST SE 17 ST
#> 2 757021 Parcel 305 EVANSPARK GD NW EVANSPARK GD
#> 3 757020 Parcel 321 EVANSPARK GD NW EVANSPARK GD
#> 4 757019 Parcel 204 EVANSBROOKE LD NW EVANSBROOKE LD
#> 5 757018 Parcel 200 EVANSBROOKE LD NW EVANSBROOKE LD
#> 6 757017 Parcel 219 HIDDEN VALLEY LD NW HIDDEN VALLEY LD
#> 7 757016 Parcel 211 HIDDEN VALLEY LD NW HIDDEN VALLEY LD
#> 8 757015 Parcel 364 HIDDEN VALLEY LD NW HIDDEN VALLEY LD
#> 9 757014 Parcel 348 HIDDEN VALLEY LD NW HIDDEN VALLEY LD
#> .. ... ... ... ... ...
#> Variables not shown: STREET_QUA (fctr), HOUSE_NUMB (int), HOUSE_ALPH
#> (fctr), SUITE_NUMB (int), SUITE_ALPH (fctr), LONGITUDE (dbl),
#> LATITUDE (dbl), COMM_NAME (fctr)
First get the URL for a dataset through as_openadd(), then pass it to oa_get()
(x <- as_openadd("us", "nm", "hidalgo"))
#> <<OpenAddreses>>
#> <<country>> us
#> <<state>> nm
#> <<city>> hidalgo
#> <<extension>> .csv
oa_get(x)
#> <Openaddresses data> ~/.openadds/us-nm-hidalgo.csv
#> Dimensions [170659, 37]
#>
#> OBJECTID Shape ADD_NUM ADD_SUF PRE_MOD PRE_DIR PRE_TYPE ST_NAME
#> 1 1 NA 422 S 2ND
#> 2 2 NA 1413 S 4TH
#> 3 3 NA 412 E CHAMPION
#> 4 4 NA 110 E SAMANO
#> 5 5 NA 2608 W FREDDY GONZALEZ
#> 6 6 NA 2604 W FREDDY GONZALEZ
#> 7 7 NA 1123 W FAY
#> 8 8 NA 417 S 2ND
#> 9 9 NA 4551 E TEXAS
#> 10 10 NA 810 DRIFTWOOD
#> .. ... ... ... ... ... ... ... ...
#> Variables not shown: ST_TYPE (chr), POS_DIR (chr), POS_MOD (chr), ESN
#> (int), MSAG_COMM (chr), PARCEL_ID (chr), PLACE_TYPE (chr), LANDMARK
#> (chr), BUILDING (chr), UNIT (chr), ROOM (chr), FLOOR (int), LOC_NOTES
#> (chr), ST_ALIAS (chr), FULL_ADDR (chr), ZIP (chr), POSTAL_COM (chr),
#> MUNICIPAL (chr), COUNTY (chr), STATE (chr), SOURCE (chr), REGION
#> (chr), EXCH (chr), LAT (dbl), LONG (dbl), PICTURE (chr), OA:x (dbl),
#> OA:y (dbl), OA:geom (chr)
Combine multiple datasets
oa_combine() attempts to guess lat/long and address columns, but there is definitely more work
to do to make this work for most cases. Lat/long and address columns vary among datasets - some
datasets have no lat/long data, some have no address data.
out2 <- oa_get(dat[32])
(alldat <- oa_combine(out1, out2))
#> Source: local data frame [418,623 x 4]
#>
#> lon lat address dataset
#> 1 -114.1303 51.17188 249 SAGE MEADOWS CI NW ca-ab-calgary.zip
#> 2 -114.0190 51.03168 2506 17 ST SE ca-ab-calgary.zip
#> 3 -114.1175 51.17497 305 EVANSPARK GD NW ca-ab-calgary.zip
#> 4 -114.1175 51.17461 321 EVANSPARK GD NW ca-ab-calgary.zip
#> 5 -114.1212 51.16268 204 EVANSBROOKE LD NW ca-ab-calgary.zip
#> 6 -114.1213 51.16264 200 EVANSBROOKE LD NW ca-ab-calgary.zip
#> 7 -114.1107 51.14784 219 HIDDEN VALLEY LD NW ca-ab-calgary.zip
#> 8 -114.1108 51.14768 211 HIDDEN VALLEY LD NW ca-ab-calgary.zip
#> 9 -114.1121 51.14780 364 HIDDEN VALLEY LD NW ca-ab-calgary.zip
#> 10 -114.1117 51.14800 348 HIDDEN VALLEY LD NW ca-ab-calgary.zip
#> .. ... ... ... ...
Map data
Get some data
(out <- oa_get(dat[400]))
#> <Openaddresses data> ~/.openadds/us-ca-sonoma_county.zip
#> Dimensions [217243, 5]
#>
#> LON LAT NUMBER STREET POSTCODE
#> 1 -122.5327 38.29779 3771 A Cory Lane NA
#> 2 -122.5422 38.30354 18752 White Oak Drive NA
#> 3 -122.5412 38.30327 18749 White Oak Drive NA
#> 4 -122.3997 38.26122 3552 Napa Road NA
#> 5 -122.5425 38.30404 3998 White Oak Court NA
#> 6 -122.5429 38.30434 4026 White Oak Court NA
#> 7 -122.5430 38.30505 4039 White Oak Court NA
#> 8 -122.5417 38.30504 4017 White Oak Court NA
#> 9 -122.5409 38.30436 18702 White Oak Drive NA
#> 10 -122.5403 38.30392 18684 White Oak Drive NA
#> .. ... ... ... ... ...
Make an interactive map (not all data)
library("leaflet")
x <- oa_get(oa_search(country = "us", city = "boulder")[1,]$url)
y <- oa_get(oa_search(country = "us", city = "gunnison")[1,]$url)
oa_combine(x, y) %>%
leaflet() %>%
addTiles() %>%
addCircles(lat = ~lat, lng = ~lon, popup = ~address)
To do
- Surely there are many datasets that won’t work in oa_combine() - gotta go through many more.
- An easy viz function wrapping leaflet
- Since you can get a lot of spatial data quickly, an easy way to visualize big data, maybe marker clusters?