skip to Main Content

I have a large JSON file (383,255KB). It contains street names, addresses, and lat/long values.
I think I’m right in saying this is a WGS84 World Geodetic System 84 file format.

All I want to do is query this data, in "R".
I’m thinking in SQL, because it is efficient in this context, but
I am not aware of any package/method to take this approach.

I would like to:

  select Latitude, Longitude From JSONData Where Address = "160" and LFNAME = "Transit Rd"

The structure (and a few relevant data blocks) of the JSON data file are shown here:

{
    "type" : "FeatureCollection",
    "name" : "ADDRESS_POINT_WGS84",
    "features" : [
        {
            "type" : "Feature",
            "geometry" : {
                "type" : "Point",
                "coordinates" : [ -79.539538708, 43.586921565 ]
            },
            "properties" : {
                "GEO_ID" : 5729529,
                "LINK" : 5729516,
                "MAINT_STAG" : "REGULAR",
                "ADDRESS" : "399",
                "LFNAME" : "Lake Promenade",
                "LO_NUM" : 399,
                "LONUMSUF" : null,
                "HINUM" : null,
                "HINUMSUF" : null,
                "ARC_SIDE" : "L",
                "DISTANCE" : 4.15,
                "FCODE" : 115001,
                "FCODE_DES" : "Unknown",
                "CLASS" : "Land",
                "NAME" : null,
                "X" : 301607.151,
                "Y" : 4827443.931,
                "LONGITUDE" : -79.5395419864,
                "LATITUDE" : 43.5869301259,
                "OBJECTID" : 1491585,
                "MUN_NAME" : "Etobicoke",
                "WARD_NAME" : "Etobicoke-Lakeshore"
            }
        },
        {
            "type" : "Feature",
            "geometry" : {
                "type" : "Point",
                "coordinates" : [ -79.540371034, 43.5882630139 ]
            },
            "properties" : {
                "GEO_ID" : 9950585,
                "LINK" : 9950578,
                "MAINT_STAG" : "REGULAR",
                "ADDRESS" : "7",
                "LFNAME" : "Hilo Rd",
                "LO_NUM" : 7,
                "LONUMSUF" : null,
                "HINUM" : null,
                "HINUMSUF" : null,
                "ARC_SIDE" : "L",
                "DISTANCE" : 43.77,
                "FCODE" : 115001,
                "FCODE_DES" : "Unknown",
                "CLASS" : "Land",
                "NAME" : null,
                "X" : 301540.011,
                "Y" : 4827592.989,
                "LONGITUDE" : -79.5403743125,
                "LATITUDE" : 43.5882715763,
                "OBJECTID" : 1491586,
                "MUN_NAME" : "Etobicoke",
                "WARD_NAME" : "Etobicoke-Lakeshore"
            }
        },
        {
            "type" : "Feature",
            "geometry" : {
                "type" : "Point",
                "coordinates" : [ -79.5390269742, 43.587174988 ]
            },
            "properties" : {
                "GEO_ID" : 5729496,
                "LINK" : 5729468,
                "MAINT_STAG" : "REGULAR",
                "ADDRESS" : "387",
                "LFNAME" : "Lake Promenade",
                "LO_NUM" : 387,
                "LONUMSUF" : null,
                "HINUM" : null,
                "HINUMSUF" : null,
                "ARC_SIDE" : "L",
                "DISTANCE" : 97.58,
                "FCODE" : 115001,
                "FCODE_DES" : "Unknown",
                "CLASS" : "Land",
                "NAME" : null,
                "X" : 301648.488,
                "Y" : 4827472.065,
                "LONGITUDE" : -79.5390302568,
                "LATITUDE" : 43.5871835468,
                "OBJECTID" : 1491591,
                "MUN_NAME" : "Etobicoke",
                "WARD_NAME" : "Etobicoke-Lakeshore"
            }
        },
        {
            "type" : "Feature",
            "geometry" : {
                "type" : "Point",
                "coordinates" : [ -79.3880422943, 43.6830929049 ]
            },
            "properties" : {
                "GEO_ID" : 850466,
                "LINK" : 20002604,
                "MAINT_STAG" : "REGULAR",
                "ADDRESS" : "1",
                "LFNAME" : "Ottawa St",
                "LO_NUM" : 1,
                "LONUMSUF" : null,
                "HINUM" : null,
                "HINUMSUF" : null,
                "ARC_SIDE" : "R",
                "DISTANCE" : 3.99,
                "FCODE" : 115001,
                "FCODE_DES" : "Unknown",
                "CLASS" : "Land",
                "NAME" : null,
                "X" : 313826.457,
                "Y" : 4838133.322,
                "LONGITUDE" : -79.388042294,
                "LATITUDE" : 43.683092902,
                "OBJECTID" : 4675851,
                "MUN_NAME" : "former Toronto",
                "WARD_NAME" : "University-Rosedale"
            }
        }
    ]
}

I will be looking up lat/long values for thousands of city addresses, so it makes sense to have the data in memory.

I haven’t completely read this article: https://themockup.blog/posts/2020-05-22-parsing-json-in-r-with-jsonlite/

I’ve read about "Rectangling" here: https://tidyr.tidyverse.org/articles/rectangle.html.
I monkeyed about, it seems too complicated to me.

I must be going about this the wrong way, and I honestly don’t know what approach to take.
I’m just determining lat/long values for addresses.
Once I have the lat/long values for the "items" I’m tracking, I will show those items on a map.

Isn’t there another better way?

Thank you for any tips/advice/pointers.

Richard

2

Answers


  1. library(jsonlite)
    library(tidyverse)
    
    # Store the given string in a variable
    json_string <- '{ "type" : "FeatureCollection", "name" : "ADDRESS_POINT_WGS84", "features" : [ { "type" : "Feature", "geometry" : { "type" : "Point", "coordinates" : [ -79.539538708, 43.586921565 ] }, "properties" : { "GEO_ID" : 5729529, "LINK" : 5729516, "MAINT_STAG" : "REGULAR", "ADDRESS" : "399", "LFNAME" : "Lake Promenade", "LO_NUM" : 399, "LONUMSUF" : null, "HINUM" : null, "HINUMSUF" : null, "ARC_SIDE" : "L", "DISTANCE" : 4.15, "FCODE" : 115001, "FCODE_DES" : "Unknown", "CLASS" : "Land", "NAME" : null, "X" : 301607.151, "Y" : 4827443.931, "LONGITUDE" : -79.5395419864, "LATITUDE" : 43.5869301259, "OBJECTID" : 1491585, "MUN_NAME" : "Etobicoke", "WARD_NAME" : "Etobicoke-Lakeshore" } } ] }'
    
    # Convert the string to a JSON object
    json_data <- jsonlite::fromJSON(json_string)
    my_df <- json_data$features
    my_df <- my_df %>% 
      unnest_wider(col="properties")
    names(my_df)
    #>  [1] "type"       "geometry"   "GEO_ID"     "LINK"       "MAINT_STAG"
    #>  [6] "ADDRESS"    "LFNAME"     "LO_NUM"     "LONUMSUF"   "HINUM"     
    #> [11] "HINUMSUF"   "ARC_SIDE"   "DISTANCE"   "FCODE"      "FCODE_DES" 
    #> [16] "CLASS"      "NAME"       "X"          "Y"          "LONGITUDE" 
    #> [21] "LATITUDE"   "OBJECTID"   "MUN_NAME"   "WARD_NAME"
    
    my_df %>% 
      filter(ADDRESS=="399") %>% 
      filter(LFNAME=="Lake Promenade")
    #> # A tibble: 1 × 24
    #>   type    geometry$type  GEO_ID   LINK MAINT_STAG ADDRESS LFNAME LO_NUM LONUMSUF
    #>   <chr>   <chr>           <int>  <int> <chr>      <chr>   <chr>   <int> <lgl>   
    #> 1 Feature Point         5729529 5.73e6 REGULAR    399     Lake …    399 NA      
    #> # ℹ 16 more variables: geometry$coordinates <list>, HINUM <lgl>,
    #> #   HINUMSUF <lgl>, ARC_SIDE <chr>, DISTANCE <dbl>, FCODE <int>,
    #> #   FCODE_DES <chr>, CLASS <chr>, NAME <lgl>, X <dbl>, Y <dbl>,
    #> #   LONGITUDE <dbl>, LATITUDE <dbl>, OBJECTID <int>, MUN_NAME <chr>,
    #> #   WARD_NAME <chr>
    

    Created on 2023-05-10 with reprex v2.0.2

    Login or Signup to reply.
  2. Provided file sample is valid GeoJSON and spatial packages, like sf, can handle it directly. sf object is basically a dataframe with special geometry column(s), so you mostly use your standard data wrangling tools and methods to deal with attributes, be it base R or dplyr / Tidyverse. If you really want to, you can also use SQL queries with sf (only available when reading a file, so every single query reads the whole dataset from disk to memory, at least when it’s stored as json). Or use sqldf package to use SQL on dataframes, though you’d then miss direct access to sf tools – https://r-spatial.github.io/sf/

    library(sf)
    library(dplyr)
    
    # read sample dataset, saved as sample.geojson
    sample_sf <- st_read("sample.geojson", quiet = TRUE, as_tibble = TRUE)
    sample_sf
    #> Simple feature collection with 4 features and 22 fields
    #> Geometry type: POINT
    #> Dimension:     XY
    #> Bounding box:  xmin: -79.54037 ymin: 43.58692 xmax: -79.38804 ymax: 43.68309
    #> Geodetic CRS:  WGS 84
    #> # A tibble: 4 × 23
    #>    GEO_ID     LINK MAINT_STAG ADDRESS LFNAME      LO_NUM LONUMSUF HINUM HINUMSUF
    #>     <int>    <int> <chr>      <chr>   <chr>        <int> <chr>    <chr> <chr>   
    #> 1 5729529  5729516 REGULAR    399     Lake Prome…    399 <NA>     <NA>  <NA>    
    #> 2 9950585  9950578 REGULAR    7       Hilo Rd          7 <NA>     <NA>  <NA>    
    #> 3 5729496  5729468 REGULAR    387     Lake Prome…    387 <NA>     <NA>  <NA>    
    #> 4  850466 20002604 REGULAR    1       Ottawa St        1 <NA>     <NA>  <NA>    
    #> # ℹ 14 more variables: ARC_SIDE <chr>, DISTANCE <dbl>, FCODE <int>,
    #> #   FCODE_DES <chr>, CLASS <chr>, NAME <chr>, X <dbl>, Y <dbl>,
    #> #   LONGITUDE <dbl>, LATITUDE <dbl>, OBJECTID <int>, MUN_NAME <chr>,
    #> #   WARD_NAME <chr>, geometry <POINT [°]>
    
    # filter and plot results with mapview
    sample_sf %>%
      filter(ADDRESS == "7",LFNAME == "Hilo Rd") %>%
      mapview::mapview()
    

    # use SQL query on spatial dataset:  
    st_read("sample.geojson", quiet = TRUE, as_tibble = TRUE, 
            query = "select Latitude, Longitude From ADDRESS_POINT_WGS84 Where ADDRESS = '7'")
    #> Simple feature collection with 1 feature and 2 fields
    #> Geometry type: POINT
    #> Dimension:     XY
    #> Bounding box:  xmin: -79.54037 ymin: 43.58826 xmax: -79.54037 ymax: 43.58826
    #> Geodetic CRS:  WGS 84
    #> # A tibble: 1 × 3
    #>   Latitude Longitude             geometry
    #>      <dbl>     <dbl>          <POINT [°]>
    #> 1     43.6     -79.5 (-79.54037 43.58826)
    
    # You might want to consider more compact storage options than json 
    # for larger datasets, for example GeoPackage, which is actually an SQLite file.
    st_write(sample_sf, "sample.gpkg") 
    
    # or just R rds:
    readr::write_rds(sample_sf, "sample.rds")
    
    # Though for this small sample, the resulting gpkg size exceeds json's:
    list.files() %>%  file.info() %>% `[`(1)
    #>                 size
    #> sample.geojson  3537
    #> sample.gpkg    98304
    #> sample.rds      3411
    
    # For reading, just replace the filename; sf guesses the type / driver
    # from extension
    sample_g <- st_read("sample.gpkg", quiet = TRUE, as_tibble = TRUE)
    

    Created on 2023-05-10 with reprex v2.0.2

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search