skip to Main Content

I have a JSON string that I would like to spearate into new variables. I feel there must be a clever way but I cannot find the solution.

df <- structure(list(subject = c("dtv85251vucquc45", "mcj8vdqz7sxmjcr0"
), response = c("{"P0_Q0":{"aktiv":2,"bekümmert":3,"interessiert":4,"freudig erregt":2,"verärgert":2,"stark":0,"schuldig":1,"erschrocken":1,"feindselig":1,"angeregt":2},"P1_Q0":{"stolz":1,"gereizt":1,"begeistert":2,"beschämt":2,"wach":1,"nervös":1,"entschlossen":1,"ängstlich":1,"aufmerksam":2,"durcheinander":2}}", 
                "{"P0_Q0":{"aktiv":1,"bekümmert":3,"interessiert":1,"freudig erregt":1,"verärgert":0,"stark":0,"schuldig":2,"erschrocken":0,"feindselig":0,"angeregt":1},"P1_Q0":{"stolz":2,"gereizt":0,"begeistert":1,"beschämt":0,"wach":2,"nervös":0,"entschlossen":0,"aufmerksam":2,"durcheinander":0,"ängstlich":0}}"
)), class = c("tbl_df", 
              "tbl", "data.frame"), row.names = 1:2, class = "data.frame")

desired output:

output <- structure(list(subject = c("dtv85251vucquc45", "mcj8vdqz7sxmjcr0"
), aktiv = 2:1, bekümmert = c(3L, 3L), interessiert = c(4L, 
1L), freudig.erregt = 2:1, verärgert = c(2L, 0L), 
    stark = c(0L, 0L), schuldig = 1:2, erschrocken = 1:0, 
    feindselig = 1:0, angeregt = 2:1, stolz = 1:2, 
    gereizt = 1:0, begeistert = 2:1, beschämt = c(2L, 
    0L), wach = 1:2, nervös = 1:0, entschlossen = 1:0, 
    ängstlich = 1:0, aufmerksam = c(2L, 2L), durcheinander = c(2L, 
    0L)), class = "data.frame", row.names = c(NA, -2L))

what kind of works (but not tidy):

j <- list()
for(i in 1:nrow(df)){
  j[[i]] <- as.data.frame(jsonlite::fromJSON(df$response[i]))
}

output <- data.frame(subject=df$subject, bind_rows(j))

Is there a better way?

2

Answers


  1. I don’t know about ‘clever’, but {rjsoncons} implements ‘jmespath’ queries. I wrote a little helper function to convert from json to R via jmespath

    from_jmespath = function(json, path) {
        rjsoncons::jmespath(json, path) |>
            jsonlite::fromJSON()
    }
    

    and then extracted the relevant information from the rows of your data frame

    keys = from_jmespath(df$response[1], "*.keys(@)[]")
    values = lapply(df$response, from_jmespath, "*.values(@)[]")
    

    I then constructed a tidy tibble from the various components

    tbl = dplyr::tibble(
        subject = rep(df$subject, each = length(keys)),
        key = rep(keys, nrow(df)),
        value = unlist(values)
    )
    

    A ‘wide’ version is

    tbl |>
        tidyr::pivot_wider(names_from = "key", values_from = "value")
    
    Login or Signup to reply.
  2. jsonlite::stream_in works well with this.

    library(dplyr)
    jsonlite::stream_in(textConnection(df$response), simplifyDataFrame = FALSE) %>%
      lapply(bind_cols) %>%
      bind_rows() %>%
      bind_cols(df[,1,drop=FALSE], .)
    #  Imported 2 records. Simplifying...
    #            subject aktiv bekümmert interessiert freudig erregt verärgert stark
    # 1 dtv85251vucquc45     2         3            4              2         2     0
    # 2 mcj8vdqz7sxmjcr0     1         3            1              1         0     0
    #   schuldig erschrocken feindselig angeregt stolz gereizt begeistert beschämt
    # 1        1           1          1        2     1       1          2        2
    # 2        2           0          0        1     2       0          1        0
    #   wach nervös entschlossen ängstlich aufmerksam durcheinander
    # 1    1      1            1         1          2             2
    # 2    2      0            0         0          2             0
    

    or slightly refactored:

    library(dplyr)
    library(tidyr) # unnest
    df %>%
      mutate(
        response = jsonlite::stream_in(textConnection(response), simplifyDataFrame = FALSE),
        response = lapply(response, bind_cols)
      ) %>%
      unnest(response)
    #  Imported 2 records. Simplifying...
    # # A tibble: 2 × 21
    #   subject      aktiv bekümmert interessiert `freudig erregt` verärgert stark schuldig erschrocken feindselig angeregt
    #   <chr>        <int>     <int>        <int>            <int>     <int> <int>    <int>       <int>      <int>    <int>
    # 1 dtv85251vuc…     2         3            4                2         2     0        1           1          1        2
    # 2 mcj8vdqz7sx…     1         3            1                1         0     0        2           0          0        1
    # # ℹ 10 more variables: stolz <int>, gereizt <int>, begeistert <int>, beschämt <int>, wach <int>, nervös <int>,
    # #   entschlossen <int>, ängstlich <int>, aufmerksam <int>, durcheinander <int>
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search