skip to Main Content

I have the data coming via REST api with nested json, Trying to explode the response but its flatteing in only the first level. Need to explode the nested part also.
First step im converting data into pd df then exploding each node (bdata, edata). But not able to explode the columns(topping) inside edata.

sample data:

{
    "adata": {
        "1": {
            "xid": "012",
            "xtype": "donut",
            "xname": "xCake",
            "xppu": 0.55
        },
        "2": {
            "xid": "015",
            "xtype": "donut",
            "xname": "Cake",
            "xppu": 0.565
        },
        "3": {
            "xid": "018",
            "xtype": "donut",
            "xname": "Cakex",
            "xppu": 0.559
        }
    },
    "bdata": {
        "1": [
            {
                "yid": "00012",
                "ytype": "donut",
                "yname": "Cake",
                "yppu": 0.55
            },
            {
                "yid": "00023",
                "ytype": "donut",
                "yname": "Raised",
                "yppu": 0.554
            },
            {
                "yid": "00024",
                "ytype": "donut",
                "yname": "Raised",
                "yppu": 0.554
            }
        ],
        "2": [
            {
                "yid": "00015",
                "ytype": "donut",
                "yname": "Cake",
                "yppu": 0.565
            },
            {
                "yid": "00026",
                "ytype": "donut",
                "yname": "Raised",
                "yppu": 0.557
            },
            {
                "yid": "00027",
                "ytype": "donut",
                "yname": "Raised",
                "yppu": 0.525
            }
        ],
        "3": [
            {
                "yid": "00018",
                "ytype": "donut",
                "yname": "Cake",
                "yppu": 0.559
            },
            {
                "yid": "00039",
                "ytype": "donut",
                "yname": "Old Fashioned",
                "yppu": 0.558
            }
        ]
    },
    "edata": {
        "1": [
            {
                "eid": "03001",
                "etype": "donut",
                "name": "Cake",
                "ppu": 0.55,
                "topping": [
                    {"id": "51", "type": "None"},
                    {"id": "002", "type": "zGlazed"},
                    {"id": "05", "type": "Sugar"},
                    {"id": "5007", "type": "Powdered Sugar"},
                    {"id": "06", "type": "Chocolate with Sprinkles"},
                    {"id": "53", "type": "Chocolate"},
                    {"id": "04", "type": "Maple"}
                ]
            },
            {
                "eid": "0302",
                "etype": "donut",
                "name": "Raised",
                "ppu": 0.55
            },
            {
                "eid": "0302",
                "etype": "donut",
                "name": "Raisedz",
                "ppu": 0.55,
                "topping": "None"
            },
            {
                "eid": "03003",
                "etype": "donut",
                "name": "zOld Fashioned",
                "ppu": 0.55,
                "topping": [
                    {"id": "501", "type": "Nonex"},
                    {"id": "52", "type": "xGlazed"},
                    {"id": "503", "type": "Chocolatez"}
                ]
            }
        ],
        "2": [
            {
                "eid": "00401",
                "etype": "donut",
                "name": "Cake",
                "ppu": 0.55,
                "topping": [
                    {"id": "01", "type": "None"},
                    {"id": "2", "type": "xGlazed"},
                    {"id": "55", "type": "xSugar"},
                    {"id": "507", "type": "Powdered Sugar"},
                    {"id": "506", "type": "xChocolate with Sprinkles"},
                    {"id": "03", "type": "xChocolate"},
                    {"id": "54", "type": "xMaple"}
                ]
            },
            {
                "eid": "042",
                "etype": "donut",
                "name": "Raised",
                "ppu": 0.55
            },
            {
                "eid": "042",
                "etype": "donut",
                "name": "Raisedx",
                "ppu": 0.55,
                "topping": "None"
            }
        ],
        "3": [
            {
                "eid": "051",
                "etype": "donut",
                "name": "Cake",
                "ppu": 0.55,
                "topping": [
                    {"id": "50407", "type": "Powdered Sugarx"},
                    {"id": "50406", "type": "Chocolate with Sprinklesx"},
                    {"id": "50403", "type": "Chocolatex"},
                    {"id": "50404", "type": "Maplex"}
                ]
            },
            {
                "eid": "050403",
                "etype": "donut",
                "name": "Old Fashioned",
                "ppu": 0.55,
                "topping": [
                    {"id": "5071", "type": "None"},
                    {"id": "5072", "type": "Glazedx"},
                    {"id": "5703", "type": "Chocolatex"}
                ]
            }
        ]
    }
}

Im trying to get the result somewhat like below.

      eid   etype   name    ppu     id                   type      xid     xtype   xname   xppu    yid     ytype   yname   yppu
0   03001   donut   Cake   0.55     51                  None      012     donut   xCake   0.55  00012     donut    Cake   0.55
1   03001   donut   Cake   0.55    002                zGlazed     012     donut   xCake   0.55  00012     donut    Cake   0.55
2   03001   donut   Cake   0.55     05                 Sugar      012     donut   xCake   0.55  00012     donut    Cake   0.55
3   03001   donut   Cake   0.55   5007       Powdered Sugar      012     donut   xCake   0.55  00012     donut    Cake   0.55
4   03001   donut   Cake   0.55     06  Chocolate with Sprinkles 012     donut   xCake   0.55  00012     donut    Cake   0.55
5   03001   donut   Cake   0.55     53             Chocolate     012     donut   xCake   0.55  00012     donut    Cake   0.55
6   03001   donut   Cake   0.55     04                Maple      012     donut   xCake   0.55  00012     donut    Cake   0.55
7   0302   donut   Raised   0.55   NaN                  None     015     donut   Cake   0.565  00015     donut    Cake   0.565
8   0302   donut   Raisedz  0.55  None                  None     015     donut   Cake   0.565  00015     donut    Cake   0.565

2

Answers


  1. You can extract data with pd.json_normalize:

    def extract_bdata(bdata):
        bdata = {k: pd.json_normalize(records)
                    for k, records in bdata.items()}
        return pd.concat(bdata, names=['group']).reset_index('group').reset_index(drop=True)
    
    def extract_edata(edata):
        edata = {k: pd.json_normalize(record, 'topping', ['eid', 'etype', 'name', 'ppu'])
                    for k, records in edata.items() for record in records
                    if 'topping' in record and isinstance(record['topping'], list)}
        return pd.concat(edata, names=['group']).reset_index('group').reset_index(drop=True)
    
    # Convert your json as python data structure
    # import json
    # data = json.loads(<your input as json string>)
        
    bdata = extract_bdata(data['bdata'])
    edata = extract_edata(data['edata'])
    

    Output:

    >>> bdata
      group    yid  ytype          yname   yppu
    0     1  00012  donut           Cake  0.550
    1     1  00023  donut         Raised  0.554
    2     1  00024  donut         Raised  0.554
    3     2  00015  donut           Cake  0.565
    4     2  00026  donut         Raised  0.557
    5     2  00027  donut         Raised  0.525
    6     3  00018  donut           Cake  0.559
    7     3  00039  donut  Old Fashioned  0.558
    
    >>> edata
       group    id                       type     eid  etype            name   ppu
    0      1   501                      Nonex   03003  donut  zOld Fashioned  0.55
    1      1    52                    xGlazed   03003  donut  zOld Fashioned  0.55
    2      1   503                 Chocolatez   03003  donut  zOld Fashioned  0.55
    3      2    01                       None   00401  donut            Cake  0.55
    4      2     2                    xGlazed   00401  donut            Cake  0.55
    5      2    55                     xSugar   00401  donut            Cake  0.55
    6      2   507             Powdered Sugar   00401  donut            Cake  0.55
    7      2   506  xChocolate with Sprinkles   00401  donut            Cake  0.55
    8      2    03                 xChocolate   00401  donut            Cake  0.55
    9      2    54                     xMaple   00401  donut            Cake  0.55
    10     3  5071                       None  050403  donut   Old Fashioned  0.55
    11     3  5072                    Glazedx  050403  donut   Old Fashioned  0.55
    12     3  5703                 Chocolatex  050403  donut   Old Fashioned  0.55
    

    However, I don’t see how to merge bdata and edata.

    Login or Signup to reply.
  2. Had to build a recursive iterator to get through the lists embedded within the dictionaries which is what the drill_through function is doing. The dictionaries with a topping were duplicated and a type was assigned for those, otherwise it was set to None.

    import pandas as pd
    
    x = {} # the input that you have in question
    
    def drill_through(inner_val, list_save=[]):
        if isinstance(inner_val, dict):
            list_save.append(inner_val)
        elif isinstance(inner_val, list):
            for ele in inner_val:
                drill_through(ele,list_save)
        else:
            print(inner_val) # incase there is another data type
        return list_save
    
    
    def get_flattened(val: dict) -> dict:
        flattened_result = []
        for name in val:
            for num in val[name]:
                inner_val = val[name][num]
                drilled = drill_through(inner_val)
                for drill_value in drilled:
                    flattened_result.append(drill_value)
    
        result = []
        for new_dict in flattened_result:
            d = {}
            if 'topping' in new_dict.keys():
                iter_array = new_dict['topping']
                del new_dict['topping']
                for toppin in iter_array:
                    d = new_dict
                    try:
                        d['type'] = toppin['type']
                    except:
                        d['type'] = None
                    result.append(d)
            else:
                d = new_dict
                d['type'] = None
                result.append(d)
        return flattened_result
                      
    
    df = pd.DataFrame(get_flattened(x))
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search