skip to Main Content

I need some help on using json_normalizer to create multiple subcolumns from main columns ? The below code is able to generate an output file but its puts everything in one column however i need something seperate columns with heading like moniker.config, moniker.type, moniker.key, moniker.keyparts for each row

from pandas.io.json import json_normalize
import os
import pandas as pd

def json_normalize_recursive(base_column, data, df=pd.DataFrame()):
    if df.empty:
        df = json_normalize(data, record_prefix=base_column+'.')
    nested = df.select_dtypes(include='object')

    for col in nested.columns:
        try:
            nested_df = json_normalize(nested[col].tolist())
            nested_df.columns = [base_column+'.'+str(col)+'.'+str(c) for c in nested_df.columns]
            df = pd.concat([df.drop(col, axis=1), nested_df], axis=1)
        except ValueError:
            pass
    return df

data = {
   "errors":[
   ],
   "data":[
      {
         "moniker":{
            "config":"fx.ipv.london.eod",
            "type":"fx.spot",
            "key":"EUR/CZK",
            "keyParts":[
               "EUR",
               "CZK"
            ],
            "configType":"fx.ipv.london.eod/fx.spot",
            "live":True
         },
         "queryMoniker":{
            "config":"fx.ipv.london.eod",
            "type":"EUR/CZK",
            "key":"EUR/CZK",
            "tag":{
               "owner":"official",
               "type":"fx.spot",
               "key":"EUR/CZK",
               "tag":{
                  "owner":"official",
                  "date":13434324400999,
                  "cutoff":"London",
                  "name":"ipv",
                  "live":True
               },
               "keyParts":[
                  "EUR",
                  "CZK"
               ],
               "configType":"fx.ipv.london.eod/fx.spot",
               "live":False
            },
            "instance":{
               "data":"<FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>",
               "unmarshalled":True,
               "marshalled":True,
               "format":"fx/xml/1",
               "valid":True,
               "sequence":1643434234234,
               "instanceMoniker":{
                  "source":"viper.tagcopy",
                  "config":"fx.london.official.copy",
                  "keyParts":[
                     "EUR",
                     "CZK"
                  ]
               }
            }
         }
      }
   ]
}

df = json_normalize_recursive('', data)
print(df)

cwd = os.getcwd()


filepath = os.path.join(cwd, 'Desktop', 'output.csv')

df.to_csv(filepath, index=False)

Desired output:
enter image description here

2

Answers


  1. You can try:

    data = {
        "errors": [],
        "data": [
            {
                "moniker": {
                    "config": "fx.ipv.london.eod",
                    "type": "fx.spot",
                    "key": "EUR/CZK",
                    "keyParts": ["EUR", "CZK"],
                    "configType": "fx.ipv.london.eod/fx.spot",
                    "live": True,
                },
                "queryMoniker": {
                    "config": "fx.ipv.london.eod",
                    "type": "EUR/CZK",
                    "key": "EUR/CZK",
                    "tag": {
                        "owner": "official",
                        "type": "fx.spot",
                        "key": "EUR/CZK",
                        "tag": {
                            "owner": "official",
                            "date": 13434324400999,
                            "cutoff": "London",
                            "name": "ipv",
                            "live": True,
                        },
                        "keyParts": ["EUR", "CZK"],
                        "configType": "fx.ipv.london.eod/fx.spot",
                        "live": False,
                    },
                    "instance": {
                        "data": '<FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>',
                        "unmarshalled": True,
                        "marshalled": True,
                        "format": "fx/xml/1",
                        "valid": True,
                        "sequence": 1643434234234,
                        "instanceMoniker": {
                            "source": "viper.tagcopy",
                            "config": "fx.london.official.copy",
                            "keyParts": ["EUR", "CZK"],
                        },
                    },
                },
            }
        ],
    }
    
    df = pd.DataFrame(data['data'])
    
    df = pd.concat([df, df.pop('moniker').apply(pd.Series).add_prefix('moniker.')], axis=1)
    df = pd.concat([df, df.pop('queryMoniker').apply(pd.Series).add_prefix('queryMoniker.')], axis=1)
    df = pd.concat([df, df.pop('queryMoniker.tag').apply(pd.Series).add_prefix('queryMoniker.tag.')], axis=1)
    df = pd.concat([df, df.pop('queryMoniker.instance').apply(pd.Series).add_prefix('queryMoniker.instance.')], axis=1)
    
    df = df.explode('moniker.keyParts')
    print(df)
    

    Prints:

          moniker.config moniker.type moniker.key moniker.keyParts         moniker.configType  moniker.live queryMoniker.config queryMoniker.type queryMoniker.key queryMoniker.tag.owner queryMoniker.tag.type queryMoniker.tag.key                                                                            queryMoniker.tag.tag queryMoniker.tag.keyParts queryMoniker.tag.configType  queryMoniker.tag.live                                         queryMoniker.instance.data  queryMoniker.instance.unmarshalled  queryMoniker.instance.marshalled queryMoniker.instance.format  queryMoniker.instance.valid  queryMoniker.instance.sequence                                                         queryMoniker.instance.instanceMoniker
    0  fx.ipv.london.eod      fx.spot     EUR/CZK              EUR  fx.ipv.london.eod/fx.spot          True   fx.ipv.london.eod           EUR/CZK          EUR/CZK               official               fx.spot              EUR/CZK  {'owner': 'official', 'date': 13434324400999, 'cutoff': 'London', 'name': 'ipv', 'live': True}                [EUR, CZK]   fx.ipv.london.eod/fx.spot                  False  <FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>                                True                              True                     fx/xml/1                         True                   1643434234234  {'source': 'viper.tagcopy', 'config': 'fx.london.official.copy', 'keyParts': ['EUR', 'CZK']}
    0  fx.ipv.london.eod      fx.spot     EUR/CZK              CZK  fx.ipv.london.eod/fx.spot          True   fx.ipv.london.eod           EUR/CZK          EUR/CZK               official               fx.spot              EUR/CZK  {'owner': 'official', 'date': 13434324400999, 'cutoff': 'London', 'name': 'ipv', 'live': True}                [EUR, CZK]   fx.ipv.london.eod/fx.spot                  False  <FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>                                True                              True                     fx/xml/1                         True                   1643434234234  {'source': 'viper.tagcopy', 'config': 'fx.london.official.copy', 'keyParts': ['EUR', 'CZK']}
    
    Login or Signup to reply.
  2. try using df = pd.json_normalize(data,’data'[‘monier’,’queryMonier’]

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search