Question posted in Json
Our archive of expertly curated questions and answers provides insights and solutions to common problems related to this popular data interchange format. From parsing and manipulating JSON data to integrating it with various programming languages and web services, our archive has got you covered. Start exploring today and take your JSON skills to the next level

How to use json_normalize to create subcolumns in csv file

user20432746
January 11, 2023
275 views
0 votes
2 Answers

I need some help on using json_normalizer to create multiple subcolumns from main columns ? The below code is able to generate an output file but its puts everything in one column however i need something seperate columns with heading like moniker.config, moniker.type, moniker.key, moniker.keyparts for each row

from pandas.io.json import json_normalize
import os
import pandas as pd

def json_normalize_recursive(base_column, data, df=pd.DataFrame()):
    if df.empty:
        df = json_normalize(data, record_prefix=base_column+'.')
    nested = df.select_dtypes(include='object')

    for col in nested.columns:
        try:
            nested_df = json_normalize(nested[col].tolist())
            nested_df.columns = [base_column+'.'+str(col)+'.'+str(c) for c in nested_df.columns]
            df = pd.concat([df.drop(col, axis=1), nested_df], axis=1)
        except ValueError:
            pass
    return df

data = {
   "errors":[
   ],
   "data":[
      {
         "moniker":{
            "config":"fx.ipv.london.eod",
            "type":"fx.spot",
            "key":"EUR/CZK",
            "keyParts":[
               "EUR",
               "CZK"
            ],
            "configType":"fx.ipv.london.eod/fx.spot",
            "live":True
         },
         "queryMoniker":{
            "config":"fx.ipv.london.eod",
            "type":"EUR/CZK",
            "key":"EUR/CZK",
            "tag":{
               "owner":"official",
               "type":"fx.spot",
               "key":"EUR/CZK",
               "tag":{
                  "owner":"official",
                  "date":13434324400999,
                  "cutoff":"London",
                  "name":"ipv",
                  "live":True
               },
               "keyParts":[
                  "EUR",
                  "CZK"
               ],
               "configType":"fx.ipv.london.eod/fx.spot",
               "live":False
            },
            "instance":{
               "data":"<FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>",
               "unmarshalled":True,
               "marshalled":True,
               "format":"fx/xml/1",
               "valid":True,
               "sequence":1643434234234,
               "instanceMoniker":{
                  "source":"viper.tagcopy",
                  "config":"fx.london.official.copy",
                  "keyParts":[
                     "EUR",
                     "CZK"
                  ]
               }
            }
         }
      }
   ]
}

df = json_normalize_recursive('', data)
print(df)

cwd = os.getcwd()


filepath = os.path.join(cwd, 'Desktop', 'output.csv')

df.to_csv(filepath, index=False)

Desired output:

Answers

You can try:

data = {
    "errors": [],
    "data": [
        {
            "moniker": {
                "config": "fx.ipv.london.eod",
                "type": "fx.spot",
                "key": "EUR/CZK",
                "keyParts": ["EUR", "CZK"],
                "configType": "fx.ipv.london.eod/fx.spot",
                "live": True,
            },
            "queryMoniker": {
                "config": "fx.ipv.london.eod",
                "type": "EUR/CZK",
                "key": "EUR/CZK",
                "tag": {
                    "owner": "official",
                    "type": "fx.spot",
                    "key": "EUR/CZK",
                    "tag": {
                        "owner": "official",
                        "date": 13434324400999,
                        "cutoff": "London",
                        "name": "ipv",
                        "live": True,
                    },
                    "keyParts": ["EUR", "CZK"],
                    "configType": "fx.ipv.london.eod/fx.spot",
                    "live": False,
                },
                "instance": {
                    "data": '<FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>',
                    "unmarshalled": True,
                    "marshalled": True,
                    "format": "fx/xml/1",
                    "valid": True,
                    "sequence": 1643434234234,
                    "instanceMoniker": {
                        "source": "viper.tagcopy",
                        "config": "fx.london.official.copy",
                        "keyParts": ["EUR", "CZK"],
                    },
                },
            },
        }
    ],
}

df = pd.DataFrame(data['data'])

df = pd.concat([df, df.pop('moniker').apply(pd.Series).add_prefix('moniker.')], axis=1)
df = pd.concat([df, df.pop('queryMoniker').apply(pd.Series).add_prefix('queryMoniker.')], axis=1)
df = pd.concat([df, df.pop('queryMoniker.tag').apply(pd.Series).add_prefix('queryMoniker.tag.')], axis=1)
df = pd.concat([df, df.pop('queryMoniker.instance').apply(pd.Series).add_prefix('queryMoniker.instance.')], axis=1)

df = df.explode('moniker.keyParts')
print(df)

Prints:

      moniker.config moniker.type moniker.key moniker.keyParts         moniker.configType  moniker.live queryMoniker.config queryMoniker.type queryMoniker.key queryMoniker.tag.owner queryMoniker.tag.type queryMoniker.tag.key                                                                            queryMoniker.tag.tag queryMoniker.tag.keyParts queryMoniker.tag.configType  queryMoniker.tag.live                                         queryMoniker.instance.data  queryMoniker.instance.unmarshalled  queryMoniker.instance.marshalled queryMoniker.instance.format  queryMoniker.instance.valid  queryMoniker.instance.sequence                                                         queryMoniker.instance.instanceMoniker
0  fx.ipv.london.eod      fx.spot     EUR/CZK              EUR  fx.ipv.london.eod/fx.spot          True   fx.ipv.london.eod           EUR/CZK          EUR/CZK               official               fx.spot              EUR/CZK  {'owner': 'official', 'date': 13434324400999, 'cutoff': 'London', 'name': 'ipv', 'live': True}                [EUR, CZK]   fx.ipv.london.eod/fx.spot                  False  <FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>                                True                              True                     fx/xml/1                         True                   1643434234234  {'source': 'viper.tagcopy', 'config': 'fx.london.official.copy', 'keyParts': ['EUR', 'CZK']}
0  fx.ipv.london.eod      fx.spot     EUR/CZK              CZK  fx.ipv.london.eod/fx.spot          True   fx.ipv.london.eod           EUR/CZK          EUR/CZK               official               fx.spot              EUR/CZK  {'owner': 'official', 'date': 13434324400999, 'cutoff': 'London', 'name': 'ipv', 'live': True}                [EUR, CZK]   fx.ipv.london.eod/fx.spot                  False  <FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>                                True                              True                     fx/xml/1                         True                   1643434234234  {'source': 'viper.tagcopy', 'config': 'fx.london.official.copy', 'keyParts': ['EUR', 'CZK']}

- Naina
- January 12, 2023 at 2:11 pm
- 0 votes
0
try using df = pd.json_normalize(data,’data'[‘monier’,’queryMonier’]

Login or Signup to reply.

Please signup or login to give your own answer.

Click here to cancel reply.