skip to Main Content

I have a JSON file that starts with two square brackets. How do I parse the data from it?
The type of the parsed JSON is class ‘list’. I have gone through many Stack Overflow solutions but none of them helped. I am new to Python and am trying to extract data from an API.

[
  [
    {
      "previous": null,
      "story_count": 3,
      "results": [
        {
          "source_name": "Fact",
          "attachments": [],
          "title": "acquire Spot",
          "duplicates": [],
          "Content Types": [
            {
              "logo": "tags-ico.png",
              "id": 3,
              "name": "News Articles"
            }
          ],
          "Triggers": [
            {
              "logo": "tags-ico.png",
              "id": 24195,
              "name": "M&A"
            },
            {
              "logo": "tags-ico.png",
              "id": 24198,
              "name": "Digital"
            }
          ],
          "summary": "ways to work, interact and do business. ",
          "Channels": [
            {
              "logo": "tags-ico.png",
              "id": 17,
              "name": "Websites"
            }
          ],
          "Sources": [
            {
              "logo": "tags-ico.png",
              "id": 68636,
              "name": "Fact"
            }
          ],
          "image_url": "",
          "Duns Number": [
            {
              "logo": "tags-ico.png",
              "id": 18740,
              "name": "802054742"
            }
          ],
          "Firms": [
            {
              "logo": "400x400-18235.jpg",
              "id": 18235,
              "name": "Netapp"
            }
          ],
          "source_url": "DQCH0020200610eg6900002",
          "pub_date": "2020-06-09T00:00:00Z",
          "id": 2006108515973,
          "previews": [],
          "duplicate_count": 0
        },
        {
          "source_name": "Fact",
          "attachments": [],
          "title": "employees in next 10 months",
          "duplicates": [
            {
              "summary": "the next-generation datacentres.",
              "source_name": "Fact",
              "id": 2006098291431,
              "source_url": "0200609eg690015q",
              "title": "Alibaba Cloud to create 5,000 tech jobs in next 10 months"
            },
            {
              "summary": "servers, chips and artificial intelligence (AI). ",
              "source_name": "Mint",
              "id": 2006098297350,
              "source_url": "https:HNMINT0020200609eg69000ez",
              "title": "hire 5,000 technology talent this fiscal"
            },
            {
              "summary": "conglomerate Alibaba Group said in a statement. ",
              "source_name": "Fact",
              "id": 2006098399325,
              "source_url": "MMVTCE0020200609eg6900008",
              "title": "5,000 tech professionals this financial year"
            }
          ],
          "Content Types": [
            {
              "logo": "tags-ico.png",
              "id": 3,
              "name": "News Articles"
            }
          ],
          "Triggers": [
            {
              "logo": "tags-ico.png",
              "id": 24198,
              "name": "Digital capabilities"
            }
          ],
          "summary": "servers, chips and artificial intelligence.",
          "Channels": [
            {
              "logo": "tags-ico.png",
              "id": 17,
              "name": "News and Other Websites"
            }
          ],
          "Sources": [
            {
              "logo": "tags-ico.png",
              "id": 68636,
              "name": "Fact"
            }
          ],
          "image_url": "",
          "Duns Number": [
            {
              "logo": "tags-ico.png",
              "id": 18470,
              "name": "680536229"
            }
          ],
          "Firms": [
            {
              "logo": "rwvj6i1r_400x400-17997.jpg",
              "id": 17997,
              "name": "Alibaba Group Services Limited"
            }
          ],
          "source_url": "TELWOR0020200609eg6900001",
          "pub_date": "2020-06-09T00:00:00Z",
          "id": 2006098292712,
          "previews": [],
          "duplicate_count": 3
        },
        {
          "source_name": "Hindustan Times",
          "attachments": [],
          "title": "New technologies like AI help reduce bias",
          "duplicates": [],
          "Content Types": [
            {
              "logo": "tags-ico.png",
              "id": 3,
              "name": "News Articles"
            }
          ],
          "Triggers": [
            {
              "logo": "tags-ico.png",
              "id": 24198,
              "name": "Digital capabilities"
            },
            {
              "logo": "tags-ico.png",
              "id": 24199,
              "name": "Upskilling/ reskilling"
            }
          ],
          "summary": "the HR function.",
          "Channels": [
            {
              "logo": "tags-ico.png",
              "id": 17,
              "name": "News and Other Websites"
            }
          ],
          "Sources": [
            {
              "logo": "tags-ico.png",
              "id": 1870,
              "name": "Hindustan Times"
            }
          ],
          "image_url": "",
          "Duns Number": [
            {
              "logo": "tags-ico.png",
              "id": 15214,
              "name": "001368083"
            }
          ],
          "Firms": [
            {
              "logo": "ibm-15166.jpg",
              "id": 15166,
              "name": "IBM"
            }
          ],
          "source_url": "abcd.com",
          "pub_date": "2020-02-04T00:00:00Z",
          "id": 2006108589032,
          "previews": [],
          "duplicate_count": 0
        }
      ],
      "next": null
    }
  ]
]

I have written the below code but it’s not working.

# Load the whole JSON document from disk.
json_data=open('responsefile2.json')
df1 = json.load(json_data)
json_data.close()
# One accumulator list per field; each becomes a column further down.
list_title=[]
list_Triggers =[]
list_Duns_Number=[]
list_Summary=[]
list_pub_date =[]
list_Client_Name =[]
list_Source_URL =[]
# NOTE: the file starts with "[[", so df1 is a plain Python list, not a
# DataFrame; df1.shape raises "'list' object has no attribute 'shape'".
for i in range(df1.shape[0]):
    # NOTE: a list cannot be indexed with a string key either; in the sample
    # data the "results" key sits inside the dict at df1[0][0].
    data1=df1["results"][i]
    for person in data1:
        try:
            list_title.append(person['title'])
            list_Triggers.append(person['Triggers'])
            list_Duns_Number.append(person['Duns Number'])
            list_Summary.append(person['summary'])
            list_pub_date.append(person['pub_date'])
            list_Client_Name.append(person['Firms'])
            list_Source_URL.append(person['source_url'])
        # Bare except: any error is hidden and only an empty line is printed.
        except:
            print('')
import pandas as pd
# Wrap each list in a Series and place them side by side as columns.
lists = [list_Duns_Number,list_Client_Name,list_Triggers,list_title,list_Summary,list_Source_URL,list_pub_date]
df2 = pd.concat([pd.Series(x) for x in lists], axis=1)

The output throws an error:

‘list’ object has no attribute ‘shape’

4

Answers


  1. You could just select the inner list elements and iterate them, if there is more than one.

    # Parse the file first, then step through the two wrapping lists to reach
    # the dict that holds "results". The file object itself cannot be indexed.
    with open('responsefile2.json') as json_data:
        df1 = json.load(json_data)[0][0]
    

    What’s also always useful: pd.json_normalize()

    Login or Signup to reply.
  2. ‘list’ object has no attribute ‘shape’
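    This is because json.load() returns a plain Python list here, and a list has no shape attribute (shape belongs to NumPy arrays and pandas DataFrames).
    If you know the structure of the data in the JSON file, you could simply remove the outer loop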

    for i in range(df1.shape[0]):
    

    And directly iterate on the results.

    # The context manager closes the file even if parsing fails.
    with open('responsefile2.json') as json_data:
        df1 = json.load(json_data)
    list_title=[]
    list_Triggers =[]
    list_Duns_Number=[]
    list_Summary=[]
    list_pub_date =[]
    list_Client_Name =[]
    list_Source_URL =[]

    # Each record key paired with the list that collects its values.
    columns = [
        ('title', list_title),
        ('Triggers', list_Triggers),
        ('Duns Number', list_Duns_Number),
        ('summary', list_Summary),
        ('pub_date', list_pub_date),
        ('Firms', list_Client_Name),
        ('source_url', list_Source_URL),
    ]

    # The payload is wrapped in two lists, so the records live under
    # df1[0][0]['results'].
    for person in df1[0][0]['results']:
        try:
            # Read every field before appending, so a record with a missing
            # key is skipped as a whole and the lists keep equal lengths.
            values = [person[key] for key, _ in columns]
        except KeyError as missing:
            print('whats wrong', missing)
            continue
        for (_, target), value in zip(columns, values):
            target.append(value)
    
    Login or Signup to reply.
  3. Try this to normalize your json:

    import json

    import pandas as pd

    # Read-only access is enough; 'r+' would also demand write permission.
    with open('1.json') as f:
        data = json.load(f)

    # The payload is a list of lists of dicts. Flatten each inner dict's
    # "results" records and carry "previous" and "story_count" along as
    # extra columns.
    frames = [
        pd.json_normalize(b, record_path=['results'], meta=[['previous'], ['story_count']])
        for a in data
        for b in a
    ]
    # Combine all frames so every inner dict contributes its rows.
    df = pd.concat(frames, ignore_index=True)
    print(df)
    

    Output:

           source_name attachments                                      title  ... duplicate_count previous story_count
    0             Fact          []                               acquire Spot  ...               0     None           3
    1             Fact          []                employees in next 10 months  ...               3     None           3
    2  Hindustan Times          []  New technologies like AI help reduce bias  ...               0     None           3
    
    Login or Signup to reply.
  4. What you’re looking at is a Dictionary with one entry. Before parsing instantiate a new Map like:

    const json_data = open('responsefile2.json')
    const df1 = new Map(json.load(json_data));
    json_data.close()
    

    Then iterate over df1 entries like:

    for (const [_, value] of df1.entries()) {
      /* process nested array items */
    }
    

    Items will be processed in insertion order. See Map on MDN for more info.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search