How to parse this JSON which starts with two square brackets? - Artificial Intelligence

Alan003
June 12, 2020
249 views
3 votes
4 Answers

I have a JSON file that starts with two square brackets. How do I parse the data from it?
The type of the loaded JSON is class ‘list’. I have gone through many Stack Overflow solutions, but none of them helped. I am new to Python and am trying to extract data from an API.

[
  [
    {
      "previous": null,
      "story_count": 3,
      "results": [
        {
          "source_name": "Fact",
          "attachments": [],
          "title": "acquire Spot",
          "duplicates": [],
          "Content Types": [
            {
              "logo": "tags-ico.png",
              "id": 3,
              "name": "News Articles"
            }
          ],
          "Triggers": [
            {
              "logo": "tags-ico.png",
              "id": 24195,
              "name": "M&A"
            },
            {
              "logo": "tags-ico.png",
              "id": 24198,
              "name": "Digital"
            }
          ],
          "summary": "ways to work, interact and do business. ",
          "Channels": [
            {
              "logo": "tags-ico.png",
              "id": 17,
              "name": "Websites"
            }
          ],
          "Sources": [
            {
              "logo": "tags-ico.png",
              "id": 68636,
              "name": "Fact"
            }
          ],
          "image_url": "",
          "Duns Number": [
            {
              "logo": "tags-ico.png",
              "id": 18740,
              "name": "802054742"
            }
          ],
          "Firms": [
            {
              "logo": "400x400-18235.jpg",
              "id": 18235,
              "name": "Netapp"
            }
          ],
          "source_url": "DQCH0020200610eg6900002",
          "pub_date": "2020-06-09T00:00:00Z",
          "id": 2006108515973,
          "previews": [],
          "duplicate_count": 0
        },
        {
          "source_name": "Fact",
          "attachments": [],
          "title": "employees in next 10 months",
          "duplicates": [
            {
              "summary": "the next-generation datacentres.",
              "source_name": "Fact",
              "id": 2006098291431,
              "source_url": "0200609eg690015q",
              "title": "Alibaba Cloud to create 5,000 tech jobs in next 10 months"
            },
            {
              "summary": "servers, chips and artificial intelligence (AI). ",
              "source_name": "Mint",
              "id": 2006098297350,
              "source_url": "https:HNMINT0020200609eg69000ez",
              "title": "hire 5,000 technology talent this fiscal"
            },
            {
              "summary": "conglomerate Alibaba Group said in a statement. ",
              "source_name": "Fact",
              "id": 2006098399325,
              "source_url": "MMVTCE0020200609eg6900008",
              "title": "5,000 tech professionals this financial year"
            }
          ],
          "Content Types": [
            {
              "logo": "tags-ico.png",
              "id": 3,
              "name": "News Articles"
            }
          ],
          "Triggers": [
            {
              "logo": "tags-ico.png",
              "id": 24198,
              "name": "Digital capabilities"
            }
          ],
          "summary": "servers, chips and artificial intelligence.",
          "Channels": [
            {
              "logo": "tags-ico.png",
              "id": 17,
              "name": "News and Other Websites"
            }
          ],
          "Sources": [
            {
              "logo": "tags-ico.png",
              "id": 68636,
              "name": "Fact"
            }
          ],
          "image_url": "",
          "Duns Number": [
            {
              "logo": "tags-ico.png",
              "id": 18470,
              "name": "680536229"
            }
          ],
          "Firms": [
            {
              "logo": "rwvj6i1r_400x400-17997.jpg",
              "id": 17997,
              "name": "Alibaba Group Services Limited"
            }
          ],
          "source_url": "TELWOR0020200609eg6900001",
          "pub_date": "2020-06-09T00:00:00Z",
          "id": 2006098292712,
          "previews": [],
          "duplicate_count": 3
        },
        {
          "source_name": "Hindustan Times",
          "attachments": [],
          "title": "New technologies like AI help reduce bias",
          "duplicates": [],
          "Content Types": [
            {
              "logo": "tags-ico.png",
              "id": 3,
              "name": "News Articles"
            }
          ],
          "Triggers": [
            {
              "logo": "tags-ico.png",
              "id": 24198,
              "name": "Digital capabilities"
            },
            {
              "logo": "tags-ico.png",
              "id": 24199,
              "name": "Upskilling/ reskilling"
            }
          ],
          "summary": "the HR function.",
          "Channels": [
            {
              "logo": "tags-ico.png",
              "id": 17,
              "name": "News and Other Websites"
            }
          ],
          "Sources": [
            {
              "logo": "tags-ico.png",
              "id": 1870,
              "name": "Hindustan Times"
            }
          ],
          "image_url": "",
          "Duns Number": [
            {
              "logo": "tags-ico.png",
              "id": 15214,
              "name": "001368083"
            }
          ],
          "Firms": [
            {
              "logo": "ibm-15166.jpg",
              "id": 15166,
              "name": "IBM"
            }
          ],
          "source_url": "abcd.com",
          "pub_date": "2020-02-04T00:00:00Z",
          "id": 2006108589032,
          "previews": [],
          "duplicate_count": 0
        }
      ],
      "next": null
    }
  ]
]

I have written the below code but it’s not working.

json_data=open('responsefile2.json')
df1 = json.load(json_data)
json_data.close()
list_title=[]
list_Triggers =[]
list_Duns_Number=[]
list_Summary=[]
list_pub_date =[]
list_Client_Name =[]
list_Source_URL =[]
for i in range(df1.shape[0]):
    data1=df1["results"][i]
    for person in data1:
        try:
            list_title.append(person['title'])
            list_Triggers.append(person['Triggers'])
            list_Duns_Number.append(person['Duns Number'])
            list_Summary.append(person['summary'])
            list_pub_date.append(person['pub_date'])
            list_Client_Name.append(person['Firms'])
            list_Source_URL.append(person['source_url'])
        except:
            print('')
import pandas as pd
lists = [list_Duns_Number,list_Client_Name,list_Triggers,list_title,list_Summary,list_Source_URL,list_pub_date]
df2 = pd.concat([pd.Series(x) for x in lists], axis=1)

The output throws an error:

‘list’ object has no attribute ‘shape’

Answers

- user2853437
- June 12, 2020 at 7:32 pm
- 0 votes
0
You could just select the inner list elements and iterate them, if there is more than one.
```
json_data=open('responsefile2.json')
df1 = json.load(json_data[0][0])
```
What's also always useful: pd.json_normalize()
Login or Signup to reply.

‘list’ object has no attribute ‘shape’
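This is because json.load() returns a plain Python list here, and a list has no ‘shape’ attribute (that attribute belongs to NumPy arrays and pandas DataFrames, not to lists).
If you know the structure of the data in the JSON file, you could probably remove the outer loop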

for i in range(df1.shape[0]):

And directly iterate on the results.

df1 = json.load(json_data)
json_data.close()
list_title=[]
list_Triggers =[]
list_Duns_Number=[]
list_Summary=[]
list_pub_date =[]
list_Client_Name =[]
list_Source_URL =[]

for person in df1[0][0]['results']:
    try:
        list_title.append(person['title'])
        list_Triggers.append(person['Triggers'])
        list_Duns_Number.append(person['Duns Number'])
        list_Summary.append(person['summary'])
        list_pub_date.append(person['pub_date'])
        list_Client_Name.append(person['Firms'])
        list_Source_URL.append(person['source_url'])
    except:
        print('whats wrong')

Try this to normalize your JSON:

with open('1.json', 'r+') as f:
    data = json.load(f)

for a in data:
    for b in a:
        df = pd.json_normalize(b, record_path=['results'], meta=[['previous'], ['story_count']])
print(df)

Output:

       source_name attachments                                      title  ... duplicate_count previous story_count
0             Fact          []                               acquire Spot  ...               0     None           3
1             Fact          []                employees in next 10 months  ...               3     None           3
2  Hindustan Times          []  New technologies like AI help reduce bias  ...               0     None           3

- vhs
- May 10, 2022 at 10:31 am
- 0 votes
0
What you’re looking at is a Dictionary with one entry. Before parsing instantiate a new Map like:
```
const json_data = open('responsefile2.json')
const df1 = new Map(json.load(json_data));
json_data.close()
```
Then iterate over df1 entries like:
```
for (const [_, value] of df1.entries()) {
  /* process nested array items */
}
```
Items will be processed in insertion order. See Map on MDN for more info.
Login or Signup to reply.

Please signup or login to give your own answer.

Click here to cancel reply.

How to parse this JSON which starts with two square brackets? – Artificial Intelligence

Answers