skip to Main Content

I have a list of JSON responses that I built while scraping a store website, using this code:

# POST the query, then keep the parsed JSON body of the reply.
reply = requests.post(url, data=payload, headers=headers)
responses.append(reply.json())

Now my issue is that each JSON response has a specific number of elements, and each element contains ('title', 'id', 'price', 'place', 'region').

I want to iterate through the list of jsons and then iterate through the 48 elements to parse those pieces of information into a CSV file

I’ve tried this code :

# Flatten every announcement of every scraped response into one row per
# announcement, then load the rows into a DataFrame.
data = []
# NOTE(review): `for j in responses` yields each element of the list (the
# parsed JSON dict appended by the scraping call), not an integer position.
# Using that dict as an index in `responses[j]` is what raises
# "TypeError: list indices must be integers or slices, not dict".
for j in responses : # iterating through the list of JSON responses
    for i in range(len(responses[j]['data']['search']['announcements']['data'])) : # iterating through the announcements by position
        # One row per announcement; the field order must match the `columns`
        # list passed to pd.DataFrame below.
        data.append([responses[j]["data"]["search"]["announcements"]['data'][i]['id'],
                     responses[j]["data"]["search"]["announcements"]['data'][i]['title'],
                     responses[j]["data"]["search"]["announcements"]['data'][i]['createdAt'],
                     responses[j]["data"]["search"]["announcements"]['data'][i]['description'],
                     # only the first entry of 'cities' is used
                     responses[j]["data"]["search"]["announcements"]['data'][i]['cities'][0]['name'],
                     responses[j]["data"]["search"]["announcements"]['data'][i]['cities'][0]['region']['name'],
                     responses[j]["data"]["search"]["announcements"]['data'][i]['price']
                     
                 
                ])
        # debug output: prints the current outer-loop value once per announcement
        print(j)
# Seven columns, in the same order as the values appended to each row above.
Cars_data = pd.DataFrame(data,columns=['id','Car_name','Post_Created','description','city_name','wilaya','price'])

I get this error:

TypeError Traceback (most recent call last) ~AppDataLocalTempipykernel_164838939902.py in
1 data = [] 2 for j in resp :
—-> 3 for i in range(len(resp[j][‘data’][‘search’][‘announcements’][‘data’])) :
4 data.append([resp[j]["data"]["search"]["announcements"][‘data’][i][‘id’],
5 resp[j]["data"]["search"]["announcements"][‘data’][i][‘title’],

  TypeError: list indices must be integers or slices, not dict

Before that, I was getting an index-out-of-range error.

Here is a sample of the elements in one JSON response (they all have the same format):

> {'id': '34456405',   'title': 'Hyundai i10 2012 GLS',   'slug':
> 'city-car-hyundai-i10-2012-gls-alger-centre-algiers-algeria',  
> 'createdAt': '2022-12-07T11:33:06.000Z',   'isFromStore': False,  
> 'isCommentEnabled': False,   'hasDelivery': False,   'deliveryType':
> None,   'description': 'سيارة نقية و مغلفة، فيها شوية صبيغة على برا
> كيما في الصور، محرك ما شاء الله n 10/10 ما يسخن ما ينقص زيت. 00 مصروف
> ',   'status': 'PUBLISHED',   'cities': [{'id': '556',
>     'name': 'Alger centre',
>     'slug': 'alger-centre-556',
>     'region': {'id': '16',
>      'name': 'Algiers',
>      'slug': 'alger-16',
>      '__typename': 'Region'},
>     '__typename': 'City'}],   'store': None,   'user': {'id': '87200', '__typename': 'User'},   'defaultMedia': {'mediaUrl':
> 'https://cdn9.ouedkniss.com/400/medias/announcements/images/gJJJl/7LIA1J26v5LTHmkp9Bn4zNr4NTrHwF1lJ95P4UZm.jpg',
> '__typename': 'AnnouncementMedia'},   'price': 1450000,  
> 'pricePreview': 145,   'priceUnit': 'MILLION',   'oldPrice': None,  
> 'priceType': 'FIXED',   'exchangeType': None,   '__typename':
> 'Announcement',   'smallDescription': [{'valueText': ['255200km'],
>     '__typename': 'AnnouncementSpecDisplay'},    {'valueText': ['Petrol'], '__typename': 'AnnouncementSpecDisplay'},    {'valueText':
> ['1.1'], '__typename': 'AnnouncementSpecDisplay'},    {'valueText':
> ['Manuel'], '__typename': 'AnnouncementSpecDisplay'},    {'valueText':
> ['Silver grey'], '__typename': 'AnnouncementSpecDisplay'},   
> {'valueText': ['Grey card'], '__typename':
> 'AnnouncementSpecDisplay'}],   'noAdsense': False},  {'id':
> '34453476',   'title': 'Toyota Auris 2009 Auris',   'slug':
> 'average-sedan-toyota-auris-2009-beni-messous-algiers-algeria',  
> 'createdAt': '2022-12-07T11:32:54.000Z',   'isFromStore': False,  
> 'isCommentEnabled': True,   'hasDelivery': False,   'deliveryType':
> None,   'description': '',   'status': 'PUBLISHED',   'cities':
> [{'id': '567',
>     'name': 'Beni messous',
>     'slug': 'beni-messous-567',
>     'region': {'id': '16',
>      'name': 'Algiers',
>      'slug': 'alger-16',
>      '__typename': 'Region'},
>     '__typename': 'City'}],   'store': None,   'user': {'id': '3203586', '__typename': 'User'},   'defaultMedia': {'mediaUrl':
> 'https://cdn9.ouedkniss.com/400/medias/announcements/images/9kzK4/gGlMMM4jDueAFraAUioidvqJYUkYDU9zDRfX0o5N.jpg',
> '__typename': 'AnnouncementMedia'},   'price': 10000,  
> 'pricePreview': 1,   'priceUnit': 'MILLION',   'oldPrice': None,  
> 'priceType': 'NEGOTIABLE',   'exchangeType': None,   '__typename':
> 'Announcement',   'smallDescription': [{'valueText': ['182000km'],
>     '__typename': 'AnnouncementSpecDisplay'},    {'valueText': ['Diesel'], '__typename': 'AnnouncementSpecDisplay'},    {'valueText':
> ['2.0 D4D 126ch'], '__typename': 'AnnouncementSpecDisplay'},   
> {'valueText': ['Automatic'], '__typename': 'AnnouncementSpecDisplay'},
> {'valueText': ['Black'], '__typename': 'AnnouncementSpecDisplay'},   
> {'valueText': ['Grey card'], '__typename':
> 'AnnouncementSpecDisplay'}],   'noAdsense': False}

2

Answers


  1. You can try iterating element by element instead of indexing with list[index]:

    # Iterate over the parsed responses themselves: each `response` is already
    # the dict, so no integer index into `responses` is needed.
    for response in responses:
        for item in response['data']['search']['announcements']['data']:
            # 'cities' is a list; as in the question's code, use its first entry.
            city = item['cities'][0]
            # Build one full row per announcement, in the DataFrame column order:
            # id, Car_name, Post_Created, description, city_name, wilaya, price
            data.append([
                item['id'],
                item['title'],
                item['createdAt'],
                item['description'],
                city['name'],
                city['region']['name'],
                item['price'],
            ])
    

    I find it easier to work with the objects themselves.

    Login or Signup to reply.
  2. Here, `response` is your list of dicts:

    # response = [{}, {}]  (a list of announcement dicts)
    base_data = []
    for announcement in response:
        # Values are collected in the order the keys appear in the dict.
        data = []
        for field, value in announcement.items():
            if field in ("id", "createdAt", "description", "price"):
                # Plain fields are copied as-is.
                data.append(value)
            elif field == "cities":
                # Only the first city is used: its name, then its region's name.
                first_city = value[0]
                data.append(first_city.get("name"))
                data.append(first_city.get("region").get("name"))
        base_data.append(data)

    print(base_data)
    
    

    [['34456405', '2022-12-07T11:33:06.000Z', 'ارة نقية و مغلفة، فيها شوية صبيغة على براn كيما في الصور، محرك ما شاء الله n 10/10 ما يسخن ما ينقص زيت. 00 مصروفn ', 'Alger centre', 'Algiers', 1450000], ['34453476', '2022-12-07T11:32:54.000Z', '', 'Beni messous', 'Algiers', 10000]]

    This gets the values from each dict and stores them in a list of lists.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search