I'm trying to web scrape eBay using Python and BeautifulSoup, but I'm getting a "list index out of range" error

ColeMalcolm
June 25, 2021
249 views
0 votes
2 Answers

As in the title, I’m trying to write an eBay web-scraping program. When I try to find the price, it raises a "list index out of range" error, even though the same approach works for getting the product name.

The url is: https://www.ebay.com.au/sch/i.html?_from=R40&_nkw=switch&_sacat=0&_pgn=1

import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

# Open the connection and download the page

# Download the search-results page.
# NOTE(review): `my_url` is not defined in the visible snippet; it is expected
# to hold the search URL given in the question text.
# The context manager closes the connection even if read() raises. The
# original `uClient.close` (no parentheses) only referenced the method and
# never actually called it.
with uReq(my_url) as uClient:
    page_html = uClient.read()

# Parse the downloaded HTML; the code that follows queries `page_soup`.
page_soup = soup(page_html, "html.parser")

# grabs each product

# Collect every <div> whose class attribute is "s-item__wrapper clearfix"
# (presumably one per search-result card -- verify against the page markup).
containers = page_soup.findAll("div",  {"class" : "s-item__wrapper clearfix"})

# NOTE(review): the CSV file is opened for writing, but nothing in this
# snippet writes to it or closes it.
filename = "EbayWebscraping.csv"
f = open(filename, "w")

# NOTE(review): `headers` is defined but never written to the file here.
headers = "product_name, quality"

for container in containers:

    # findAll returns a list of matching tags; [0] takes the first match.
    title_container = container.findAll('h3', {'class' : 's-item__title'} )
    product_name = title_container[0].text

#Where the problem is#

    # When a container has no 's-item__price' span, findAll returns an empty
    # list, so price_container[0] raises "IndexError: list index out of range".
    price_container = container.findAll('span', {'class' : 's-item__price'})
    price = price_container[0].text



    print('Product: ' + product_name)
    print('Price: ' + price)

Answers

If you inspect `containers`, you will see that the element at index 0 has no product or price info, so you can start from index 1. Alternatively, you can use try-except instead.

import requests
from bs4 import BeautifulSoup


# Fetch the search-results page. The timeout keeps the script from hanging
# indefinitely if the server never responds.
page = requests.get(
    "https://www.ebay.com.au/sch/i.html?_from=R40&_nkw=switch&_sacat=0&_pgn=1",
    timeout=30,
)
soup = BeautifulSoup(page.text, "lxml")

# Skip the container at index 0: as described above, it carries no product
# or price info.
containers = soup.findAll("div", {"class": "s-item__wrapper clearfix"})[1:]
for container in containers:
    title_tag = container.find("h3", {"class": "s-item__title"})
    price_tag = container.find("span", class_="s-item__price")
    # find() returns None when nothing matches; skip such containers instead
    # of failing with AttributeError on `.text`.
    if title_tag is None or price_tag is None:
        continue
    print(title_tag.text)
    print(price_tag.text)

Output:

30 in 1 Game Collection Nintendo Switch Brand New Sealed
AU $47.00
Street Fighter 30th Anniversary Collection Nintendo Switch Brand New Sealed
AU $47.00
For Nintendo Switch Case ZUSLAB Clear Slim Soft Heavy Duty Shockproof Cover
AU $9.99 to AU $16.95
.....

You can also check if the selector is present before doing further processing:

# find() returns None when the container has no price element, so test the
# result before reading `.text`. Looking the tag up once also avoids
# searching the container a second time inside the `if` body.
price_tag = container.find('span', {'class' : 's-item__price'})
if price_tag is not None:
    price = price_tag.text

You also don’t need to index with [0]: calling .text on the result of find() works. Additionally, there’s no need to use findAll here, since you are already iterating over the containers and each container holds the title and price elements inside it. Think of the container as a matryoshka doll, if that makes more sense.

You just have to call the title and price selectors, e.g.:

containers = page_soup.findAll("div", {"class": "s-item__wrapper clearfix"})

for container in containers:
    # find() returns the first matching tag, or None when nothing matches.
    title_tag = container.find('h3', {'class': 's-item__title'})
    price_tag = container.find('span', {'class': 's-item__price'})

    # Some containers hold no title or price; skip them rather than failing
    # with AttributeError when reading `.text` from None.
    if title_tag is None or price_tag is None:
        continue

    product_name = title_tag.text
    price = price_tag.text

Code that paginates through all pages (an example is also available in an online IDE):

from bs4 import BeautifulSoup
import requests, json, lxml

# https://requests.readthedocs.io/en/latest/user/quickstart/#custom-headers
# A browser-like User-Agent is sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
}

params = {
    '_nkw': 'switch',              # search query
    '_pgn': 1                      # page number
}

# Accumulates one {"title", "price", "link"} dict per listing across all pages.
data = []

while True:
    page = requests.get('https://www.ebay.com/sch/i.html', params=params, headers=headers, timeout=30)
    soup = BeautifulSoup(page.text, 'lxml')

    print(f"Extracting page: {params['_pgn']}")
    print("-" * 10)

    for products in soup.select(".s-item__info"):
        title_tag = products.select_one(".s-item__title span")
        price_tag = products.select_one(".s-item__price")
        link_tag = products.select_one(".s-item__link")

        # select_one() returns None when nothing matches; skip incomplete
        # cards instead of crashing with AttributeError/TypeError.
        if title_tag is None or price_tag is None or link_tag is None:
            continue

        data.append({
            "title": title_tag.text,
            "price": price_tag.text,
            "link": link_tag["href"],
        })

    # Stop when the page has no "next page" control; otherwise request the
    # following page on the next iteration.
    if soup.select_one(".pagination__next") is None:
        break
    params['_pgn'] += 1

# Print once, after the loop. Inside the loop the `break` skipped this line on
# the last page and every earlier page dumped the partial results.
print(json.dumps(data, indent=2, ensure_ascii=False))

Example output

Extracting page: 1
----------
[
  {
    "title": "Shop on eBay",
    "price": "$20.00",
    "link": "https://ebay.com/itm/123456?hash=item28caef0a3a:g:E3kAAOSwlGJiMikD&amdata=enc%3AAQAHAAAAsJoWXGf0hxNZspTmhb8%2FTJCCurAWCHuXJ2Xi3S9cwXL6BX04zSEiVaDMCvsUbApftgXEAHGJU1ZGugZO%2FnW1U7Gb6vgoL%2BmXlqCbLkwoZfF3AUAK8YvJ5B4%2BnhFA7ID4dxpYs4jjExEnN5SR2g1mQe7QtLkmGt%2FZ%2FbH2W62cXPuKbf550ExbnBPO2QJyZTXYCuw5KVkMdFMDuoB4p3FwJKcSPzez5kyQyVjyiIq6PB2q%7Ctkp%3ABlBMULq7kqyXYA"
  },
  {
    "title": "Jabra Elite 7 Pro - Black Certified Refurbished",
    "price": "$82.99",
    "link": "https://www.ebay.com/itm/165621993671?epid=12050643207&hash=item268fd710c7:g:gMwAAOSwx8Bi9Fwg&amdata=enc%3AAQAHAAAA4NGq89JefbLJPItXeQ93BWhuE9Wt3pRHvU92HE2wiaGKAUlhQ8hDKu9iP2m5gdNQc8t8ujFSUwXJSyCxrnjh9qaxVXN0s0V7clbWiZTPr7Co3AwECpNLit29NfC%2BXbQxEv7kePJokjM9wnHv%2BAamoTlPl0K8BHa0S3FVrb7IUn9s%2FmvdzTiGUd4DHYNdIEQeFNK7zqB8%2BlWrukvfUz62JemzooE1UYtLbCtQwfIDP1F2GbOL4DoRwHXynUtpduYPA8TX6qZOv8eL44j4hNnP6%2BjGBaDGCReJ6ld13xxhYEUf%7Ctkp%3ABFBM3qnT0f5g"
  },
  {
    "title": "New Listingnintendo switch bundle ",
    "price": "$225.00",
    "link": "https://www.ebay.com/itm/354344900745?hash=item52809a1889:g:egsAAOSw-qZjUQl-&amdata=enc%3AAQAHAAAA4MkbjLSYGoCVhjI%2BBE%2F1cIoqAfUyH73WJdSL7XugI%2BMtaCzRdexKqk3SnxM3PT5yMHSrChuJdcLC6ESDVvNs2j01yTzx8Cl9i9CQbV89Gp9tzPQNIaBGkVwSh989DJ4lmSmCKywnPQ9yLQqY3fz96kBJbbZwGd63yks4tTuZOiNcAl7PTriDOrVNHF%2FUXm3s18tajQeqtrZxW4pb8nWa5%2FtdmrwDphxTKmA9sONVXfKX5oFujpDxrwswe%2FgoJi2XGjGqe06ruHbzH295EHuRLUv4Tn0R2Kf7CKaman2IEpPo%7Ctkp%3ABFBM3qnT0f5g"
  },
  # ...
]

As an alternative, you can use Ebay Organic Results API from SerpApi. It’s a paid API with a free plan that handles blocks and parsing on their backend.

Example code that paginates through all pages:

from serpapi import EbaySearch
import os, json

params = {
    "api_key": os.getenv("API_KEY"),  # serpapi api key
    "engine": "ebay",                 # search engine
    "ebay_domain": "ebay.com",        # ebay domain
    "_nkw": "switch",                 # search query
    "_pgn": 1,                        # page number (the comma was missing here: SyntaxError)
    "LH_Sold": "1"                    # shows sold items
}

search = EbaySearch(params)        # where data extraction happens

# Counts how many result pages have been processed so far.
page_num = 0

# Accumulates one {"price", "link"} dict per organic result across all pages.
data = []

while True:
    results = search.get_dict()     # JSON -> Python dict

    # The API reports failures under an "error" key; print it and stop paginating.
    if "error" in results:
        print(results["error"])
        break

    for organic_result in results.get("organic_results", []):
        data.append({
            "price": organic_result.get("price"),
            "link": organic_result.get("link"),
        })

    page_num += 1
    print(page_num)

    # Stop when the response advertises no next page.
    if "next" not in results.get("pagination", {}):
        break

    # NOTE(review): assumes `search` reads this same dict object on the next
    # get_dict() call, so the mutation selects the next page -- confirm
    # against the serpapi client.
    params['_pgn'] += 1

# Print once, after the loop. Inside the loop the `break` skipped this line on
# the last page, so the final results were never shown.
print(json.dumps(data, indent=2))

Output:

[
   {
    "price": {
      "raw": "$70.00",
      "extracted": 70.0
    },
    "link": "https://www.ebay.com/itm/334599074264?hash=item4de7a8b1d8:g:Vy4AAOSwLLNjUK2i&amdata=enc%3AAQAHAAAAkKM1u%2BmETRpbgLxiKL9uymVFiae4NU2iJa00z6qQK4lyzoe477sEDhhVVjF39BDTAOJQ4PLP%2BoXj1xf5wH8Ja5v1oAmO%2FNRlSFlTK80FlnQkHpIYswiG%2BNH44f98M5LWkwgeOb%2FRVc9uU6Ep9HYaV9JV39LZFRiOJLOGgFvoRxLD4731y0VuzM%2BcPXThX7aXtA%3D%3D%7Ctkp%3ABk9SR4KOv9H-YA"
  },
  {
    "price": {
      "raw": "$169.95",
      "extracted": 169.95
    },
    "link": "https://www.ebay.com/itm/185625421454?epid=4050657390&hash=item2b3823268e:g:WrIAAOSwPKdjPfvK&amdata=enc%3AAQAHAAAAoBkI9bwtrhJH9mDVPkHzYgem23XBXWHO%2FghvdNjkqq2RX%2BCoy33RIc%2FxXg%2BHWp0Y5jUL9%2BOfnpKyRshkZTRttODPLt%2Fu0VIfjunwr%2F6r9lKHiZ9w%2FnaITM0BTU0FeU1gKw2dERJwDKrzgCPNc%2FStsq0BdCUYNxQeLG4I1ezDBYZSseUv96U33wRLz%2BJ94pP6UgnCp2nj4oX3qFujBLsvG%2F8%3D%7Ctkp%3ABk9SR4KOv9H-YA"
  },
  # ...
]

Disclaimer: I work for SerpApi.

Please signup or login to give your own answer.

Click here to cancel reply.