skip to Main Content

As in the title, I’m trying to write an eBay web-scraping program, yet when I try to find the price it raises a list index error ("IndexError: list index out of range"), even though the same approach works for getting the product name.

The url is: https://www.ebay.com.au/sch/i.html?_from=R40&_nkw=switch&_sacat=0&_pgn=1

import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

# open the connection and grab the page

# Download the search-results page and parse it so `page_soup` exists for the
# container lookup below.
# NOTE(review): `my_url` must be defined earlier in the script, e.g.
# my_url = "https://www.ebay.com.au/sch/i.html?_from=R40&_nkw=switch&_sacat=0&_pgn=1"
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()  # bug fix: original `uClient.close` lacked parentheses, so the connection was never closed
page_soup = soup(page_html, "html.parser")  # bug fix: page_soup was used later but never created

# grabs each product listing

# Each search result is wrapped in one of these divs.
containers = page_soup.findAll("div", {"class": "s-item__wrapper clearfix"})

filename = "EbayWebscraping.csv"
f = open(filename, "w")

# Bug fixes: the header row was never written to the file and had no trailing
# newline; "quality" was a typo — the second column scraped below is the price.
headers = "product_name, price\n"
f.write(headers)

for container in containers:
    # Bug fix for the reported IndexError: the first wrapper on an eBay results
    # page is a "Shop on eBay" placeholder card with no title/price children,
    # so `findAll(...)[0]` raised "list index out of range" for the price.
    # Using .find() plus a None check skips any listing missing either field.
    title_tag = container.find('h3', {'class': 's-item__title'})
    price_tag = container.find('span', {'class': 's-item__price'})
    if title_tag is None or price_tag is None:
        continue  # placeholder or malformed listing — nothing to record

    product_name = title_tag.text
    price = price_tag.text

    print('Product: ' + product_name)
    print('Price: ' + price)
    # Persist the row (commas stripped so the two-column CSV stays well-formed).
    f.write(product_name.replace(',', '') + ',' + price.replace(',', '') + '\n')

f.close()  # the file was opened above but never closed in the original

2

Answers


  1. If you inspect the containers you will see that the one at index 0 has no product or price info, so you can start from index 1; alternatively, you can use try-except instead.

    import requests
    from bs4 import BeautifulSoup
    
    
    # Fetch the first results page and parse it with the lxml parser.
    page = requests.get("https://www.ebay.com.au/sch/i.html?_from=R40&_nkw=switch&_sacat=0&_pgn=1")
    soup = BeautifulSoup(page.text, "lxml")
    
    # Slice off the first wrapper: it is a placeholder card with no product data.
    containers = soup.findAll("div",  {"class" : "s-item__wrapper clearfix"})[1:]
    for container in containers:
        title_tag = container.find('h3', {'class' : 's-item__title'} )
        price_tag = container.find("span", class_="s-item__price")
        print(title_tag.text)
        print(price_tag.text)
    

    Output:

    30 in 1 Game Collection Nintendo Switch Brand New Sealed
    AU $47.00
    Street Fighter 30th Anniversary Collection Nintendo Switch Brand New Sealed
    AU $47.00
    For Nintendo Switch Case ZUSLAB Clear Slim Soft Heavy Duty Shockproof Cover
    AU $9.99 to AU $16.95
    .....
    
    Login or Signup to reply.
  2. You can also check if the selector is present before doing further processing:

    # Guard clause: proceed only when at least one price <span> exists in the container
    if container.findAll('span', {'class' : 's-item__price'}):
        # do something
    

    You also don’t need to access the [0] index; .text alone would work perfectly. Additionally, there’s no need to use findAll, since you are already extracting data from each container, whose selector holds the title and price inside it. Think of the container as a matryoshka doll if that makes more sense.

    You just have to call text and price selectors e.g:

    # Collect every listing wrapper, then pull the title and price text out of each one.
    containers = page_soup.findAll("div", {"class": "s-item__wrapper clearfix"})
    
    for container in containers:
        # .find returns the first matching descendant; .text extracts its string content.
        title_tag = container.find('h3', {'class': 's-item__title'})
        price_tag = container.find('span', {'class': 's-item__price'})
        product_name = title_tag.text
        price = price_tag.text
    

    Code that paginates through all pages and example in online IDE.

    from bs4 import BeautifulSoup
    import requests, json, lxml
    
    # https://requests.readthedocs.io/en/latest/user/quickstart/#custom-headers
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
    }
        
    params = {
        '_nkw': 'switch',              # search query  
        '_pgn': 1                      # page number
    }
    
    data = []
    
    # Paginate until eBay stops showing a "next page" control.
    while True:
        page = requests.get('https://www.ebay.com/sch/i.html', params=params, headers=headers, timeout=30)
        soup = BeautifulSoup(page.text, 'lxml')
        
        print(f"Extracting page: {params['_pgn']}")
    
        print("-" * 10)
        
        for products in soup.select(".s-item__info"):
            title = products.select_one(".s-item__title span").text
            price = products.select_one(".s-item__price").text
            link = products.select_one(".s-item__link")["href"]
            
            data.append({
              "title" : title,
              "price" : price,
              "link" : link
            })
    
        if soup.select_one(".pagination__next"):
            params['_pgn'] += 1
        else:
            break
    
    # Bug fix: this print was inside the while loop after the break check, so the
    # complete dataset was never shown after the final page (and partial data was
    # re-printed on every earlier iteration). Print once, after pagination ends.
    print(json.dumps(data, indent=2, ensure_ascii=False))
    

    Example output

    Extracting page: 1
    ----------
    [
      {
        "title": "Shop on eBay",
        "price": "$20.00",
        "link": "https://ebay.com/itm/123456?hash=item28caef0a3a:g:E3kAAOSwlGJiMikD&amdata=enc%3AAQAHAAAAsJoWXGf0hxNZspTmhb8%2FTJCCurAWCHuXJ2Xi3S9cwXL6BX04zSEiVaDMCvsUbApftgXEAHGJU1ZGugZO%2FnW1U7Gb6vgoL%2BmXlqCbLkwoZfF3AUAK8YvJ5B4%2BnhFA7ID4dxpYs4jjExEnN5SR2g1mQe7QtLkmGt%2FZ%2FbH2W62cXPuKbf550ExbnBPO2QJyZTXYCuw5KVkMdFMDuoB4p3FwJKcSPzez5kyQyVjyiIq6PB2q%7Ctkp%3ABlBMULq7kqyXYA"
      },
      {
        "title": "Jabra Elite 7 Pro - Black Certified Refurbished",
        "price": "$82.99",
        "link": "https://www.ebay.com/itm/165621993671?epid=12050643207&hash=item268fd710c7:g:gMwAAOSwx8Bi9Fwg&amdata=enc%3AAQAHAAAA4NGq89JefbLJPItXeQ93BWhuE9Wt3pRHvU92HE2wiaGKAUlhQ8hDKu9iP2m5gdNQc8t8ujFSUwXJSyCxrnjh9qaxVXN0s0V7clbWiZTPr7Co3AwECpNLit29NfC%2BXbQxEv7kePJokjM9wnHv%2BAamoTlPl0K8BHa0S3FVrb7IUn9s%2FmvdzTiGUd4DHYNdIEQeFNK7zqB8%2BlWrukvfUz62JemzooE1UYtLbCtQwfIDP1F2GbOL4DoRwHXynUtpduYPA8TX6qZOv8eL44j4hNnP6%2BjGBaDGCReJ6ld13xxhYEUf%7Ctkp%3ABFBM3qnT0f5g"
      },
      {
        "title": "New Listingnintendo switch bundle ",
        "price": "$225.00",
        "link": "https://www.ebay.com/itm/354344900745?hash=item52809a1889:g:egsAAOSw-qZjUQl-&amdata=enc%3AAQAHAAAA4MkbjLSYGoCVhjI%2BBE%2F1cIoqAfUyH73WJdSL7XugI%2BMtaCzRdexKqk3SnxM3PT5yMHSrChuJdcLC6ESDVvNs2j01yTzx8Cl9i9CQbV89Gp9tzPQNIaBGkVwSh989DJ4lmSmCKywnPQ9yLQqY3fz96kBJbbZwGd63yks4tTuZOiNcAl7PTriDOrVNHF%2FUXm3s18tajQeqtrZxW4pb8nWa5%2FtdmrwDphxTKmA9sONVXfKX5oFujpDxrwswe%2FgoJi2XGjGqe06ruHbzH295EHuRLUv4Tn0R2Kf7CKaman2IEpPo%7Ctkp%3ABFBM3qnT0f5g"
      },
      # ...
    ]
    

    As an alternative, you can use Ebay Organic Results API from SerpApi. It’s a paid API with a free plan that handles blocks and parsing on their backend.

    Example code that paginates through all pages:

    from serpapi import EbaySearch
    import os, json
    
    params = {
        "api_key": os.getenv("API_KEY"),  # serpapi api key    
        "engine": "ebay",                 # search engine
        "ebay_domain": "ebay.com",        # ebay domain
        "_nkw": "switch",                 # search query
        "_pgn": 1,                        # page number (bug fix: the comma here was missing, a SyntaxError)
        "LH_Sold": "1"                    # shows sold items
    }
    
    search = EbaySearch(params)        # where data extraction happens
    
    page_num = 0
    
    data = []
    
    # Keep requesting pages until the API reports no "next" page or an error.
    while True:
        results = search.get_dict()     # JSON -> Python dict
    
        if "error" in results:
            print(results["error"])
            break
        
        for organic_result in results.get("organic_results", []):
            link = organic_result.get("link")
            price = organic_result.get("price")
    
            data.append({
              "price" : price,
              "link" : link
            })
                        
        page_num += 1
        print(page_num)
        
        if "next" in results.get("pagination", {}):
            params['_pgn'] += 1
    
        else:
            break
    
    # Bug fix: this print was inside the loop after the break check, so the
    # accumulated data was never printed once the last page was reached.
    print(json.dumps(data, indent=2))
    

    Output:

    [
       {
        "price": {
          "raw": "$70.00",
          "extracted": 70.0
        },
        "link": "https://www.ebay.com/itm/334599074264?hash=item4de7a8b1d8:g:Vy4AAOSwLLNjUK2i&amdata=enc%3AAQAHAAAAkKM1u%2BmETRpbgLxiKL9uymVFiae4NU2iJa00z6qQK4lyzoe477sEDhhVVjF39BDTAOJQ4PLP%2BoXj1xf5wH8Ja5v1oAmO%2FNRlSFlTK80FlnQkHpIYswiG%2BNH44f98M5LWkwgeOb%2FRVc9uU6Ep9HYaV9JV39LZFRiOJLOGgFvoRxLD4731y0VuzM%2BcPXThX7aXtA%3D%3D%7Ctkp%3ABk9SR4KOv9H-YA"
      },
      {
        "price": {
          "raw": "$169.95",
          "extracted": 169.95
        },
        "link": "https://www.ebay.com/itm/185625421454?epid=4050657390&hash=item2b3823268e:g:WrIAAOSwPKdjPfvK&amdata=enc%3AAQAHAAAAoBkI9bwtrhJH9mDVPkHzYgem23XBXWHO%2FghvdNjkqq2RX%2BCoy33RIc%2FxXg%2BHWp0Y5jUL9%2BOfnpKyRshkZTRttODPLt%2Fu0VIfjunwr%2F6r9lKHiZ9w%2FnaITM0BTU0FeU1gKw2dERJwDKrzgCPNc%2FStsq0BdCUYNxQeLG4I1ezDBYZSseUv96U33wRLz%2BJ94pP6UgnCp2nj4oX3qFujBLsvG%2F8%3D%7Ctkp%3ABk9SR4KOv9H-YA"
      },
      # ...
    ]
    

    Disclaimer, I work for SerpApi.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search