skip to Main Content

I’m trying to scrape https://sportsbook.draftkings.com/event/179665865?category=odds&subcategory=player-props but my code wasn’t working. When I troubleshot it, I realized it was retrieving the original page at https://sportsbook.draftkings.com/event/179665865 instead. The first link is a tab within the page at the second link.

Here is the code of a tester I created to check to see if I was scraping the correct page.

import re
import urllib.request
from bs4 import BeautifulSoup


# Download the player-props URL and parse whatever markup the server returns.
page_url = "https://sportsbook.draftkings.com/event/179665865?category=odds&subcategory=player-props"
html = urllib.request.urlopen(urllib.request.Request(page_url)).read()
soup = BeautifulSoup(html, 'html.parser')

# Collect every table-body <div> and pretty-print the first one so the
# scraped markup can be compared with what the browser shows.
my_list = list(soup.find_all("div", class_="sportsbook-table__body"))
print(my_list[0].prettify())

This code prints the HTML from https://sportsbook.draftkings.com/event/179665865 rather than from the URL I entered: https://sportsbook.draftkings.com/event/179665865?category=odds&subcategory=player-props
Any ideas on how to find a solution?

Here is my original code:

import re
import urllib.request
from bs4 import BeautifulSoup

BASE_URL = "https://sportsbook.draftkings.com"


def fetch_soup(page_url):
    """Download *page_url* and return its markup parsed with html.parser."""
    request = urllib.request.Request(page_url)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(request) as response:
        return BeautifulSoup(response.read(), "html.parser")


url = BASE_URL + "/leagues/football/3"
soup = fetch_soup(url)
nfl_table = soup.find("div", {"class": "sportsbook-offer-category-card"})
if nfl_table is None:
    # find() returns None when nothing matches; stop with a clear message
    # instead of failing later with an AttributeError on None.
    raise SystemExit("No 'sportsbook-offer-category-card' element found at " + url)
game_links = nfl_table.find_all("a", {"class": "event-cell-link"})

# Extracting links of all NFL games
extracted_links = [BASE_URL + link["href"] for link in game_links]
# dict.fromkeys drops duplicates while keeping the order in which the links
# were found, so repeated runs visit the games in the same order.
unique_links = list(dict.fromkeys(extracted_links))

# Extracting links for player props tab
# Raw string, and no leading "." wildcard: match the literal text only.
player_props_href = re.compile(r"player-props")
pp_link_list = []
for link in unique_links:
    player_prop_anchor = fetch_soup(link).find("a", attrs={"href": player_props_href})
    if player_prop_anchor is None:
        # The downloaded markup for this game has no player-props anchor;
        # report it and move on to the next game.
        print("No player props link found on", link)
        continue
    pp_link_list.append(BASE_URL + player_prop_anchor.get("href"))

# finding Receptions tag
for link in pp_link_list:
    # Prints None when the downloaded markup has no <a> whose text is
    # exactly "Receptions".
    pp_receptions = fetch_soup(link).find("a", string="Receptions")
    print(pp_receptions)

Here is the code from the tutorial for scraping JavaScript:

import requests

url = "https://sportsbook.draftkings.com/seo/event/179665865?category=odds&subcategory=player-props"

# Request headers copied from a browser session.
# Two headers from the captured request are intentionally not sent:
#   * 'Cookie' - it held session-specific cookies and tokens from one browser
#     session, which should not be hard-coded or shared. The literal was also
#     split across several lines, which leaves the string unterminated.
#     If the endpoint turns out to need cookies, supply your own at run time.
#   * 'If-None-Match' - a conditional-request header; when the server's ETag
#     still matches, it may answer "304 Not Modified" with an empty body,
#     which would leave nothing to print.
headers = {
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'Accept': '*/*',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://sportsbook.draftkings.com/event/179665865?category=odds&subcategory=player-props',
    'Accept-Language': 'en-US,en;q=0.9',
}

# A GET request carries no body, so no payload is passed. The timeout keeps
# the script from hanging indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)
# Raise on 4xx/5xx instead of printing an error page as if it were data.
response.raise_for_status()

print(response.text)

2

Answers


  1. Is this what you are looking for?

    import re
    import urllib.request
    from bs4 import BeautifulSoup
    
    
    # Download the event page and parse the returned markup.
    page_url = "https://sportsbook.draftkings.com/event/179665865?category=odds&subcategory=player-props"
    html = urllib.request.urlopen(urllib.request.Request(page_url)).read()
    soup = BeautifulSoup(html, 'html.parser')

    # Use the CSS selector a.event-cell-link in place of the
    # find_all("div", class_="sportsbook-table__body") lookup, and keep only
    # the href of each matching anchor.
    my_list = [anchor.attrs['href'] for anchor in soup.select('a.event-cell-link')]
    print(my_list)
    

    If this is what you are looking for, then you are very close: you just need a new selector, a.event-cell-link, passed to BeautifulSoup.select() to get the links.

    Hope that helps. Feel free to ask any further questions.


    Edit #1:

    To your additional question – some parts of the page are populated by JavaScript. Here is a piece of code that you might find helpful:

    import re
    import urllib.request
    from bs4 import BeautifulSoup
    import json
    
    
    STATE_PREFIX = 'window.__INITIAL_STATE__ = '

    request = urllib.request.Request("https://sportsbook.draftkings.com/event/179665865?category=odds&subcategory=player-props")
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(request) as response:
        html = response.read()

    # Look for the line that assigns window.__INITIAL_STATE__ and parse the
    # JSON text on the right-hand side of that assignment.
    json_dict = None
    for line in html.decode("utf-8").splitlines():
        line = line.strip()
        if line.startswith(STATE_PREFIX):
            # Slice the prefix off the front (str.replace would also remove
            # any later occurrence inside the payload) and drop a trailing
            # ';' only if one is actually there.
            json_str = line[len(STATE_PREFIX):].rstrip(';')
            json_dict = json.loads(json_str)
            break

    if json_dict is None:
        # Without this guard the print below would fail with a NameError.
        raise SystemExit('window.__INITIAL_STATE__ was not found in the page source')
    print(json_dict.keys())
    
    # dict_keys(['eventGroups', 'error', 'outcomes', 'sports', 'displayGroup', 'offers', 'settings', 'loading', 'quickLinks', 'strapline', 'featured', 'user', 'event', 'modals', 'experiments', 'rewards', 'myBets', 'seo', 'promotions', 'userOptedInPromotions', 'carouselCards', 'betslip', 'teamPages', 'playoffPages'])
    

    Now you can work with the dict. Let me know if this helps.

    Login or Signup to reply.
  2. You could pull out the JSON string within the HTML, or just get the JSON from the API. Either way, you’ll need to iterate through it to parse it. It is nested JSON, so it can be tricky (especially for someone new/learning), but it is possible to do:

    import requests
    import pandas as pd
    
    
    url = 'https://sportsbook.draftkings.com//sites/US-SB/api/v1/event/179665865'
    headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Mobile Safari/537.36'}
    payload = {
        'includePromotions': 'true',
        'format': 'json',
    }

    # Outcome fields that are left out of the table.
    skipped_keys = {'providerOutcomeId', 'providerId', 'providerOfferId'}

    # The timeout keeps the script from hanging on an unresponsive server;
    # raise_for_status() stops on 4xx/5xx before .json() is attempted.
    response = requests.get(url, headers=headers, params=payload, timeout=30)
    response.raise_for_status()
    data = response.json()
    events = data['eventCategories']

    # Flatten the nested response into one row per outcome:
    # event category -> componentized offer -> offer group -> play -> outcome.
    rows = []
    for event in events:
        for each in event['componentizedOffers']:
            for offer in each['offers']:
                for play in offer:
                    for outcome in play['outcomes']:
                        # Start a fresh dict for every outcome. If a single
                        # dict were reused, a field missing from one outcome
                        # would silently keep the previous outcome's value
                        # (this appears to be why 'line' and 'participant'
                        # repeat across unrelated rows in the sample output).
                        row = {
                            'name': event['name'],
                            'subcategoryName': each['subcategoryName'],
                            'offers.label': play['label'],
                        }
                        row.update(
                            (key, value)
                            for key, value in outcome.items()
                            if key not in skipped_keys
                        )
                        rows.append(row)

    df = pd.DataFrame(rows)
    

    Output (of first 25 rows):

    print (df.head(25).to_string())
           name   subcategoryName      offers.label              label oddsAmerican  oddsDecimal oddsFractional  line        participant criterionName
    0   Popular              Game      Point Spread      TB Buccaneers         -117         1.86          17/20   3.5      TB Buccaneers           NaN
    1   Popular              Game      Point Spread         GB Packers         -105         1.96          19/20  -3.5         GB Packers           NaN
    2   Popular              Game      Total Points               Over         -112         1.90           9/10  51.0         GB Packers           NaN
    3   Popular              Game      Total Points              Under         -109         1.92          10/11  51.0         GB Packers           NaN
    4   Popular              Game         Moneyline      TB Buccaneers         +150         2.50            6/4  51.0      TB Buccaneers           NaN
    5   Popular              Game         Moneyline         GB Packers         -175         1.58            4/7  51.0         GB Packers           NaN
    6   Popular  Touchdown Scorer  Touchdown Scorer      Davante Adams         +525         6.25           21/4  51.0      Davante Adams         First
    7   Popular  Touchdown Scorer  Touchdown Scorer      Davante Adams         +525         6.25           21/4  51.0      Davante Adams          Last
    8   Popular  Touchdown Scorer  Touchdown Scorer      Davante Adams         -182         1.55          11/20  51.0      Davante Adams      To Score
    9   Popular  Touchdown Scorer  Touchdown Scorer        Aaron Jones         +850         9.50           17/2  51.0        Aaron Jones         First
    10  Popular  Touchdown Scorer  Touchdown Scorer        Aaron Jones         +850         9.50           17/2  51.0        Aaron Jones          Last
    11  Popular  Touchdown Scorer  Touchdown Scorer        Aaron Jones         +115         2.15          23/20  51.0        Aaron Jones      To Score
    12  Popular  Touchdown Scorer  Touchdown Scorer         Mike Evans        +1000        11.00           10/1  51.0         Mike Evans         First
    13  Popular  Touchdown Scorer  Touchdown Scorer         Mike Evans        +1000        11.00           10/1  51.0         Mike Evans          Last
    14  Popular  Touchdown Scorer  Touchdown Scorer         Mike Evans         +135         2.35          27/20  51.0         Mike Evans      To Score
    15  Popular  Touchdown Scorer  Touchdown Scorer      Robert Tonyan        +1100        12.00           11/1  51.0      Robert Tonyan         First
    16  Popular  Touchdown Scorer  Touchdown Scorer      Robert Tonyan        +1100        12.00           11/1  51.0      Robert Tonyan          Last
    17  Popular  Touchdown Scorer  Touchdown Scorer      Robert Tonyan         +150         2.50            6/4  51.0      Robert Tonyan      To Score
    18  Popular  Touchdown Scorer  Touchdown Scorer       Chris Godwin        +1200        13.00           12/1  51.0       Chris Godwin         First
    19  Popular  Touchdown Scorer  Touchdown Scorer       Chris Godwin        +1200        13.00           12/1  51.0       Chris Godwin          Last
    20  Popular  Touchdown Scorer  Touchdown Scorer       Chris Godwin         +165         2.65          33/20  51.0       Chris Godwin      To Score
    21  Popular  Touchdown Scorer  Touchdown Scorer  Leonard Fournette        +1300        14.00           13/1  51.0  Leonard Fournette         First
    22  Popular  Touchdown Scorer  Touchdown Scorer  Leonard Fournette        +1300        14.00           13/1  51.0  Leonard Fournette          Last
    23  Popular  Touchdown Scorer  Touchdown Scorer  Leonard Fournette         +190         2.90          19/10  51.0  Leonard Fournette      To Score
    24  Popular  Touchdown Scorer  Touchdown Scorer       Allen Lazard        +1400        15.00           14/1  51.0       Allen Lazard         First
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search