skip to Main Content

I am trying to retrieve a JavaScript variable that holds JSON data from a web page and load it into Python.

# Download the product page, parse it with lxml, and print the script
# element at index 10 (the 11th <script> tag in the document).
url = "https://limitededt.com/collections/footwear/products/adidas-originals-jonah-hill-superstar-fw7577"
response = requests.get(url)
source = response.text

soup = BeautifulSoup(source, 'lxml')
scripts = soup.findAll('script')
print(scripts[10])

The code above produces:

<script>window.ShopifyAnalytics = window.ShopifyAnalytics || {};
window.ShopifyAnalytics.meta = window.ShopifyAnalytics.meta || {};
window.ShopifyAnalytics.meta.currency = 'SGD';
var meta = {"product":{"id":4738795503687,"gid":"gid://shopify/Product/4738795503687","vendor":"adidas Originals","type":"footwear","variants":[{"id":32432939204679,"price":18000,"name":"+ Jonah Hill Superstar - 7","public_title":"7","sku":"FW7577"},{"id":32432939237447,"price":18000,"name":"+ Jonah Hill Superstar - 7.5","public_title":"7.5","sku":"FW7577"},{"id":32432939270215,"price":18000,"name":"+ Jonah Hill Superstar - 8","public_title":"8","sku":"FW7577"},{"id":32432939302983,"price":18000,"name":"+ Jonah Hill Superstar - 8.5","public_title":"8.5","sku":"FW7577"},{"id":32432939335751,"price":18000,"name":"+ Jonah Hill Superstar - 9","public_title":"9","sku":"FW7577"},{"id":32432939368519,"price":18000,"name":"+ Jonah Hill Superstar - 9.5","public_title":"9.5","sku":"FW7577"},{"id":32432939401287,"price":18000,"name":"+ Jonah Hill Superstar - 10","public_title":"10","sku":"FW7577"},{"id":32432939434055,"price":18000,"name":"+ Jonah Hill Superstar - 10.5","public_title":"10.5","sku":"FW7577"},{"id":32432939466823,"price":18000,"name":"+ Jonah Hill Superstar - 11","public_title":"11","sku":"FW7577"},{"id":32432939499591,"price":18000,"name":"+ Jonah Hill Superstar - 11.5","public_title":"11.5","sku":"FW7577"}]},"page":{"pageType":"product","resourceType":"product","resourceId":4738795503687}};
for (var attr in meta) {
  window.ShopifyAnalytics.meta[attr] = meta[attr];
}</script>

Basically, I want to retrieve the content of the `meta` variable and access its key-value pairs in Python.

How do I do that?

2

Answers


  1. import re, json
    
    # NOTE: `requests` and `BeautifulSoup` (from bs4) must already be imported;
    # this snippet only imports `re` and `json` itself.
    url = "https://limitededt.com/collections/footwear/products/adidas-originals-jonah-hill-superstar-fw7577"
    source = requests.get(url).text
    
    soup = BeautifulSoup(source, 'lxml')
    
    # Locate the script by its content instead of a fixed position: the index
    # of the analytics script changes whenever the page adds or removes a tag.
    meta_assignment = re.compile(r"\bvar meta\s*=\s*")
    json_data = None
    for tag in soup.find_all('script'):
        script = tag.string or ''
        m = meta_assignment.search(script)
        if m is None:
            continue
        # raw_decode parses the JSON document that starts right after the
        # assignment and ignores the JavaScript that follows it (";", the
        # for-loop, ...), so no greedy regex over the object body is needed.
        json_data, _ = json.JSONDecoder().raw_decode(script[m.end():])
        break
    
    if json_data is None:
        raise ValueError("no <script> containing 'var meta = {...}' found in page")
    
    print(json_data)
    

    Output:

    {'product': {'id': 4738795503687,
      'gid': 'gid://shopify/Product/4738795503687',
      'vendor': 'adidas Originals',
      'type': 'footwear',
      'variants': [{'id': 32432939204679,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 7',
        'public_title': '7',
        'sku': 'FW7577'},
       {'id': 32432939237447,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 7.5',
        'public_title': '7.5',
        'sku': 'FW7577'},
       {'id': 32432939270215,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 8',
        'public_title': '8',
        'sku': 'FW7577'},
       {'id': 32432939302983,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 8.5',
        'public_title': '8.5',
        'sku': 'FW7577'},
       {'id': 32432939335751,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 9',
        'public_title': '9',
        'sku': 'FW7577'},
       {'id': 32432939368519,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 9.5',
        'public_title': '9.5',
        'sku': 'FW7577'},
       {'id': 32432939401287,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 10',
        'public_title': '10',
        'sku': 'FW7577'},
       {'id': 32432939434055,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 10.5',
        'public_title': '10.5',
        'sku': 'FW7577'},
       {'id': 32432939466823,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 11',
        'public_title': '11',
        'sku': 'FW7577'},
       {'id': 32432939499591,
        'price': 18000,
        'name': '+ Jonah Hill Superstar - 11.5',
        'public_title': '11.5',
        'sku': 'FW7577'}]},
     'page': {'pageType': 'product',
      'resourceType': 'product',
      'resourceId': 4738795503687}}
    
    Login or Signup to reply.
  2. This works for me

    import json
    import requests
    
    # Request the product URL with ".json" appended and decode the JSON body.
    # NOTE(review): the returned structure is whatever the shop's .json
    # endpoint provides; it is not necessarily identical to the `meta`
    # variable embedded in the HTML page - inspect the keys before relying on it.
    url = "https://limitededt.com/collections/footwear/products/adidas-originals-jonah-hill-superstar-fw7577"
    # The original called `session.get(...)`, but no `session` object is ever
    # created in this snippet; use the module-level requests.get instead.
    response = requests.get(url + '.json')
    # Fail loudly on 4xx/5xx instead of trying to JSON-decode an error page.
    response.raise_for_status()
    json_data = response.json()
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search