skip to Main Content

I am trying to build a scraper that checks a product's quantity in stock and then puts the result in a DataFrame so it can be easily visualised. So far I have the following code (I used Postman to generate it).

import requests
import pandas as pd
import json

# The query string must follow "?" directly: a space before "key" would be
# sent as part of the parameter name, so the API key would not be recognised.
url = "https://rebuyengine.com/api/v1/custom/id/37597?key=c30f8541bcce849905613e432e4c7c9170829adf&limit=1&url=https%3A%2F%2Fspearmintlove.com%2Fproducts%2Frylee-cru-beach-bucket-hat-laurel-plaid&shopify_product_ids=7599681372332&shopify_variant_ids=&shopify_collection_ids=0&shopify_order_ids=&metafields=yes&variant_metafields=yes&uuid=3f8fc02e-3476-4583-80b4-85f9d6670e2e&cache_key=1678199511"

payload = {}
# Browser-like headers copied from the captured request.
headers = {
    'authority': 'rebuyengine.com',
    'accept': '*/*',
    'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
    'origin': 'https://spearmintlove.com',
    'referer': 'https://spearmintlove.com/',
    'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'cross-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)  Chrome/111.0.0.0 Safari/537.36',
}

# A timeout keeps the script from hanging forever if the server stalls.
response = requests.request("GET", url, headers=headers, data=payload, timeout=10)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()

# response.json() already returns plain Python objects (dicts and lists).
r = response.json()

print(json.dumps(r, indent=4))

The pretty-printed JSON is as follows. I know that I need to extract data from ‘variants’, and from each variant I want the ‘inventory_quantity’ and ‘sku’. I have removed the URLs from the JSON so that the post is not flagged as spam.

{
    "data": [],
    "metadata": {
        "input_products": [
            {
                "admin_graphql_api_id": "gid://shopify/Product/7599681372332",
                "body_html": "<p class=\"p1\"><span class=\"s1\">Our beach bucket hat is the perfect accessory for any summer outfit while keeping your little one protected from the sun.</span></p>\n<p class=\"p2\"><span class=\"s1\"></span><br></p>\n<p class=\"p1\"><span class=\"s1\">Featuring our 'laurel plaid' all over print.</span></p>\n<p class=\"p2\"><span class=\"s1\"></span><br></p>\n<p class=\"p1\"><span class=\"s1\">Made of 55% Linen, 45% Viscose</span></p>",
                "created_at": "2023-03-15T15:18:15-07:00",
                "handle": "rylee-cru-beach-bucket-hat-laurel-plaid",
                "id": 7599681372332,
                "image": {
                    "admin_graphql_api_id": "gid://shopify/ProductImage/35225543770284",   
                    "alt": null,
                    "created_at": "2023-03-15T15:20:10-07:00",
                    "height": 2000,
                    "id": 35225543770284,
                    "metafields": [],
                    "position": 1,
                    "product_id": 7599681372332,
                    "src": "removed",
                    "updated_at": "2023-03-15T15:20:10-07:00",
                    "variant_ids": [],
                    "width": 2000
                },
                "images": [
                    {
                        "admin_graphql_api_id": "gid://shopify/ProductImage/35225543770284",
                        "alt": null,
                        "created_at": "2023-03-15T15:20:10-07:00",
                        "height": 2000,
                        "id": 35225543770284,
                        "metafields": [],
                        "position": 1,
                        "product_id": 7599681372332,
                        "src": "removed",
                        "updated_at": "2023-03-15T15:20:10-07:00",
                        "variant_ids": [],
                        "width": 2000
                    }
                ],
                "options": [
                    {
                        "id": 9786184302764,
                        "name": "Size",
                        "position": 1,
                        "product_id": 7599681372332,
                        "values": [
                            "S/M (49 cm)",
                            "M/L (52 cm)"
                        ]
                    }
                ],
                "product_type": "",
                "published_at": "2023-03-22T08:25:33-07:00",
                "published_scope": "web",
                "status": "active",
                "tags": "cf-size-m-l-52-cm, cf-size-s-m-49-cm, cf-vendor-rylee-&-cru, in stock",
                "template_suffix": "",
                "title": "Rylee & Cru Beach Bucket Hat, Laurel Plaid",
                "updated_at": "2023-03-24T14:25:57-07:00",
                "vendor": "Rylee & Cru",
                "variants": [
                    {
                        "admin_graphql_api_id": "gid://shopify/ProductVariant/42473715663020",
                        "barcode": null,
                        "compare_at_price": "32.00",
                        "created_at": "2023-03-15T15:18:15-07:00",
                        "fulfillment_service": "manual",
                        "grams": 91,
                        "id": 42473715663020,
                        "image_id": null,
                        "inventory_item_id": 44558304379052,
                        "inventory_management": "shopify",
                        "inventory_policy": "deny",
                        "inventory_quantity": 0,
                        "metafields": [],
                        "old_inventory_quantity": 0,
                        "option1": "S/M (49 cm)",
                        "option2": null,
                        "option3": null,
                        "position": 1,
                        "price": "32.00",
                        "product_id": 7599681372332,
                        "requires_shipping": true,
                        "sku": "RCBeachBucketHatLaurelPlaidS/M",
                        "tax_code": "PC040501",
                        "taxable": true,
                        "title": "S/M (49 cm)",
                        "updated_at": "2023-03-24T14:23:01-07:00",
                        "weight": 0.2,
                        "weight_unit": "lb",
                        "link": "removed"
                    },
                    {
                        "admin_graphql_api_id": "gid://shopify/ProductVariant/42473715695788",
                        "barcode": null,
                        "compare_at_price": "32.00",
                        "created_at": "2023-03-15T15:18:15-07:00",
                        "fulfillment_service": "manual",
                        "grams": 91,
                        "id": 42473715695788,
                        "image_id": null,
                        "inventory_item_id": 44558304411820,
                        "inventory_management": "shopify",
                        "inventory_policy": "deny",
                        "inventory_quantity": 1,
                        "metafields": [],
                        "old_inventory_quantity": 1,
                        "option1": "M/L (52 cm)",
                        "option2": null,
                        "option3": null,
                        "position": 2,
                        "price": "32.00",
                        "product_id": 7599681372332,
                        "requires_shipping": true,
                        "sku": "RCBeachBucketHatLaurelPlaidM/L",
                        "tax_code": "PC040501",
                        "taxable": true,
                        "title": "M/L (52 cm)",
                        "updated_at": "2023-03-21T12:08:35-07:00",
                        "weight": 0.2,
                        "weight_unit": "lb",
                        "link": "removed"
                    }
                ],
                "metafields": [
                    {
                        "namespace": "custom_sort",
                        "key": "just_in",
                        "type": "date_time",
                        "value": "2023-03-15T22:18:15Z",
                        "id": 22062391656620,
                        "description": null,
                        "owner_id": 7599681372332,
                        "created_at": "2023-03-15T15:18:15-07:00",
                        "updated_at": "2023-03-15T15:18:17-07:00",
                        "owner_resource": "product",
                        "admin_graphql_api_id": "gid://shopify/Metafield/22062391656620"   
                    },
                    {
                        "namespace": "msft_bingads",
                        "key": "product_status",
                        "type": "json_string",
                        "value": "{\"pending\":2,\"failed\":0,\"approved\":0}",
                        "id": 22062391689388,
                        "description": null,
                        "owner_id": 7599681372332,
                        "created_at": "2023-03-15T15:18:15-07:00",
                        "updated_at": "2023-03-15T15:18:15-07:00",
                        "owner_resource": "product",
                        "admin_graphql_api_id": "gid://shopify/Metafield/22062391689388"   
                    }
                ],
                "collection_ids": [
                    276826587308,
                    278475210924,
                    278476357804,
                    278505160876,
                    283240038572
                ],
                "cache_info": {
                    "loaded_options": {
                        "metafields": true,
                        "variant_metafields": true,
                        "selling_plans": false,
                        "shopify_selling_plans_loaded": false,
                        "presentment_prices": false
                    },
                    "cached_at_time": 1679693991,
                    "cache_key": "pro_bt#ba9787f6ec6e057a60374c08549ca6f1",
                    "cache_key_readable": "pro_bt#user_id:7544#shopify_product_id:\"7599681372332\"#user_id:\"7544\""
                },
                "link": "removed",
                "owner": 7544
            }
        ],
        "cart_items": [],
        "matched_rules": [],
        "unmatched_rules": [
            {
                "logic": [
                    {
                        "rules": [
                            {
                                "type": "collection",
                                "operator": "contains_any",
                                "value": 276822720684
                            }
                        ]
                    }
                ],
                "output": [
                    {
                        "type": "collection",
                        "collection_id": 276822720684,
                        "collection_sort": "alsobot",
                        "collection_sort_type": "smart_sort"
                    }
                ],
                "exit_if_matched": false,
                "index": 0
            }
        ],
        "unevaluated_rules": [],
        "filtered_oos_products": [],
        "filtered_input_products": [],
        "global_excluded_products": [],
        "errors": [],
        "cache_info": {
            "cache_key": "c_ev3_c#fa57f544f06f403c159b60ce9d0b56a7",
            "cache_key_readable": "c_ev3_c#user_id:7544#product_ids:7599681372332#custom_endpoint_id:37597#product_ids:[\"7599681372332\"]#user_id:\"7544\"#arg1:\"rulesets\"#arg2:\"id\"#custom_endpoint_id:\"37597\"#limit:\"1\"#metafields:\"yes\"#shopify_product_ids:\"7599681372332\"#variant_metafields:\"yes\"",
            "cached_at_time": 1679698888,
            "cache_expires": 1679788561,
            "cache_type": "partial"
        }
    }
}

2

Answers


  1. As Paul H said, the response is just one big dictionary.

    You can access the variants like this:

    ...
    
    # Every entry under "input_products" is one product dict; print the
    # list of variants that each product carries.
    for product in r['metadata']['input_products']:
        print(product['variants'])
    
    

    Once you have the variants, you can use each one to get the sku and inventory_quantity, for example: sku = variant['sku']

    Login or Signup to reply.
  2. just parse as a normal dict

    As mentioned by Paul H, r is a normal dict object (before trying to use the json module on it)

    You can then do any amount of normal data manipulation on it (as below, where I build a pandas DataFrame whose column headers come from the field names of Python’s NamedTuple).

    import pprint
    from typing import Any, NamedTuple, Self
    import requests
    import pandas as pd
    import json
    
    # The query string must follow "?" directly: a space before "key" would be
    # sent as part of the parameter name, so the API key would not be recognised.
    url = "https://rebuyengine.com/api/v1/custom/id/37597?key=c30f8541bcce849905613e432e4c7c9170829adf&limit=1&url=https%3A%2F%2Fspearmintlove.com%2Fproducts%2Frylee-cru-beach-bucket-hat-laurel-plaid&shopify_product_ids=7599681372332&shopify_variant_ids=&shopify_collection_ids=0&shopify_order_ids=&metafields=yes&variant_metafields=yes&uuid=3f8fc02e-3476-4583-80b4-85f9d6670e2e&cache_key=1678199511"

    payload = {}
    # Browser-like headers copied from the captured request.
    headers = {
        'authority': 'rebuyengine.com',
        'accept': '*/*',
        'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
        'origin': 'https://spearmintlove.com',
        'referer': 'https://spearmintlove.com/',
        'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'cross-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)  Chrome/111.0.0.0 Safari/537.36',
    }

    # A timeout keeps the script from hanging forever if the server stalls.
    response = requests.request("GET", url, headers=headers, data=payload, timeout=10)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    response.raise_for_status()

    # response.json() already returns plain Python objects (dicts and lists).
    r = response.json()

    # The products that were sent to the endpoint, each with its own "variants".
    internal_list: list[dict] = r["metadata"]["input_products"]

    # Only the first product is inspected here (the request uses limit=1).
    target_object: list[dict[str, Any]] = internal_list[0]["variants"]
    
    class InventoryData(NamedTuple):
        inventory_quantity: int
        sku: str
    
        @classmethod
        def from_list(cls, data: list[dict[str, Any]]) -> list[Self]:
            return [cls(blob["inventory_quantity"], blob["sku"]) for blob in data]
    
    # Build the records once, then show them both as a plain list and as a
    # DataFrame (the NamedTuple field names become the column headers).
    inventory_records = InventoryData.from_list(target_object)
    pprint.pprint(inventory_records)
    """
     out: [InventoryData(inventory_quantity=0, sku='RCBeachBucketHatLaurelPlaidS/M'),
     InventoryData(inventory_quantity=1, sku='RCBeachBucketHatLaurelPlaidM/L')]
    """

    inventory_frame = pd.DataFrame(inventory_records)
    pprint.pprint(inventory_frame)
    """
    out: 
       inventory_quantity                             sku
    0                   0  RCBeachBucketHatLaurelPlaidS/M
    1                   1  RCBeachBucketHatLaurelPlaidM/L
    """
    

    Also, as an addendum, payload and headers are unnecessary here, as far as I can tell. They don’t do anything for the functionality of the GET request itself, unless you care about how the API perceives your client. They probably come from Postman boilerplate, I’d imagine.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search