skip to Main Content

Here’s a simple scrapy spider that anyone can use for testing.

from scrapy.utils.response import open_in_browser
import scrapy
import json

class TestSpider(scrapy.Spider):
    """Minimal spider that requests a single Shopee product-detail API URL.

    The request is sent with a pre-captured cookie jar and two extra
    headers; the response is handed to ``open_in_browser`` so it can be
    inspected manually.
    """

    name = "test-spider"
    allowed_domains = ["shopee.ph"]

    # Cookie jar as JSON text (a list of objects with name/value/domain/...).
    # The literal is triple-quoted because it spans several physical lines;
    # a single-quoted string cannot contain raw newlines. Each line break
    # falls between JSON tokens, so json.loads() treats it as whitespace.
    # NOTE(review): these look like captured session cookies with fixed
    # `expires` timestamps -- presumably they go stale; confirm/refresh
    # before relying on them.
    shopee_cookies = '''[{"name": "csrftoken", "value": "RvxBdTixvBfdTR3xfQwbcYippqz8jEbF", "domain": "shopee.ph", "path": "/", "expires": -1, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "_gcl_au", "value": "1.1.1251411089.1692464842", "domain": ".shopee.ph", "path": "/", "expires": 1700240842, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "SPC_SI", "value": "sTLbZAAAAABwY1ZrR1NNU+WdNgAAAAAAdzlCYXIyVVQ=", "domain": ".shopee.ph", "path": "/", "expires": 1692551246.336331, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "_fbp", "value": "fb.1.1692464842990.689078803", "domain": ".shopee.ph", "path": "/", "expires": 1700240846, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "SPC_R_T_IV", "value": "NnVEbThnRjREMnNMZVpGVQ==", "domain": ".shopee.ph", "path": "/", "expires": 1727024846.336348, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "SPC_T_ID", "value": "fn/OKngQO3doGdfFGyo/6mzLiviELHkKEbWM9J+x/ezTl/baT96grQer6ILrYX9tj3Kqs71Jg+hCimaK/XauidJXrd6HdPd2Smbxbu/fEStjOJi5g9/ucMmbBwuyh5M6H3TOGdpUop/9Q/zdpNj6MyxZaODnNsT5XprfsQxjB5g=", "domain": ".shopee.ph", "path": "/", "expires": 1727024846.336355, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "SPC_T_IV", "value": "NnVEbThnRjREMnNMZVpGVQ==", "domain": ".shopee.ph", "path": "/", "expires": 1727024846.336362, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "SPC_F", "value": "jiOtuCSNUaap3U4BHHfzhDihWwFht32f", "domain": ".shopee.ph", "path": "/", "expires": 1727024843.162052, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "REC_T_ID", "value": "dc8a2570-3eb2-11ee-ac9b-2cea7fce6c95", "domain": ".shopee.ph", "path": "/", "expires": 1727024843.16206, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "SPC_R_T_ID", "value": 
"fn/OKngQO3doGdfFGyo/6mzLiviELHkKEbWM9J+x/ezTl/baT96grQer6ILrYX9tj3Kqs71Jg+hCimaK/XauidJXrd6HdPd2Smbxbu/fEStjOJi5g9/ucMmbBwuyh5M6H3TOGdpUop/9Q/zdpNj6MyxZaODnNsT5XprfsQxjB5g=", "domain": ".shopee.ph", "path": "/", "expires": 1727024846.33634, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "_QPWSDCXHZQA", "value": "4a585493-a7a0-4f0e-d696-687295d3a4c3", "domain": "shopee.ph", "path": "/", "expires": 1692496379, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "IDE", "value": "AHWqTUm1b5ZflCqDTn6cpHDjyoeqH6iLfXcCOOm4YNaP8CHTsAZ7F_Daq4-zO-bsGIk", "domain": ".doubleclick.net", "path": "/", "expires": 1727024843.787698, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "AMP_TOKEN", "value": "%24NOT_FOUND", "domain": ".shopee.ph", "path": "/", "expires": 1692468444, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "_ga", "value": "GA1.2.833255521.1692464843", "domain": ".shopee.ph", "path": "/", "expires": 1727024844.498551, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "_gid", "value": "GA1.2.1347861977.1692464844", "domain": ".shopee.ph", "path": "/", "expires": 1692551244, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "_dc_gtm_UA-61918643-6", "value": "1", "domain": ".shopee.ph", "path": "/", "expires": 1692464904, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "shopee_webUnique_ccd", "value": "raj%2F3ukNopIWTrFjVLQeGA%3D%3D%7C1%2BjiV3ga9OlzuAELTZtedUY5BlP1ZNVH5ybZJx2D4KNA9dGTvtFakjnNZvR64zKNG6yBDfEXdabTE%2FRKow%3D%3D%7CsWIQ7u7pR4F3BD7E%7C08%7C3", "domain": "shopee.ph", "path": "/", "expires": 1692496381, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "ds", "value": "065598fda3b7cca4e5e241e446a075e9", "domain": "shopee.ph", "path": "/", "expires": 1692496381, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "SPC_EC", "value": 
"RTJYa2Q5WEV4UDNnN3VGWr68rFv1FRJEeVkpwAzlu09WhtwSxFE1cZlwpQYRhhR56REixPuKfekz6oioE4EaDK12bvALil+QZ5B0EfG42psIFWNDe1moiErTZndyu1502KUlh5+OQoUWCvm1XkVY+2Iy7Jk5qyPI2J655JeZwv0=", "domain": ".shopee.ph", "path": "/", "expires": 1727024846.336291, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "SPC_ST", "value": ".ek1DVmo5aGJjaVBxcklYU5o4/3v/8ndPeV2/fwtzWYUh1kWOopWvn7SFoQXWuS37Rs+J+Ym7U8OwOG73JbiFRWyOOo1GhKBgwhUeeWfE+q9XPDZXACC33t7qphoBu5hyWvR/G+WkpSUbIkmGPzprCIvhw7Qwyt8UFxk/4bA+47QQQUiDcPfHIq/sJqmVMEqH3Al6nCTDeEh/JCDLALRvNQ==", "domain": ".shopee.ph", "path": "/", "expires": 1727024846.336324, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "SPC_CLIENTID", "value": "amlPdHVDU05VYWFwgvlavxoisbqjmacw", "domain": ".shopee.ph", "path": "/", "expires": 1727024846.336374, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "_ga_CB0044GVTM", "value": "GS1.1.1692464843.1.0.1692464846.57.0.0", "domain": ".shopee.ph", "path": "/", "expires": 1727024846.367333, "httpOnly": false, "secure": false, "sameSite": "Lax"}]'''

    # Decode once, at class-definition time, into the list of dicts that is
    # passed as the ``cookies`` argument of the request below.
    shopee_cookies = json.loads(shopee_cookies)

    def start_requests(self):
        """Yield the single product-detail API request.

        The request carries the decoded cookie list and the
        ``x-api-source`` / ``af-ac-enc-dat`` headers; its response is
        routed to :meth:`parse_item`.
        """
        yield scrapy.Request(
            "https://shopee.ph/api/v4/pdp/get_pc?shop_id=237078553&item_id=6929743700",
            cookies=self.shopee_cookies,
            headers={"x-api-source": "pc", "af-ac-enc-dat": "null"},
            callback=self.parse_item,
        )

    def parse_item(self, response):
        """Open the received response in the local web browser for inspection."""
        open_in_browser(response)

Feel free to test it out; I have provided the cookies as well, because they are needed. As you can see, this piece of code actually worked before, around early August 2023. I had trouble getting it to work at first, but thanks to this answer I managed to get the product data. You can even see my comment there. Here’s a screenshot I took back then, proving that it did work around early August.

enter image description here

As you can see, the data is there and it works well, thanks to the headers {"x-api-source":"pc","af-ac-enc-dat":"null"} that made it work. However, as of August 20, 2023, as I am typing this, it no longer seems to work. I’m not sure why, but I think something has changed in the API. I spent all day experimenting with the headers, but had no luck. All I get as a result right now is this.

Output I am having right now:

{"is_customized":false,"is_login":true,"platform":0,"action_type":2,"error":90309999,"tracking_id":"24d95bd5-40e5-44cd-b30b-885711481170","report_extra_info":""}

Here is the actual product page link I used for testing. You can see the API call there under "Inspect Element" -> "Network" tab. Note that the output I am getting right now is the same one I got before I implemented this solution; it is back to that again. So the question is: is there a way to make it work again? I suspect I am not getting something right in the headers, but I can’t figure out what, which is why I am asking for help now that I am out of ideas.

2

Answers


  1. As I said in a comment on another article, it seems that their anti-crawling method has changed.

    If there is a problem with the api response, you can check the problem with the following procedure.

    1. get full api request
    # Template of the full API request; every header value is replaced by a
    # <value> placeholder. Trailing backslashes join the lines into ONE command.
    curl 'https://shopee.ph/api/v4/pdp/get_pc?shop_id=237078553&item_id=6929743700' \
      -H '744a5de7:<value>' \
      -H 'authority:<value>' \
      -H 'accept:<value>' \
      -H 'accept-language:<value>' \
      -H 'af-ac-enc-dat:<value>' \
      -H 'af-ac-enc-sz-token:<value>' \
      -H $'afe5f082:<value>' \
      -H 'content-type:<value>' \
      -H 'cookie:<value>' \
      -H 'dd46895e:<value>' \
      -H 'referer:<value>' \
      -H 'sec-ch-ua:<value>' \
      -H 'sec-ch-ua-mobile:<value>' \
      -H 'sec-ch-ua-platform:<value>' \
      -H 'sec-fetch-dest:<value>' \
      -H 'sec-fetch-mode:<value>' \
      -H 'sec-fetch-site:<value>' \
      -H 'sz-token:<value>' \
      -H 'user-agent:<value>' \
      -H 'x-api-source:<value>' \
      -H 'x-csrftoken:<value>' \
      -H 'x-requested-with:<value>' \
      -H 'x-sap-ri:<value>' \
      -H 'x-shopee-language:<value>' \
      -H 'x-sz-sdk-version:<value>' \
      --compressed
    

    Unlike when I tried before, something has been added:
    randomly named header elements such as 744a5de7, afe5f082 and dd46895e.

    2. check api expire time
      Some API requests have an expiry time.
      Most expire after a few minutes, but I’ve seen sites where they expire in 10 seconds.

    From my tests, their API’s expiry time is 60–120 seconds.

    So I need to capture the full API request every minute.

    3. find the necessary header elements.
      By removing one line at a time, I found that the elements below are necessary.
      -H $'24d10ef2: 
      -H '5025235d: 
      -H '744a5de7:
      -H 'af-ac-enc-dat:
      -H 'cookie:
      -H 'user-agent:
      -H 'x-csrftoken:
      -H 'x-sap-ri:
    

    The random elements, af-ac-enc-dat, cookie, user-agent, x-csrftoken and x-sap-ri
    are necessary.

    4. check whether a changed value is still accepted
      af-ac-enc-dat:null is an example of this.

    It’s amazing that this worked until early August.

    When I tried it, "af-ac-enc-dat" was a value the client creates from the cookies and the time.
    It must not be altered.
    Since af-ac-enc-dat is derived from the cookies, the cookies cannot be changed either.

    so you need to figure out

    • how to generate the names and values of the three random elements
    • how to make af-ac-enc-dat, x-csrftoken, x-sap-ri

    It seems to me that you are getting it by logging in when you get the cookie.

    In my experience, even if you scrape through a proxy, scraping fails after a certain period of time.

    And when you access the site, it says that abnormal traffic has been detected and asks you to complete a slider challenge to prove you’re not a robot.

    So I tried scraping with a non-login cookie.

    Scraping was possible through multiple cookie sets and multiple proxies.

    conclusion

    I tried very hard to scrape it,
    but I couldn’t get past their ever-changing defenses.

    So I gave up in early July.

    I hope you can solve this problem.

    Login or Signup to reply.
  2. I am getting an error too. I have already checked, and I think they are changing the required request headers. The request now looks like this:

    GET /api/v4/shop/get_products_tab_data?by=popular&country=ID&from_source=search&limit=50&offset=0&order=desc&shop_id=224781834&start_ts=1690041600&upstream=pdp HTTP/2
    Host: shopee.co.id
    Cookie: SPC_F=CLE38NzQuOfu72ZZK3Fbk7uSV4Rl0Om3; REC_T_ID=cbe26cbc-29fb-11ee-b7ba-f4ee08290b63; _gcl_au=1.1.2119733819.1690471447; _fbp=fb.2.1690471447353.1570533404; SPC_CLIENTID=Q0xFMzhOelF1T2Z1eyyrhwwxishawszz; _med=cpc; csrftoken=FE3h0UK9cUKTSpcXbrms3HaOuZQLI4y6; SPC_SI=tTLbZAAAAAA5dDM5MWZkVzVNhQAAAAAAa1Fnb3Njc0k=; _QPWSDCXHZQA=28c21620-bf64-4cbc-c3bb-768c09ddd94d; _gid=GA1.3.1329493720.1692587428; _gcl_aw=GCL.1692587441.EAIaIQobChMIrbCJv-PsgAMVf4FLBR3v_wQWEAAYASAAEgKW__D_BwE; _gac_UA-61904553-8=1.1692587442.EAIaIQobChMIrbCJv-PsgAMVf4FLBR3v_wQWEAAYASAAEgKW__D_BwE; SPC_P_V=Epj8h5wnEQ2sD4OURKKvdycouuCI2YHaEekG7u9ezsvZjWGjPUhg9DFetTavXFCO3C63OOwrXzWL3gx7VDRvGkixJ2mTfc0wMCLFgyaJR+LQkC4eHAmRDCMKtHmCQXgF7RGQD5PFFx7OUbXAnu1pAktVpve6NGOAmNre/519svI=; SPC_ST=.MU83cVROQjhGZURLbEttONiDXDjrLsd6vGNERy6F8f7m5LTefdR1H4zNcrAdW+VXk5rjEyEaPFJrUPnNEJF9aPcTfrS9nukBbsb7f2TWvCqSL14xvK4145VpW1935EAZ8y+TNM2Ii2NELgi9KzO/lsjyeJtM4bhHd9vpAhoZZ2KO9psk92WId+lPjpjNiJO03Q19o3Gldiqu/6uo3B/JYg==; SPC_U=1047715958; SPC_R_T_ID=pkA90hKsNpTPhCjad/aK1CSaQhpsMokArXQS2nqHnSaHqC/ct9z7kJ2SCT3qtALegWNHd6YnNt4cZgTOn+WGaEhBZGEKlLc/OTynALNC7IrPpFqcJasaF/DOKX8imMsEr+9Wnct0GJ77BzJIiYF3riCD8PuMnY05yOMIk5D4yew=; SPC_R_T_IV=UHVXbzB3MXhYMW1SUEdVVQ==; SPC_T_ID=pkA90hKsNpTPhCjad/aK1CSaQhpsMokArXQS2nqHnSaHqC/ct9z7kJ2SCT3qtALegWNHd6YnNt4cZgTOn+WGaEhBZGEKlLc/OTynALNC7IrPpFqcJasaF/DOKX8imMsEr+9Wnct0GJ77BzJIiYF3riCD8PuMnY05yOMIk5D4yew=; SPC_T_IV=UHVXbzB3MXhYMW1SUEdVVQ==; shopee_webUnique_ccd=9wlap15q38u%2FjyDqeMXhSw%3D%3D%7CZJYFYh7ffzXrGWxFVrISb4CSSzK9huoBudwqWNpox09%2BIx9P3SCcAlZk3OsGAXz5TLVKLpJ0a0iBycFR%7CQeZNQ4uANkX3tHiI%7C08%7C3; ds=76644e11d13873c9fe6a40f193246e70; _ga=GA1.1.2011873235.1690471457; _dc_gtm_UA-61904553-8=1; SPC_EC=RklXT0ZlZVA2SmNDaTBIQslEsqR691BJDKmMLa1HhAOn7CSTgdQMN47UZ11FxNpmCyIvU7bi0z4zj64SpXGi094UHX2jaE6xBmOYAkJYNhFUdS/dRHoI1mcdwpf9rVJ8+kK8og9iFWju20lnzu1cuWyqQXf60ZF9JcbK3lQvHNA=; _ga_SW6D8G0HXK=GS1.1.1692600956.8.1.1692600957.59.0.0
    2495ee40: *GTT#ILZ,J*RgDF&)2Sth7u:Q
    X-Sz-Sdk-Version: 2.9.2-2&1.4.1
    94d942d2: W1+Q&gY<k'_qcBegg9t1Fo*q5FJ!f]'!H/Bbi&LJH7M$L=]"p.F8/*q_!ZNU(E%'CkE'0R-3ps8iL[Nc&G44^P_@_C"ik0h7kT-lWVD"e/[>Z7X1YHj)2[`ruj8A[XO3+]gHb#[5fKf70Gr%e:G6f$G3dPVl;)b)lbWSB^J`Kff9;/IeaQ(%b'+iLX)]Ku4D@?:u'0?*@*(%$[qb0Tf%J[Ug6fd`H+c!,k>P.Vkf!4q2*6,3>)KYh9eJ,:^?H=5iR1usTREU^.A-g_%.NG6W9bt+?8`Q0BE(gTS%N/oQ5sfi!?V$9pdYBfOdh5Ltr,Q2ADr::L*%[LDd:L@W,X.%)b4lLId@>G1mjFUrJ-cn
    Content-Type: application/json
    X-Api-Source: rweb
    Accept: application/json
    X-Sap-Ri: 7f0ae3643a8576b6061b7b3e6051550dd3d3ae970d1848a4
    X-Shopee-Language: id
    X-Requested-With: XMLHttpRequest
    Af-Ac-Enc-Dat: AAcyLjkuMi0yAAABihYUIzMAAAk4AhAAAAAAAAAAAj+vsCPwylZoRHPu2FkyOahImkli6vLJCbuzSNfcvOA2MvU3/glR7XXjpBf9OrfqNg0mzEILeQPCPwQV0LGu6fsc6r5SZ42c29f5Z5ZZkl4yABQf6F0BVj8sQ0WYkYmoESJOfiuWtwIWPXWm0HBmx8M2f0K9Ewk+vnXz4P7b/AUmFUafOtiMif1YsmByR1llShH1Us3tzoAHKX+I31z8KKr7bebm+/ENf3wKONAR64IoqmkR6k8atRN4Yc7bLxyKnY2b65J2uyInBO0b3ZjL35k2S3rNoDsVvIeo353RcLV9UURJuBX5jq4Vq+gCwVHcilv9U0yTna/j6pLZztTVoRnT2+/me2cRbNI3CgQTcHMq+u/tI/m5zHMDo/4eXEjxrR7QQhOX/zYTJ8PH9tVFw9ppG9ibG96q2PCCXr4o3YBLU4FYNS9R2gSyN55l0IR8SGtsTv5PXjEGGWy3DMVVukZKDs+2xRv0gXc8lWmt0UYfw3ZPVH4/olSOzfJYqlihOvXD45KX/zYTJ8PH9tVFw9ppG9ibl/82EyfDx/bVRcPaaRvYmwEDdbQH+lNoUXKNoOx37UC8g0SuAAy3ruovb6c/Rh1R1rDHZxOYuVToBnEUdNMU8iF9xjfeiUsBvxyc2zYqd8M314W5o9rQJlSKvfvREBIMgl/OQC8M5ma/UJgoo7r+g+34mcW/JFW26nxtNEcHnpU=
    User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1
    X-Csrftoken: FE3h0UK9cUKTSpcXbrms3HaOuZQLI4y6
    4a12d42b: 1;fI>#8q*F>+W8`]Yi@Wr>rZ*
    Af-Ac-Enc-Sz-Token: 9wlap15q38u/jyDqeMXhSw==|ZJYFYh7ffzXrGWxFVrISb4CSSzK9huoBudwqWNpox09+Ix9P3SCcAlZk3OsGAXz5TLVKLpJ0a0iBycFR|QeZNQ4uANkX3tHiI|08|3
    Sec-Fetch-Site: same-origin
    Sec-Fetch-Mode: cors
    Sec-Fetch-Dest: empty
    Referer: https://shopee.co.id/vearst?shop=224781834&tab=1&upstream=search
    Accept-Encoding: gzip, deflate
    Accept-Language: en-US,en;q=0.9
    

    hope it helps

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search