skip to Main Content

I am trying to webscrape the name, price, and description of products listed on an online shop. The website link is https://eshop.nomin.mn/n-foods.html

enter image description here

When I look through the HTML code of the page, I can see the relevant div class containers, but when I reference them in my code as shown below, I get no values when I run my spider. One possible reason is that the website is JavaScript-based and dynamic, which would require me to use Splash. However, I don’t think that is the cause of my issue.

def parse(self, response, **kwargs):
    """Select product card divs from the response and extract text from each card."""
    # Cards are matched by an exact comparison on the div's class attribute.
    cards = response.xpath('//div[@class="item-itemmainroot-1lZ"]')

    # parse details
    for card in cards:
        # NOTE(review): the result is stored as `price`, but the selector targets
        # the item-name anchor. `span()` also does not look like a valid XPath
        # step (parentheses belong to node tests such as text()); presumably a
        # plain `span` was intended -- confirm against the page markup.
        price = card.xpath(".//a[contains(@class, 'item-nameLenght-K5Z item-name-3TH')]/span()/text()").extract()

Full Code:

import scrapy
import re


class TempSpider(scrapy.Spider):
    """Spider that requests the n-foods category page and yields one dict per matched card."""
    name = 'temp_spider'
    # NOTE(review): this entry is a full URL (scheme and trailing slash);
    # Scrapy documents allowed_domains as a list of bare domain names such as
    # 'eshop.nomin.mn' -- confirm and adjust.
    allowed_domains = ['https://eshop.nomin.mn/']
    start_urls = ['https://eshop.nomin.mn/n-foods.html']

    def parse(self, response, **kwargs):
        """Yield a ``{'price': ...}`` dict for every product card matched in the response."""
        # Cards are matched by an exact comparison on the div's class attribute.
        cards = response.xpath('//div[@class="item-itemmainroot-1lZ"]')

        # parse details
        for card in cards:
            # NOTE(review): the result is stored as `price`, but the selector
            # targets the item-name anchor. `span()` also does not look like a
            # valid XPath step (parentheses belong to node tests such as
            # text()); presumably a plain `span` was intended -- confirm.
            price = card.xpath(".//a[contains(@class, 'item-nameLenght-K5Z item-name-3TH')]/span()/text()").extract()
            
            # The item carries whatever .extract() returned for this card.
            item = {'price': price
                    }
            yield item


  [1]: https://i.stack.imgur.com/iokmo.png

Any and all help is greatly appreciated. I can’t seem to figure out what I am doing wrong.

2

Answers


  1. Use the shop's GraphQL endpoint directly with requests.

    import requests
    import json
    import pandas as pd
    from bs4 import BeautifulSoup as bs
    
    # URL-encoded GraphQL "category" query against the shop's /graphql endpoint.
    # The encoded variables ask for currentPage 1, pageSize 50, category_id 24175.
    url =  'https://eshop.nomin.mn/graphql?query=query+category%28%24pageSize%3AInt%21%24currentPage%3AInt%21%24filters%3AProductAttributeFilterInput%21%24sort%3AProductAttributeSortInput%29%7Bproducts%28pageSize%3A%24pageSize+currentPage%3A%24currentPage+filter%3A%24filters+sort%3A%24sort%29%7Bitems%7Bid+name+sku+brand+salable_qty+brand_name+c21_available+c21_business_type+c21_reference+c21_street+c21_area+c21_bed_room+mp_daily_deal%7Bcreated_at+date_from+date_to+deal_id+deal_price+remaining_time+deal_qty+discount_label+is_featured+product_id+product_name+product_sku+sale_qty+status+store_ids+updated_at+__typename%7Dnew_to_date+short_description%7Bhtml+__typename%7DproductAttributes%7Bname+value+__typename%7Dprice%7BregularPrice%7Bamount%7Bcurrency+value+__typename%7D__typename%7D__typename%7Dspecial_price+special_to_date+thumbnail%7Bfile_small+url+__typename%7Durl_key+url_suffix+mp_label_data%7Benabled+name+priority+label_template+label_image+to_date+__typename%7D...on+ConfigurableProduct%7Bvariants%7Bproduct%7Bsku+special_price+price%7BregularPrice%7Bamount%7Bcurrency+value+__typename%7D__typename%7D__typename%7D__typename%7D__typename%7D__typename%7D__typename%7Dpage_info%7Btotal_pages+__typename%7Dtotal_count+__typename%7D%7D&operationName=category&variables=%7B%22currentPage%22%3A1%2C%22id%22%3A24175%2C%22filters%22%3A%7B%22category_id%22%3A%7B%22in%22%3A%2224175%22%7D%7D%2C%22pageSize%22%3A50%2C%22sort%22%3A%7B%22position%22%3A%22DESC%22%7D%7D'

    # A timeout keeps the script from hanging on a stalled connection, and
    # raise_for_status() surfaces HTTP errors here rather than as a confusing
    # KeyError when the error body is indexed below.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    json_data = json.loads(r.text)
    data_docs = json_data['data']['products']['items']

    # data_docs is a list of per-product records, so it goes to the DataFrame
    # constructor (from_dict is documented for dict input).
    df = pd.DataFrame(data_docs)
    print(df)
    
    Login or Signup to reply.
  2. Use the website's data API instead of the page URL that you visit in your browser. It will return a JSON object that has all the information you are looking for.

    import scrapy
    import re
    
    
    class TempSpider(scrapy.Spider):
        """Spider that reads product data from the shop's GraphQL endpoint.

        The start URL is a GraphQL "category" query whose variables ask for
        currentPage 1, pageSize 50 and category_id 24175. The JSON response is
        parsed directly; no HTML selectors are involved.
        """

        name = 'temp_spider'
        # allowed_domains takes bare domain names, not URLs with a scheme/path.
        allowed_domains = ['eshop.nomin.mn']
        start_urls = ['https://eshop.nomin.mn/graphql?query=query+category($pageSize:Int!$currentPage:Int!$filters:ProductAttributeFilterInput!$sort:ProductAttributeSortInput){products(pageSize:$pageSize+currentPage:$currentPage+filter:$filters+sort:$sort){items{id+name+sku+brand+salable_qty+brand_name+c21_available+c21_business_type+c21_reference+c21_street+c21_area+c21_bed_room+mp_daily_deal{created_at+date_from+date_to+deal_id+deal_price+remaining_time+deal_qty+discount_label+is_featured+product_id+product_name+product_sku+sale_qty+status+store_ids+updated_at+__typename}new_to_date+short_description{html+__typename}productAttributes{name+value+__typename}price{regularPrice{amount{currency+value+__typename}__typename}__typename}special_price+special_to_date+thumbnail{file_small+url+__typename}url_key+url_suffix+mp_label_data{enabled+name+priority+label_template+label_image+to_date+__typename}...on+ConfigurableProduct{variants{product{sku+special_price+price{regularPrice{amount{currency+value+__typename}__typename}__typename}__typename}__typename}__typename}__typename}page_info{total_pages+__typename}total_count+__typename}}&operationName=category&variables={"currentPage":1,"id":24175,"filters":{"category_id":{"in":"24175"}},"pageSize":50,"sort":{"position":"DESC"}}']

        def parse(self, response, **kwargs):
            """Yield a ``{"name": ..., "price": ...}`` dict for each product item.

            The price is taken from ``price.regularPrice.amount.value`` of each
            entry under ``data.products.items`` in the JSON response.
            """
            data = response.json()
            # Log the top-level keys through the spider logger, not stdout.
            self.logger.debug("GraphQL response keys: %s", list(data.keys()))
            for item in data['data']["products"]["items"]:
                yield {
                    "name": item["name"],
                    "price": item["price"]["regularPrice"]["amount"]["value"]
                }
    
    

    Partial OUTPUT

    
    {'name': 'Хиам Аялал кг', 'price': 19559}
    {'name': 'Чихэр Княжеские 1кг', 'price': 24859}
    {'name': 'Жимсний чанамал Mr', 'price': 11999}
    {'name': 'Vit C ', 'price': 28799}
    {'name': 'Жүүс Моя семья', 'price': 3629}
    {'name': 'Муурны ялгадас шингээх', 'price': 31999}
    {'name': 'Компот Vidan 920гр', 'price': 8879}
    {'name': 'Мөс 0.5кг 024218', 'price': 2029}
    {'name': 'Өргөст хэмх Hainich', 'price': 7799}
    {'name': 'Соус чилитэй 215гр', 'price': 9499}
    {'name': 'Цай Ottogi улаан', 'price': 14299}
    {'name': 'Цай шингэн Pfanner', 'price': 9379}
    {'name': '02381088', 'price': 3179}
    {'name': 'Өглөөний хоол G&G', 'price': 8239}
    {'name': '02S003167', 'price': 7699}
    {'name': '02S003133', 'price': 8299}
    {'name': 'Кофе Жокей империал', 'price': 14279}
    {'name': 'Жүүс Pfanner orange', 'price': 13129}
    {'name': 'Цуу улаан дарсны', 'price': 6939}
    {'name': 'Оливын тос Borges', 'price': 14749}
    {'name': 'Оливын тос classic', 'price': 33629}
    {'name': 'Оливын тос Borges', 'price': 18629}
    {'name': 'Гоймон Borges Fusilli', 'price': 5939}
    {'name': 'Цай шингэн чавганы', 'price': 2469}
    {'name': 'Гоймон Нүүдэл 500гр', 'price': 3759}
    {'name': 'Муурны хоол 85гр', 'price': 1889}
    {'name': 'Бэлэн Карри зөөлөн', 'price': 7499}
    {'name': 'Цай Dr.Baatar 2гр*16ш', 'price': 11999}
    {'name': 'Нухаш Urbanek ', 'price': 6979}
    {'name': 'Вандуй лууван холимог', 'price': 5899}
    {'name': 'Өргөст хэмх Bagro', 'price': 13499}
    {'name': 'Бэлэн хоол Samyang', 'price': 6189}
    {'name': 'Жүүс Naturalis apple', 'price': 1589}
    {'name': 'Жүүс Naturalis Apple-grape', 'price': 5999}
    {'name': 'Жүүс Naturalis Apple-sour', 'price': 5999}
    {'name': 'Жүүс Vita Pomegranate', 'price': 3659}
    {'name': 'Шоколад Luna 33гр', 'price': 1499}
    {'name': 'Жүүс Фруктовый Сад', 'price': 5999}
    {'name': 'Жүүс Фруктовый Сад', 'price': 5299}
    {'name': 'Жүүс Фруктовый Сад', 'price': 5299}
    {'name': 'Жүүс Фруктовый Сад', 'price': 5299}
    {'name': 'Жүүс Фруктовый Сад', 'price': 5299}
    
    

    You can find the URL for the API in the Network tab of your browser's devtools… enter image description here

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search