skip to Main Content

I’m trying to write a program that lets me easily scale recipes created using the WordPress Recipe Maker plugin. I have already been advised to use BeautifulSoup instead of parsing HTML with regex, and it does what it’s supposed to – just a little too much of it, since instead of giving me every entry of the recipe once, it outputs it three times. The problem appears both in Spyder and when using Python 3 on the Ubuntu command line.

This is the code I am using:

from bs4 import BeautifulSoup
import requests

def get_ingredients_info_from_html(html):
    """Collect ingredient fields from elements with class 'wprm-recipe-ingredient'.

    The markup in *html* is parsed with BeautifulSoup's 'html.parser'. For each
    matching element a dict with the keys 'amount', 'unit', 'name' and 'notes'
    is built; a key stays ``None`` when no matching sub-element is found.

    Returns the list of collected dicts.
    """
    
    
    soup = BeautifulSoup(html, 'html.parser')

    ingredients = []

    class_name = 'wprm-recipe-ingredient'
    
    # Each element carrying the base class is treated as one ingredient.
    for ingredient in soup.select(f'.{class_name}'):
        # Start with all four fields set to None.
        record = dict.fromkeys(['amount', 'unit', 'name', 'notes'])
    
        for info in record:
            # Field sub-elements are looked up by the class '<class_name>-<field>'.
            element = ingredient.select_one(f'.{class_name}-{info}')
    
            if element:
                record[info] = element.text
                # NOTE(review): this append sits inside the per-field loop, so the
                # same record object is added once for every field that was found.
                ingredients.append(record)
    
    return ingredients

# Download the printable recipe page and print the parsed ingredient records.
url = 'https://handletheheat.com/wprm_print/30199' # cookie recipe yay
response = requests.get(url)
parsed_ingredients = get_ingredients_info_from_html(response.content)
print(parsed_ingredients)

which outputs

[{'amount': '3', 'unit': 'cups', 'name': '(380 grams) all-purpose flour', 'notes': None}, {'amount': '3', 'unit': 'cups', 'name': '(380 grams) all-purpose flour', 'notes': None}, {'amount': '3', 'unit': 'cups', 'name': '(380 grams) all-purpose flour', 'notes': None}, {'amount': '1', 'unit': 'teaspoon', 'name': 'baking soda', 'notes': None}, {'amount': '1', 'unit': 'teaspoon', 'name': 'baking soda', 'notes': None}, {'amount': '1', 'unit': 'teaspoon', 'name': 'baking soda', 'notes': None}, {'amount': '1', 'unit': 'teaspoon', 'name': 'fine sea salt', 'notes': None}, {'amount': '1', 'unit': 'teaspoon', 'name': 'fine sea salt', 'notes': None}, {'amount': '1', 'unit': 'teaspoon', 'name': 'fine sea salt', 'notes': None}, {'amount': '2', 'unit': None, 'name': 'sticks (227 grams) unsalted butter, at cool room temperature (67°F)', 'notes': None}, {'amount': '2', 'unit': None, 'name': 'sticks (227 grams) unsalted butter, at cool room temperature (67°F)', 'notes': None}, {'amount': '1/2', 'unit': 'cup', 'name': '(100 grams) granulated sugar', 'notes': None}, {'amount': '1/2', 'unit': 'cup', 'name': '(100 grams) granulated sugar', 'notes': None}, {'amount': '1/2', 'unit': 'cup', 'name': '(100 grams) granulated sugar', 'notes': None}, {'amount': '1 1/4', 'unit': 'cups', 'name': '(247 grams) lightly packed light brown sugar', 'notes': None}, {'amount': '1 1/4', 'unit': 'cups', 'name': '(247 grams) lightly packed light brown sugar', 'notes': None}, {'amount': '1 1/4', 'unit': 'cups', 'name': '(247 grams) lightly packed light brown sugar', 'notes': None}, {'amount': '2', 'unit': 'teaspoons', 'name': 'vanilla', 'notes': None}, {'amount': '2', 'unit': 'teaspoons', 'name': 'vanilla', 'notes': None}, {'amount': '2', 'unit': 'teaspoons', 'name': 'vanilla', 'notes': None}, {'amount': '2', 'unit': None, 'name': 'large eggs, at room temperature', 'notes': None}, {'amount': '2', 'unit': None, 'name': 'large eggs, at room temperature', 'notes': None}, {'amount': '2', 'unit': 'cups', 'name': 
'(340 grams) semisweet chocolate chips', 'notes': None}, {'amount': '2', 'unit': 'cups', 'name': '(340 grams) semisweet chocolate chips', 'notes': None}, {'amount': '2', 'unit': 'cups', 'name': '(340 grams) semisweet chocolate chips', 'notes': None}]

From this question, I can gather that it probably has something to do with the nested for loops, however I still do not know why this causes this behaviour, and also how to implement what I want without the nested for loops.

2

Answers


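  1. Please move ingredients.append(record) out of the inner `for info in record` loop, so that each record is appended once per ingredient instead of once per field that was found.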

    Login or Signup to reply.
  2. The append to the list should happen in the first (outer) loop, once per ingredient, not inside the inner loop:

    from bs4 import BeautifulSoup
    import requests
    import json
    
    
    def get_ingredients_info_from_html(html):
        """Return one dict per ``<li class="wprm-recipe-ingredient">`` in *html*.

        Each dict has the keys 'amount', 'unit', 'name' and 'notes'. A value is
        the text of the sub-element whose class is the base class plus
        ``-<key>``, or ``None`` when no such sub-element is found.
        """
        base_class = "wprm-recipe-ingredient"
        fields = ("amount", "unit", "name", "notes")

        items = BeautifulSoup(html, "html.parser").find_all("li", class_=base_class)

        parsed = []
        for item in items:
            entry = {}
            for field in fields:
                node = item.select_one(f".{base_class}-{field}")
                # select_one yields None when nothing matches; fall back to None.
                entry[field] = getattr(node, "text", None)
            # Append once per ingredient, after all fields have been filled in.
            parsed.append(entry)
        return parsed
    
    
    # Fetch the printable recipe page and pretty-print the parsed ingredients.
    url = "https://handletheheat.com/wprm_print/30199"  # cookie recipe yay
    response = requests.get(url)
    ingredients = get_ingredients_info_from_html(response.content)
    print(json.dumps(ingredients, indent=4))
    
    
    [
        {
            "amount": "3",
            "unit": "cups",
            "name": "(380 grams) all-purpose flour",
            "notes": None,
        },
        {"amount": "1", "unit": "teaspoon", "name": "baking soda", "notes": None},
        {"amount": "1", "unit": "teaspoon", "name": "fine sea salt", "notes": None},
        {
            "amount": "2",
            "unit": None,
            "name": "sticks (227 grams) unsalted butter, at cool room temperature (67°F)",
            "notes": None,
        },
        {
            "amount": "1/2",
            "unit": "cup",
            "name": "(100 grams) granulated sugar",
            "notes": None,
        },
        {
            "amount": "1 1/4",
            "unit": "cups",
            "name": "(247 grams) lightly packed light brown sugar",
            "notes": None,
        },
        {"amount": "2", "unit": "teaspoons", "name": "vanilla", "notes": None},
        {
            "amount": "2",
            "unit": None,
            "name": "large eggs, at room temperature",
            "notes": None,
        },
        {
            "amount": "2",
            "unit": "cups",
            "name": "(340 grams) semisweet chocolate chips",
            "notes": None,
        },
    ]
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search