skip to Main Content

Hello, I am trying to get public Flickr images from a Flickr group. I am able to parse the HTML and get the image hrefs, but I am struggling to find a way to get all the images from a page as it is scrolled down. My code below only returns the hrefs for the images that are loaded initially; how can I get all the hrefs after scrolling to the bottom?

from bs4 import BeautifulSoup
import urllib.request
from selenium import webdriver
import time
# Target page: one page of the Flickr group pool.
url = "https://www.flickr.com/groups/allfreepictures/pool/page3041"

# Start Chrome with the 'headless' argument set.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('headless')
driver = webdriver.Chrome(options=chrome_options)

driver.get(url=url)

# The page source is read once, immediately after the page is loaded, so only
# the links present at that moment are collected.
soup = BeautifulSoup(driver.page_source, 'html.parser')
image_urls = []
for anchor in soup.find_all("a", class_="overlay"):
    image_urls.append(anchor['href'])
print(image_urls)

2

Answers


  1. You may be able to get what you want by repeatedly sending the Page Down key to the page. Try this:

    from bs4 import BeautifulSoup
    import urllib.request
    from selenium import webdriver
    import time
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    op = webdriver.ChromeOptions()
    op.add_argument('headless')
    # Hand the options to the driver; otherwise the 'headless' argument
    # configured above is never applied.
    driver = webdriver.Chrome(options=op)
    url = "https://www.flickr.com/groups/allfreepictures/pool/page3041"

    image_urls = []
    try:
        driver.get(url=url)
        number = 0  # consecutive passes in which the link count did not change
        total = 0   # link count seen on the last pass that found a different count
        while True:
            # Re-parse the current DOM and collect every overlay link's href.
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            image_urls = [link['href'] for link in soup.find_all("a", {"class": "overlay"})]
            if len(image_urls) == total:
                number += 1
                # Stop once five passes in a row produced no change in count.
                if number == 5:
                    break
            else:
                number = 0
                total = len(image_urls)
            print(len(image_urls))
            # Scroll by sending 30 Page Down key presses to <body>, then pause
            # before the next pass re-reads the page source.
            body = driver.find_element(By.CSS_SELECTOR, "body")
            for _ in range(30):
                body.send_keys(Keys.PAGE_DOWN)
            time.sleep(1.5)
    finally:
        # Always shut the browser down, even if scraping raised an exception.
        driver.quit()
    # image_urls now holds the hrefs from the final pass; uncomment to dump them.
    #print(image_urls)
    
    Login or Signup to reply.
  2. You should first scroll to the bottom of the page to make sure that all the available images have been loaded.

    Here’s how you can do that:

    import time
    from bs4 import BeautifulSoup
    from selenium import webdriver
    
    driver = webdriver.Chrome()
    url = "https://www.flickr.com/groups/allfreepictures/pool/page3041"

    try:
        driver.get(url=url)

        # Scroll to the bottom of the page repeatedly to load all available
        # images; stop when a scroll no longer changes the document height.
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(1)  # pause before re-measuring the height
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        time.sleep(2)

        # Parse the page source after scrolling and collect each overlay
        # link's href.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        image_urls = [link['href'] for link in soup.find_all("a", {"class": "overlay"})]
    finally:
        # Always shut the browser down, even if scraping raised an exception.
        driver.quit()

    print(len(image_urls))
    print(image_urls)
    

    output:

    100
    ['/photos/78129336100/14807791792/in/pool-allfreepictures/', '/photos/florida_photo_guy/31049045241/in/pool-allfreepictures/', '/photos/lgbarrio/30320683634/in/pool-allfreepictures/', '/photos/reecegarsidephotography/31048800591/in/pool-allfreepictures/', '/photos/rulenumberone2/30978473452/in/pool-allfreepictures/', '/photos/city-amsterdam/31019116552/in/pool-allfreepictures/', '/photos/ferdinandfeys/31019310362/in/pool-allfreepictures/', '/photos/ferdinandfeys/31162784025/in/pool-allfreepictures/', '/photos/woolamaloo_gazette/30795198300/in/pool-allfreepictures/', '/photos/woolamaloo_gazette/31162575685/in/pool-allfreepictures/', '/photos/woolamaloo_gazette/31162582055/in/pool-allfreepictures/', '/photos/janarendtsz/31099842796/in/pool-allfreepictures/', '/photos/zuzaritt/31123916656/in/pool-allfreepictures/', '/photos/kyllercg/30791661100/in/pool-allfreepictures/', '/photos/max-garcia/15990803843/in/pool-allfreepictures/', '/photos/pepperberryfarm/31001562352/in/pool-allfreepictures/', '/photos/stephi2006/31016258101/in/pool-allfreepictures/', '/photos/stephenmelkisethian/30325000854/in/pool-allfreepictures/', '/photos/dfoerster_fotografie/31048383701/in/pool-allfreepictures/', '/photos/123_456/31081977166/in/pool-allfreepictures/', '/photos/han350d/31018711912/in/pool-allfreepictures/', '/photos/lakeworth/30793110210/in/pool-allfreepictures/', '/photos/lakeworth/30793052710/in/pool-allfreepictures/', '/photos/alexxx-malev/7189604574/in/pool-allfreepictures/', '/photos/philipklug/30354819193/in/pool-allfreepictures/', '/photos/86803936@N07/27760326812/in/pool-allfreepictures/', '/photos/86803936@N07/30354658053/in/pool-allfreepictures/', '/photos/sidibousaid/30794453830/in/pool-allfreepictures/', '/photos/victorianorivero/31161846945/in/pool-allfreepictures/', '/photos/marliodasilva/23192901586/in/pool-allfreepictures/', '/photos/marliodasilva/22808093739/in/pool-allfreepictures/', '/photos/boyfrom_bare/31018089812/in/pool-allfreepictures/', 
'/photos/boyfrom_bare/31047493641/in/pool-allfreepictures/', '/photos/boyfrom_bare/31125611646/in/pool-allfreepictures/', '/photos/marliodasilva/29636171173/in/pool-allfreepictures/', '/photos/alexxx-malev/7189161010/in/pool-allfreepictures/', '/photos/142307100@N04/31047163961/in/pool-allfreepictures/', '/photos/iancvt55/4988019788/in/pool-allfreepictures/', '/photos/129472585@N03/30339455884/in/pool-allfreepictures/', '/photos/129472585@N03/30792975780/in/pool-allfreepictures/', '/photos/kevpbur/30346873163/in/pool-allfreepictures/', '/photos/anberlin/31095275475/in/pool-allfreepictures/', '/photos/93482748@N02/30792948160/in/pool-allfreepictures/', '/photos/nilsvanrooijen/31117775386/in/pool-allfreepictures/', '/photos/bodil/30339540994/in/pool-allfreepictures/', '/photos/willj/31100006056/in/pool-allfreepictures/', '/photos/city-amsterdam/31160452445/in/pool-allfreepictures/', '/photos/robertmoranelli/30030957310/in/pool-allfreepictures/', '/photos/overdozoverdoz/31160412575/in/pool-allfreepictures/', '/photos/sullen_snowflakes/31137391155/in/pool-allfreepictures/', '/photos/aronalison/31016613392/in/pool-allfreepictures/', '/photos/22084572@N07/30328335583/in/pool-allfreepictures/', '/photos/unicocreativo/31123364466/in/pool-allfreepictures/', '/photos/renagrisa/30954404592/in/pool-allfreepictures/', '/photos/renagrisa/30161460054/in/pool-allfreepictures/', '/photos/renagrisa/30289804203/in/pool-allfreepictures/', '/photos/angelines64/31091725516/in/pool-allfreepictures/', '/photos/79786806@N07/31017953931/in/pool-allfreepictures/', '/photos/dclmeyer/30792027010/in/pool-allfreepictures/', '/photos/pngnexus/30338521124/in/pool-allfreepictures/', '/photos/akras/30687663700/in/pool-allfreepictures/', '/photos/143193344@N08/30331462213/in/pool-allfreepictures/', '/photos/130567304@N02/31045110871/in/pool-allfreepictures/', '/photos/jorbasa/31058439386/in/pool-allfreepictures/', '/photos/past/31122043566/in/pool-allfreepictures/', 
'/photos/136695538@N06/30791471480/in/pool-allfreepictures/', '/photos/gamut22/31015439762/in/pool-allfreepictures/', '/photos/simonjwood/31044415561/in/pool-allfreepictures/', '/photos/mtrienke/30328856733/in/pool-allfreepictures/', '/photos/78933929@N02/31126567175/in/pool-allfreepictures/', '/photos/78933929@N02/30983333542/in/pool-allfreepictures/', '/photos/pcardo/14944624534/in/pool-allfreepictures/', '/photos/angela_llop/31099192836/in/pool-allfreepictures/', '/photos/130567304@N02/31122376256/in/pool-allfreepictures/', '/photos/wernerwillemsen/31014707982/in/pool-allfreepictures/', '/photos/101630104@N02/31014594642/in/pool-allfreepictures/', '/photos/79157069@N03/31122339126/in/pool-allfreepictures/', '/photos/97423979@N00/30954549482/in/pool-allfreepictures/', '/photos/davethebird/30880370895/in/pool-allfreepictures/', '/photos/davethebird/30858796306/in/pool-allfreepictures/', '/photos/lomo54/15606791602/in/pool-allfreepictures/', '/photos/davethebird/30948393191/in/pool-allfreepictures/', '/photos/gemma33/30280818390/in/pool-allfreepictures/', '/photos/gridview/31051982976/in/pool-allfreepictures/', '/photos/string_bass_dave/30896304152/in/pool-allfreepictures/', '/photos/fgenico/31121370696/in/pool-allfreepictures/', '/photos/jafsegal/22906469774/in/pool-allfreepictures/', '/photos/56603673@N03/30789350760/in/pool-allfreepictures/', '/photos/hasgaha/30192496893/in/pool-allfreepictures/', '/photos/belurashok/31126705775/in/pool-allfreepictures/', '/photos/vic_206/30767652690/in/pool-allfreepictures/', '/photos/132420193@N06/20767542043/in/pool-allfreepictures/', '/photos/audun_bie/30349449343/in/pool-allfreepictures/', '/photos/132420193@N06/20122848659/in/pool-allfreepictures/', '/photos/hoppenbrouwers/16963478511/in/pool-allfreepictures/', '/photos/79157069@N03/31120261916/in/pool-allfreepictures/', '/photos/carolyn_gifford/20922635156/in/pool-allfreepictures/', '/photos/carolyn_gifford/9665094190/in/pool-allfreepictures/', 
'/photos/carolyn_gifford/9606681745/in/pool-allfreepictures/', '/photos/51764518@N02/26835326884/in/pool-allfreepictures/']
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search