Html - Selenium scroll flickr page to get all the images

Explorer
September 22, 2023
91 views
1 vote
2 Answers

Hello, I am trying to get public Flickr images from a Flickr group. I am able to parse the HTML and get the image hrefs, but I am struggling to find a way to get all the images that load as the page is scrolled down. My code below only returns the hrefs for the images present when the page first loads; how can I get all the hrefs after scrolling to the bottom?

from bs4 import BeautifulSoup
import urllib.request
from selenium import webdriver
import time
# Run Chrome without a visible window.
op = webdriver.ChromeOptions()
op.add_argument('headless')
driver = webdriver.Chrome(options=op)

url = "https://www.flickr.com/groups/allfreepictures/pool/page3041"
driver.get(url=url)

# Parse a snapshot of the DOM taken right after the initial load; no
# scrolling happens before this point.
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Collect the href of every <a class="overlay"> anchor found in the snapshot.
image_urls = []
for link in soup.findAll("a", {"class": "overlay"}):
    image_urls.append(link['href'])
print(image_urls)

Answers

Sending the Page Down key repeatedly should make the page load the remaining images. Try this:

from bs4 import BeautifulSoup
import urllib.request
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Build the Chrome options and actually hand them to the driver; without
# `options=op` the 'headless' argument is never applied.
op = webdriver.ChromeOptions()
op.add_argument('headless')
driver = webdriver.Chrome(options=op)
url = "https://www.flickr.com/groups/allfreepictures/pool/page3041"

# Stop after this many consecutive rounds in which no new link appeared.
MAX_STALLED_ROUNDS = 5
# Number of Page Down key presses sent to <body> per round.
PAGE_DOWNS_PER_ROUND = 30
# Pause after each round so newly requested content has time to render.
SETTLE_SECONDS = 1.5

image_urls = []
try:
    driver.get(url=url)
    number = 0  # consecutive rounds without a change in the link count
    total = 0   # link count seen in the most recent round that changed it
    while True:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        image_urls = [
            link['href']
            for link in soup.find_all("a", {"class": "overlay"})
        ]
        if len(image_urls) == total:
            number += 1
            if number == MAX_STALLED_ROUNDS:
                break
        else:
            number = 0
            total = len(image_urls)
        print(len(image_urls))

        # Key presses go to <body> so the page itself scrolls.
        body = driver.find_element(By.CSS_SELECTOR, "body")
        for _ in range(PAGE_DOWNS_PER_ROUND):
            body.send_keys(Keys.PAGE_DOWN)
        time.sleep(SETTLE_SECONDS)
finally:
    # Always shut the browser down, even if scraping raised.
    driver.quit()

You should first scroll to the bottom of the page so that all the available images are loaded.

Here’s how you can do it:

import time
from bs4 import BeautifulSoup
from selenium import webdriver

driver = webdriver.Chrome()
url = "https://www.flickr.com/groups/allfreepictures/pool/page3041"

try:
    driver.get(url=url)

    # Scroll to the bottom of the page repeatedly until the document height
    # stops changing between two consecutive checks.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)  # give the page a moment to append more content
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Extra pause before taking the final snapshot of the DOM.
    time.sleep(2)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always close the browser, even if loading or scrolling raised.
    driver.quit()

# Collect the href of every <a class="overlay"> anchor in the snapshot.
image_urls = [link['href'] for link in soup.find_all("a", {"class": "overlay"})]
print(len(image_urls))
print(image_urls)

output:

100
['/photos/78129336100/14807791792/in/pool-allfreepictures/', '/photos/florida_photo_guy/31049045241/in/pool-allfreepictures/', '/photos/lgbarrio/30320683634/in/pool-allfreepictures/', '/photos/reecegarsidephotography/31048800591/in/pool-allfreepictures/', '/photos/rulenumberone2/30978473452/in/pool-allfreepictures/', '/photos/city-amsterdam/31019116552/in/pool-allfreepictures/', '/photos/ferdinandfeys/31019310362/in/pool-allfreepictures/', '/photos/ferdinandfeys/31162784025/in/pool-allfreepictures/', '/photos/woolamaloo_gazette/30795198300/in/pool-allfreepictures/', '/photos/woolamaloo_gazette/31162575685/in/pool-allfreepictures/', '/photos/woolamaloo_gazette/31162582055/in/pool-allfreepictures/', '/photos/janarendtsz/31099842796/in/pool-allfreepictures/', '/photos/zuzaritt/31123916656/in/pool-allfreepictures/', '/photos/kyllercg/30791661100/in/pool-allfreepictures/', '/photos/max-garcia/15990803843/in/pool-allfreepictures/', '/photos/pepperberryfarm/31001562352/in/pool-allfreepictures/', '/photos/stephi2006/31016258101/in/pool-allfreepictures/', '/photos/stephenmelkisethian/30325000854/in/pool-allfreepictures/', '/photos/dfoerster_fotografie/31048383701/in/pool-allfreepictures/', '/photos/123_456/31081977166/in/pool-allfreepictures/', '/photos/han350d/31018711912/in/pool-allfreepictures/', '/photos/lakeworth/30793110210/in/pool-allfreepictures/', '/photos/lakeworth/30793052710/in/pool-allfreepictures/', '/photos/alexxx-malev/7189604574/in/pool-allfreepictures/', '/photos/philipklug/30354819193/in/pool-allfreepictures/', '/photos/86803936@N07/27760326812/in/pool-allfreepictures/', '/photos/86803936@N07/30354658053/in/pool-allfreepictures/', '/photos/sidibousaid/30794453830/in/pool-allfreepictures/', '/photos/victorianorivero/31161846945/in/pool-allfreepictures/', '/photos/marliodasilva/23192901586/in/pool-allfreepictures/', '/photos/marliodasilva/22808093739/in/pool-allfreepictures/', '/photos/boyfrom_bare/31018089812/in/pool-allfreepictures/', 
'/photos/boyfrom_bare/31047493641/in/pool-allfreepictures/', '/photos/boyfrom_bare/31125611646/in/pool-allfreepictures/', '/photos/marliodasilva/29636171173/in/pool-allfreepictures/', '/photos/alexxx-malev/7189161010/in/pool-allfreepictures/', '/photos/142307100@N04/31047163961/in/pool-allfreepictures/', '/photos/iancvt55/4988019788/in/pool-allfreepictures/', '/photos/129472585@N03/30339455884/in/pool-allfreepictures/', '/photos/129472585@N03/30792975780/in/pool-allfreepictures/', '/photos/kevpbur/30346873163/in/pool-allfreepictures/', '/photos/anberlin/31095275475/in/pool-allfreepictures/', '/photos/93482748@N02/30792948160/in/pool-allfreepictures/', '/photos/nilsvanrooijen/31117775386/in/pool-allfreepictures/', '/photos/bodil/30339540994/in/pool-allfreepictures/', '/photos/willj/31100006056/in/pool-allfreepictures/', '/photos/city-amsterdam/31160452445/in/pool-allfreepictures/', '/photos/robertmoranelli/30030957310/in/pool-allfreepictures/', '/photos/overdozoverdoz/31160412575/in/pool-allfreepictures/', '/photos/sullen_snowflakes/31137391155/in/pool-allfreepictures/', '/photos/aronalison/31016613392/in/pool-allfreepictures/', '/photos/22084572@N07/30328335583/in/pool-allfreepictures/', '/photos/unicocreativo/31123364466/in/pool-allfreepictures/', '/photos/renagrisa/30954404592/in/pool-allfreepictures/', '/photos/renagrisa/30161460054/in/pool-allfreepictures/', '/photos/renagrisa/30289804203/in/pool-allfreepictures/', '/photos/angelines64/31091725516/in/pool-allfreepictures/', '/photos/79786806@N07/31017953931/in/pool-allfreepictures/', '/photos/dclmeyer/30792027010/in/pool-allfreepictures/', '/photos/pngnexus/30338521124/in/pool-allfreepictures/', '/photos/akras/30687663700/in/pool-allfreepictures/', '/photos/143193344@N08/30331462213/in/pool-allfreepictures/', '/photos/130567304@N02/31045110871/in/pool-allfreepictures/', '/photos/jorbasa/31058439386/in/pool-allfreepictures/', '/photos/past/31122043566/in/pool-allfreepictures/', 
'/photos/136695538@N06/30791471480/in/pool-allfreepictures/', '/photos/gamut22/31015439762/in/pool-allfreepictures/', '/photos/simonjwood/31044415561/in/pool-allfreepictures/', '/photos/mtrienke/30328856733/in/pool-allfreepictures/', '/photos/78933929@N02/31126567175/in/pool-allfreepictures/', '/photos/78933929@N02/30983333542/in/pool-allfreepictures/', '/photos/pcardo/14944624534/in/pool-allfreepictures/', '/photos/angela_llop/31099192836/in/pool-allfreepictures/', '/photos/130567304@N02/31122376256/in/pool-allfreepictures/', '/photos/wernerwillemsen/31014707982/in/pool-allfreepictures/', '/photos/101630104@N02/31014594642/in/pool-allfreepictures/', '/photos/79157069@N03/31122339126/in/pool-allfreepictures/', '/photos/97423979@N00/30954549482/in/pool-allfreepictures/', '/photos/davethebird/30880370895/in/pool-allfreepictures/', '/photos/davethebird/30858796306/in/pool-allfreepictures/', '/photos/lomo54/15606791602/in/pool-allfreepictures/', '/photos/davethebird/30948393191/in/pool-allfreepictures/', '/photos/gemma33/30280818390/in/pool-allfreepictures/', '/photos/gridview/31051982976/in/pool-allfreepictures/', '/photos/string_bass_dave/30896304152/in/pool-allfreepictures/', '/photos/fgenico/31121370696/in/pool-allfreepictures/', '/photos/jafsegal/22906469774/in/pool-allfreepictures/', '/photos/56603673@N03/30789350760/in/pool-allfreepictures/', '/photos/hasgaha/30192496893/in/pool-allfreepictures/', '/photos/belurashok/31126705775/in/pool-allfreepictures/', '/photos/vic_206/30767652690/in/pool-allfreepictures/', '/photos/132420193@N06/20767542043/in/pool-allfreepictures/', '/photos/audun_bie/30349449343/in/pool-allfreepictures/', '/photos/132420193@N06/20122848659/in/pool-allfreepictures/', '/photos/hoppenbrouwers/16963478511/in/pool-allfreepictures/', '/photos/79157069@N03/31120261916/in/pool-allfreepictures/', '/photos/carolyn_gifford/20922635156/in/pool-allfreepictures/', '/photos/carolyn_gifford/9665094190/in/pool-allfreepictures/', 
'/photos/carolyn_gifford/9606681745/in/pool-allfreepictures/', '/photos/51764518@N02/26835326884/in/pool-allfreepictures/']

Please signup or login to give your own answer.

Click here to cancel reply.

Html – Selenium scroll flickr page to get all the images

Answers