from flask import Flask, render_template
from bs4 import BeautifulSoup
import requests
import pandas as pd
app = Flask(__name__)
@app.route("/")
def job_scraper():
url01 = "https://www.linkedin.com/jobs/search?keywords=Python%20%28Programming%20Language%29&location=Las%20Vegas%2C%20Nevada%2C%20United%20States&geoId=100293800¤tJobId=3477751834&position=7&pageNum=1"
url_request_01 = requests.get(url01)
soup = BeautifulSoup(url_request_01.text, 'html.parser')
job_title_pull = soup.find_all(class_="base-search-card__title")
job_subtitle_pull = soup.find_all(class_="base-search-card__subtitle")
job_location_pull = soup.find_all(class_="job-search-card__location")
job_date_pull = soup.find_all(class_="job-search-card__listdate")
job_links = [job_link_pull['href'] for job_link_pull in soup.find_all('a', href=True, class_="base-card__full-link absolute top-0 right-0 bottom-0 left-0 p-0 z-[2]")]
job_title_data = [title.text.strip() for title in job_title_pull]
job_subtitle_data = [subtitle.text.strip() for subtitle in job_subtitle_pull]
job_location_data = [location.text.strip() for location in job_location_pull]
job_date_data = [date.text.strip() for date in job_date_pull]
data = {'title': job_title_data,
'subtitle': job_subtitle_data,
'location': job_location_data,
'date': job_date_data,
'link': job_links}
df = pd.DataFrame(data)
return render_template('index.html', table_html=df.to_html(classes='table table-striped', index=False))
if __name__ == '__main__':
app.run(host="0.0.0.0", port=5001)
here is my html code
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Your Web App</title>
</head>
<body>
<h1>Your Data</h1>
{{ table_html | safe }}
</body>
</html>
Here is the error
applemacbookproa2289@APPLEs-MacBook-Pro-2 web-scraping-01 % python3 scraping.py
* Serving Flask app 'scraping'
* Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:5001
* Running on http://192.168.4.53:5001
Press CTRL+C to quit
[2024-01-12 10:03:36,006] ERROR in app: Exception on / [GET]
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/flask/app.py", line 1455, in wsgi_app
response = self.full_dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/flask/app.py", line 869, in full_dispatch_request
rv = self.handle_user_exception(e)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/flask/app.py", line 867, in full_dispatch_request
rv = self.dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/flask/app.py", line 852, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/applemacbookproa2289/Desktop/web-scraping-01/scraping.py", line 33, in job_scraper
df = pd.DataFrame(data)
^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/frame.py", line 733, in __init__
mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/internals/construction.py", line 503, in dict_to_mgr
return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/internals/construction.py", line 114, in arrays_to_mgr
index = _extract_index(arrays)
^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/internals/construction.py", line 677, in _extract_index
raise ValueError("All arrays must be of the same length")
ValueError: All arrays must be of the same length
192.168.4.53 - - [12/Jan/2024 10:03:36] "GET / HTTP/1.1" 500 -
[2024-01-12 10:04:22,869] ERROR in app: Exception on / [GET]
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/flask/app.py", line 1455, in wsgi_app
response = self.full_dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/flask/app.py", line 869, in full_dispatch_request
rv = self.handle_user_exception(e)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/flask/app.py", line 867, in full_dispatch_request
rv = self.dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/flask/app.py", line 852, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/applemacbookproa2289/Desktop/web-scraping-01/scraping.py", line 33, in job_scraper
df = pd.DataFrame(data)
^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/frame.py", line 733, in __init__
mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/internals/construction.py", line 503, in dict_to_mgr
return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/internals/construction.py", line 114, in arrays_to_mgr
index = _extract_index(arrays)
^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pandas/core/internals/construction.py", line 677, in _extract_index
raise ValueError("All arrays must be of the same length")
ValueError: All arrays must be of the same length
192.168.4.53 - - [12/Jan/2024 10:04:22] "GET / HTTP/1.1" 500 -
I tried a bunch of random solution already, but I can’t figure out whats wrong. May you please point me in the right direction, as I am just testing this out. Once I am able to get the scrapped data to html, the next step for me is to take user input Job title and location to pull the data something similar to this.
jobtitle = input("Enter job title: ")
location = input("Enter job location: ")
url01 = "https://www.linkedin.com/jobs/search?keywords="+jobtitle+"&location="+location+"&geoId=&trk=public_jobs_jobs-search-bar_search-submit&position=1&pageNum=0"
2
Answers
I was able to solve the issue here is my solution
Here is the code for HTML to loop through the data frame created in python
The problem seems that not every job has element with
class="job-search-card__listdate"
so you need to check for that:Prints: