I wrote a code to scrape data and insert it into MYSQL database, but the data wasn’t inserted in the right columns in the database. It mixes up, like some rows in ‘skills’ being saved under ‘company_name’ column and vice versa. How do i make the different data to be saved under the appropriate columns respectively.
Here is my code
import requests
from bs4 import BeautifulSoup
import csv
from csv import writer
import mysql.connector
db = mysql.connector.connect(host= 'localhost',
user= 'root',
password= 'Maxesafrica2')
cursor = db.cursor()
cursor.execute("CREATE DATABASE jobberman_db")
print("Connection to MYSQL Established!")
db = mysql.connector.connect(host= 'localhost',
user= 'root',
password= 'Maxesafrica2',
database = 'jobberman_db'
)
print("Connected to Database!")
cursor = db.cursor()
mysql_create_table_query = """CREATE TABLE jobberman_tbl (company_name Varchar(3000) NOT NULL,
skills Varchar(3000) NOT NULL,
job_desc VarChar(3000),
job_link VarChar(3000),
salary_scale VarChar(3000))"""
result = cursor.execute(mysql_create_table_query)
html_text = requests.get('https://www.jobberman.com/jobs/hospitality-leisure').text
soup = BeautifulSoup(html_text, 'lxml')
jobs = soup.find_all('div', class_ = 'mx-5 md:mx-0 flex flex-wrap col-span-1 mb-5 bg-white rounded-lg border border-gray-300 hover:border-gray-400 focus-within:ring-2 focus-within:ring-offset-2 focus-within:ring-gray-500')
with open('jobberman.csv', 'w+', newline = '', encoding = 'utf-8') as f:
header = ['company name', 'skills', 'job desc', 'job link', 'salary scale']
writer = csv.writer(f)
writer.writerow(header)
salary_scale = ''
job_list = []
for job in jobs:
company_name = job.find('p', class_ = 'text-sm text-brand-linked').text
skills = job.find('p', class_ = 'text-lg font-medium break-words text-brand-linked').text
job_desc = job.find('p', class_ = 'text-sm font-normal text-gray-700 md:text-gray-500 md:pl-5').text
job_link = job.a['href']
salary_scale1 = job.find('span', class_ = 'mr-1')
if salary_scale1:
salary_scale = salary_scale1.text
else:
salary_scale = 'Not Mentioned'
joblist = [company_name, skills, job_desc, job_link, salary_scale]
writer.writerow(joblist)
all_details = {company_name, skills, job_desc, job_link, salary_scale}
all_details = tuple(all_details)
job_list.append(all_details)
insert_query = """INSERT INTO jobberman_tbl (company_name, skills, job_desc, job_link, salary_scale)
VALUES (%s, %s, %s, %s, %s)"""
records_to_insert = job_list
db = mysql.connector.connect(host= 'localhost',
user= 'root',
password= 'Maxesafrica2',
database = 'jobberman_db'
)
cursor = db.cursor()
cursor.executemany(insert_query, records_to_insert)
db.commit()
cursor.close()
db.close()
print('Done!')
Here is a picture from mysql database to illustrate what a talking about
2
Answers
I finally got the issue corrected by changing
all_details = {company_name, skills, job_desc, job_link, salary_scale}
toall_details = (company_name, skills, job_desc, job_link, salary_scale)
.Thanks @Breezer and @phatfingers for the attempt. Much appreciated. .
Your assignment to job_link doesn’t look right. Try: