I would like to create sql script that will split a csv into 3 mysql tables
However, whenever, i run the sql script below, it throws an error, no subject or that in the field list.I have doubled cross my csv, all the headers and the data are intact.This is the sql script below :***
The first part of the script create all the neccessary tables to actualize this task.
The second part load the csv into these tables using load data infile
-- Create students table
SET FOREIGN_KEY_CHECKS = 0;
-- Create students table
CREATE TABLE IF NOT EXISTS students (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
class VARCHAR(10) NOT NULL
);
-- Create subjects table
CREATE TABLE IF NOT EXISTS subjects (
id INT AUTO_INCREMENT PRIMARY KEY,
student_id INT NOT NULL,
subject_name VARCHAR(50) NOT NULL,
subject_ca_score INT DEFAULT NULL,
subject_exam_score INT DEFAULT NULL,
subject_score INT DEFAULT NULL,
subject_type VARCHAR(10) DEFAULT NULL,
FOREIGN KEY (student_id) REFERENCES students(id)
);
-- Create results table
CREATE TABLE IF NOT EXISTS results (
id INT AUTO_INCREMENT PRIMARY KEY,
student_id INT NOT NULL,
subject_name VARCHAR(50) NOT NULL,
subject_type VARCHAR(10) NOT NULL,
subject_ca_score INT NOT NULL,
subject_exam_score INT NOT NULL,
subject_score INT NOT NULL,
grade VARCHAR(2) DEFAULT NULL,
FOREIGN KEY (student_id) REFERENCES students(id)
);
-- Load data into the subjects table from CSV
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE subjects
FIELDS TERMINATED BY ','
LINES TERMINATED BY 'rn'
IGNORE 1 ROWS
(student_id, math_score_ca, math_score_exam, econ_score_ca, econ_score_exam, civic_score_ca, civic_score_exam, lit_score_ca, lit_score_exam)
SET
subject_name = 'Math',
subject_ca_score = math_score_ca,
subject_exam_score = math_score_exam,
subject_score = math_score_ca + math_score_exam,
subject_type = 'CA',
student_id = @last_student_id + 1;
-- Insert records for each subject into the subjects table
INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Econ', econ_score_ca, econ_score_exam, econ_score_ca + econ_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;
INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Civic', civic_score_ca, civic_score_exam, civic_score_ca + civic_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;
INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Lit', lit_score_ca, lit_score_exam, lit_score_ca + lit_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;
-- Calculate grades and insert them into the results table
INSERT INTO results (student_id, subject_name, subject_type, subject_ca_score, subject_exam_score, subject_score, grade)
SELECT student_id, subject_name, 'CA', subject_ca_score, subject_exam_score, subject_ca_score + subject_exam_score,
CASE
WHEN subject_ca_score + subject_exam_score IS NULL THEN NULL
WHEN subject_ca_score + subject_exam_score >= 90 THEN 'A'
WHEN subject_ca_score + subject_exam_score >= 80 THEN 'B'
WHEN subject_ca_score + subject_exam_score >= 70 THEN 'C'
WHEN subject_ca_score + subject_exam_score >= 60 THEN 'D'
ELSE 'F'
END
FROM subjects
WHERE student_id = @last_student_id +1;
-- Display the student's name and their results
SELECT s.name, r.subject_name, r.grade
FROM students s
INNER JOIN results r ON s.id = r.student_id
WHERE s.id = @last_student_id;
-- End of script.
SET FOREIGN_KEY_CHECKS = 1;
This is the error:
SET FOREIGN_KEY_CHECKS = 0
> OK
> Query Time: 0.001s
-- Create students table
SET FOREIGN_KEY_CHECKS = 0
> OK
> Query Time: 0.009s
-- Create students table
CREATE TABLE IF NOT EXISTS students (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
class VARCHAR(10) NOT NULL
)
> OK
> Query Time: 0.001s
-- Create subjects table
CREATE TABLE IF NOT EXISTS subjects (
id INT AUTO_INCREMENT PRIMARY KEY,
student_id INT NOT NULL,
subject_name VARCHAR(50) NOT NULL,
subject_ca_score INT DEFAULT NULL,
subject_exam_score INT DEFAULT NULL,
subject_score INT DEFAULT NULL,
subject_type VARCHAR(10) DEFAULT NULL,
FOREIGN KEY (student_id) REFERENCES students(id)
)
> OK
> Query Time: 0.006s
-- Create results table
CREATE TABLE IF NOT EXISTS results (
id INT AUTO_INCREMENT PRIMARY KEY,
student_id INT NOT NULL,
subject_name VARCHAR(50) NOT NULL,
subject_type VARCHAR(10) NOT NULL,
subject_ca_score INT NOT NULL,
subject_exam_score INT NOT NULL,
subject_score INT NOT NULL,
grade VARCHAR(2) DEFAULT NULL,
FOREIGN KEY (student_id) REFERENCES students(id)
)
> OK
> Query Time: 0.002s
-- Load data into the subjects table from CSV
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE subjects
FIELDS TERMINATED BY ','
LINES TERMINATED BY 'rn'
IGNORE 1 ROWS
(student_id, math_score_ca, math_score_exam, econ_score_ca, econ_score_exam, civic_score_ca, civic_score_exam, lit_score_ca, lit_score_exam)
SET
subject_name = 'Math',
subject_ca_score = math_score_ca,
subject_exam_score = math_score_exam,
subject_score = math_score_ca + math_score_exam,
subject_type = 'CA',
student_id = @last_student_id + 1
> 1054 - Unknown column 'math_score_ca' in 'field list'
> Query Time: 0.009s
This is csv file, when opened in with a text editor, notepad++
name,class,math_score_ca,math_score_exam,econ_score_ca,econ_score_exam,civic_score_ca,civic_score_exam,lit_score_ca,lit_score_exam
John Doe,SS3,70,80,75,85,80,90,85,95
Jane Doe,SS3,85,90,80,75,90,85,70,80
2
Answers
Your scheme is illogical.
You’d create:
Class
andSubjectType
;Student
which refers toClass
;Subject
which refers toSubjectType
;Result
which refers toStudent
andSubject
.Depends on subject area details the scheme may contain more tables. Also these tables must contain suitable unique constraints.
For your current approach to work with your existing structure (bad idea, see Akina’s answer) you would need to do multiple passes of the CSV, first inserting the students (no idea where you are getting
@last_student_id
from) and then the pass for each subject.A better approach would be to normalise your data structure (this is only an example and would need "polishing" to meet your requirements):
And parse the CSV into a temporary table:
And then use that to populate your other tables: