skip to Main Content

I would like to create sql script that will split a csv into 3 mysql tables

However, whenever, i run the sql script below, it throws an error, no subject or that in the field list.I have doubled cross my csv, all the headers and the data are intact.This is the sql script below :***

The first part of the script create all the neccessary tables to actualize this task.

The second part load the csv into these tables using load data infile


-- Create students table

SET FOREIGN_KEY_CHECKS = 0;

-- Create students table
CREATE TABLE IF NOT EXISTS students (
  id INT AUTO_INCREMENT PRIMARY KEY,
  name VARCHAR(255) NOT NULL,
  class VARCHAR(10) NOT NULL
);

-- Create subjects table
CREATE TABLE IF NOT EXISTS subjects (
  id INT AUTO_INCREMENT PRIMARY KEY,
  student_id INT NOT NULL,
  subject_name VARCHAR(50) NOT NULL,
  subject_ca_score INT DEFAULT NULL,
  subject_exam_score INT DEFAULT NULL,
  subject_score INT DEFAULT NULL,
  subject_type VARCHAR(10) DEFAULT NULL,
  FOREIGN KEY (student_id) REFERENCES students(id)
);

-- Create results table
CREATE TABLE IF NOT EXISTS results (
  id INT AUTO_INCREMENT PRIMARY KEY,
  student_id INT NOT NULL,
  subject_name VARCHAR(50) NOT NULL,
  subject_type VARCHAR(10) NOT NULL,
  subject_ca_score INT NOT NULL,
  subject_exam_score INT NOT NULL,
  subject_score INT NOT NULL,
  grade VARCHAR(2) DEFAULT NULL,
  FOREIGN KEY (student_id) REFERENCES students(id)
);

-- Load data into the subjects table from CSV
LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
INTO TABLE subjects
FIELDS TERMINATED BY ','
LINES TERMINATED BY 'rn'
IGNORE 1 ROWS
(student_id, math_score_ca, math_score_exam, econ_score_ca, econ_score_exam, civic_score_ca, civic_score_exam, lit_score_ca, lit_score_exam)
SET
subject_name = 'Math',
subject_ca_score = math_score_ca,
subject_exam_score = math_score_exam,
subject_score = math_score_ca + math_score_exam,
subject_type = 'CA',
student_id = @last_student_id + 1;

-- Insert records for each subject into the subjects table
INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Econ', econ_score_ca, econ_score_exam, econ_score_ca + econ_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;

INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Civic', civic_score_ca, civic_score_exam, civic_score_ca + civic_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;

INSERT INTO subjects (student_id, subject_name, subject_ca_score, subject_exam_score, subject_score, subject_type)
SELECT student_id, 'Lit', lit_score_ca, lit_score_exam, lit_score_ca + lit_score_exam, 'CA'
FROM subjects
WHERE student_id = @last_student_id + 1;

-- Calculate grades and insert them into the results table
INSERT INTO results (student_id, subject_name, subject_type, subject_ca_score, subject_exam_score, subject_score, grade)
SELECT student_id, subject_name, 'CA', subject_ca_score, subject_exam_score, subject_ca_score + subject_exam_score,
  CASE
    WHEN subject_ca_score + subject_exam_score IS NULL THEN NULL
    WHEN subject_ca_score + subject_exam_score >= 90 THEN 'A'
    WHEN subject_ca_score + subject_exam_score >= 80 THEN 'B'
    WHEN subject_ca_score + subject_exam_score >= 70 THEN 'C'
    WHEN subject_ca_score + subject_exam_score >= 60 THEN 'D'
    ELSE 'F'
  END
FROM subjects
WHERE student_id = @last_student_id +1;

-- Display the student's name and their results
SELECT s.name, r.subject_name, r.grade
FROM students s
INNER JOIN results r ON s.id = r.student_id
WHERE s.id = @last_student_id;

-- End of script.

SET FOREIGN_KEY_CHECKS = 1;

This is the error:

SET FOREIGN_KEY_CHECKS = 0
        > OK
        > Query Time: 0.001s
        
        
        -- Create students table
        SET FOREIGN_KEY_CHECKS = 0
        > OK
        > Query Time: 0.009s
        
        
        -- Create students table
        CREATE TABLE IF NOT EXISTS students (
          id INT AUTO_INCREMENT PRIMARY KEY,
          name VARCHAR(255) NOT NULL,
          class VARCHAR(10) NOT NULL
        )
        > OK
        > Query Time: 0.001s
        
        
        -- Create subjects table
        CREATE TABLE IF NOT EXISTS subjects (
          id INT AUTO_INCREMENT PRIMARY KEY,
          student_id INT NOT NULL,
          subject_name VARCHAR(50) NOT NULL,
          subject_ca_score INT DEFAULT NULL,
          subject_exam_score INT DEFAULT NULL,
          subject_score INT DEFAULT NULL,
          subject_type VARCHAR(10) DEFAULT NULL,
          FOREIGN KEY (student_id) REFERENCES students(id)
        )
        > OK
        > Query Time: 0.006s
        
        
        -- Create results table
        CREATE TABLE IF NOT EXISTS results (
          id INT AUTO_INCREMENT PRIMARY KEY,
          student_id INT NOT NULL,
          subject_name VARCHAR(50) NOT NULL,
          subject_type VARCHAR(10) NOT NULL,
          subject_ca_score INT NOT NULL,
          subject_exam_score INT NOT NULL,
          subject_score INT NOT NULL,
          grade VARCHAR(2) DEFAULT NULL,
          FOREIGN KEY (student_id) REFERENCES students(id)
        )
        > OK
        > Query Time: 0.002s
        
        
        -- Load data into the subjects table from CSV
        LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
        INTO TABLE subjects
        FIELDS TERMINATED BY ','
        LINES TERMINATED BY 'rn'
        IGNORE 1 ROWS
        (student_id, math_score_ca, math_score_exam, econ_score_ca, econ_score_exam, civic_score_ca, civic_score_exam, lit_score_ca, lit_score_exam)
        SET
        subject_name = 'Math',
        subject_ca_score = math_score_ca,
        subject_exam_score = math_score_exam,
        subject_score = math_score_ca + math_score_exam,
        subject_type = 'CA',
    student_id = @last_student_id + 1
    > 1054 - Unknown column 'math_score_ca' in 'field list'
    > Query Time: 0.009s

This is csv file, when opened in with a text editor, notepad++

name,class,math_score_ca,math_score_exam,econ_score_ca,econ_score_exam,civic_score_ca,civic_score_exam,lit_score_ca,lit_score_exam
John Doe,SS3,70,80,75,85,80,90,85,95
Jane Doe,SS3,85,90,80,75,90,85,70,80

2

Answers


  1. Your scheme is illogical.

    You’d create:

    • Separate tables Class and SubjectType;
    • Table Student which refers to Class;
    • Table Subject which refers to SubjectType;
    • Table Result which refers to Student and Subject.

    Depends on subject area details the scheme may contain more tables. Also these tables must contain suitable unique constraints.

    Login or Signup to reply.
  2. For your current approach to work with your existing structure (bad idea, see Akina’s answer) you would need to do multiple passes of the CSV, first inserting the students (no idea where you are getting @last_student_id from) and then the pass for each subject.

    -- ADD UNIQUE KEY TO students (name, class)
    ALTER TABLE `students` ADD UNIQUE INDEX (`name`, `class`);
    
    -- LOAD STUDENTS
    LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv' IGNORE
    INTO TABLE students
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY 'rn'
    IGNORE 1 ROWS
    (@name, @class, @math_score_ca, @math_score_exam, @econ_score_ca, @econ_score_exam, @civic_score_ca, @civic_score_exam, @lit_score_ca, @lit_score_exam)
    SET id = NULL, name = @name, class = @class;
    
    -- REPEAT THIS STEP FOR EACH SUBJECT
    LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
    INTO TABLE subjects
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY 'rn'
    IGNORE 1 ROWS
    (@name, @class, @math_score_ca, @math_score_exam, @econ_score_ca, @econ_score_exam, @civic_score_ca, @civic_score_exam, @lit_score_ca, @lit_score_exam)
    SET
        id = NULL,
        student_id = (SELECT id FROM students WHERE name = @name AND class = @class),
        subject_name = 'Math',
        subject_ca_score = @math_score_ca,
        subject_exam_score = @math_score_exam,
        subject_score = @math_score_ca + @math_score_exam,
        subject_type = 'CA';
    
    -- THEN YOUR INSERT INTO results
    

    A better approach would be to normalise your data structure (this is only an example and would need "polishing" to meet your requirements):

    CREATE TABLE IF NOT EXISTS classes (
        id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(10) NOT NULL UNIQUE
    );
    
    CREATE TABLE IF NOT EXISTS students (
        id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(255) NOT NULL,
        class_id INT UNSIGNED NOT NULL,
        UNIQUE (name, class_id),
        FOREIGN KEY (class_id) REFERENCES classes (id)
    );
    
    CREATE TABLE IF NOT EXISTS subjects (
        id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(50) NOT NULL,
        type ENUM('CA' /* ADD OTHER SUBJECT TYPES HERE OR MOVE TO THEIR OWN TABLE */) DEFAULT NULL
    );
    INSERT INTO subjects VALUES (1, 'Math', 'CA'), (2, 'Econ', 'CA'), (3, 'Civic', 'CA'), (4, 'Lit', 'CA');
    
    CREATE TABLE IF NOT EXISTS results (
        id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
        student_id INT UNSIGNED NOT NULL,
        subject_id INT UNSIGNED NOT NULL,
        ca_score INT NOT NULL,
        exam_score INT NOT NULL,
        score INT AS (ca_score + exam_score),
        grade VARCHAR(2) AS (CASE WHEN score IS NULL THEN NULL WHEN score >= 90 THEN 'A' WHEN score >= 80 THEN 'B' WHEN score >= 70 THEN 'C' WHEN score >= 60 THEN 'D' ELSE 'F' END),
        FOREIGN KEY (student_id) REFERENCES students (id),
        FOREIGN KEY (subject_id) REFERENCES subjects (id)
    );
    

    And parse the CSV into a temporary table:

    CREATE TEMPORARY TABLE import_csv (
        name VARCHAR(255) NOT NULL,
        class VARCHAR(10) NOT NULL,
        math_score_ca TINYINT UNSIGNED NOT NULL,
        math_score_exam TINYINT UNSIGNED NOT NULL,
        econ_score_ca TINYINT UNSIGNED NOT NULL,
        econ_score_exam TINYINT UNSIGNED NOT NULL,
        civic_score_ca TINYINT UNSIGNED NOT NULL,
        civic_score_exam TINYINT UNSIGNED NOT NULL,
        lit_score_ca TINYINT UNSIGNED NOT NULL,
        lit_score_exam TINYINT UNSIGNED NOT NULL
    );
    
    LOAD DATA INFILE 'C:/Users/HP USER/Documents/mytest.csv'
    INTO TABLE import_csv
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY 'rn'
    IGNORE 1 ROWS;
    

    And then use that to populate your other tables:

    INSERT IGNORE INTO classes (name)
    SELECT DISTINCT class
    FROM import_csv;
    
    INSERT IGNORE INTO students (name, class_id)
    SELECT DISTINCT csv.name, c.id
    FROM import_csv csv
    JOIN classes c ON c.name = csv.class;
    
    INSERT INTO results (student_id, subject_id, ca_score, exam_score)
    SELECT s.id, 1 /* Math */, csv.math_score_ca, csv.math_score_exam
    FROM import_csv csv
    JOIN classes c ON csv.class = c.name
    JOIN students s ON csv.name = s.name AND c.id = s.class_id;
    
    INSERT INTO results (student_id, subject_id, ca_score, exam_score)
    SELECT s.id, 2 /* Econ */, csv.econ_score_ca, csv.econ_score_exam
    FROM import_csv csv
    JOIN classes c ON csv.class = c.name
    JOIN students s ON csv.name = s.name AND c.id = s.class_id;
    
    INSERT INTO results (student_id, subject_id, ca_score, exam_score)
    SELECT s.id, 3 /* Civic */, csv.civic_score_ca, csv.civic_score_exam
    FROM import_csv csv
    JOIN classes c ON csv.class = c.name
    JOIN students s ON csv.name = s.name AND c.id = s.class_id;
    
    INSERT INTO results (student_id, subject_id, ca_score, exam_score)
    SELECT s.id, 4 /* Lit */, csv.lit_score_ca, csv.lit_score_exam
    FROM import_csv csv
    JOIN classes c ON csv.class = c.name
    JOIN students s ON csv.name = s.name AND c.id = s.class_id;
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search