
I am trying to create a script to read a CSV file and update my local PostgreSQL table with its data. Here is my script:

import psycopg2 as pg
from pandas import read_excel,DataFrame

def execute_query(connection,cursor,query:str):    
    cursor.execute(query)
    connection.commit()

def create_conn():
    try:
        connection = pg.connect("host='localhost' port='5432' dbname='abc' user='abc' password='abc'")
        cursor = connection.cursor()
        return connection,cursor
    except:
        print("Connection failed")

def read_sql_file(filename):
    with open(filename, 'r') as file:
        sql_queries = file.read()
    return sql_queries

def import_csv_data(connection, cursor, csv_file,tablename):
    try:
       df = read_excel(csv_file)

       for i,row in df.iterrows():
           values = ",".join(map(str,row.values))
           query = f"INSERT INTO {tablename} VALUES {values};"
           execute_query(connection,cursor,query)

    except Exception as e:
       print(f"Error: {e}")


def close_connection(connection,cursor):
    connection.close()
    cursor.close()


if __name__ == "__main__":    
    conn, curr = create_conn()
    if conn and curr:        
        
        ## Create table
        # sql_file = "player_behaviour_create.sql"  
        # query = read_sql_file(sql_file)                        
        # execute_query(conn,curr,query)

        ## Insert Data
        import_csv_data(conn,curr,r"Datadata_sample_100_rows.xlsx","player_behaviour")    

        close_connection(conn,curr)
        

SQL Query for table creation:

CREATE TABLE player_behaviour (
    PlayerID INT PRIMARY KEY,
    Age INT,
    Gender VARCHAR(10),
    Location VARCHAR(50),
    GameID INT,
    PlayTime FLOAT,
    FavoriteGame VARCHAR(50),
    SessionID BIGINT,
    CampaignID INT,
    AdsSeen INT,
    PurchasesMade INT,
    EngagementLevel VARCHAR(10)
);

I am getting an error when I run the script.

I suspect that because I am converting every value to a string in import_csv_data, a data type mismatch is occurring, but I am not sure. If that is the case, what can I do?
I could also use some tips to optimize this code.

2 Answers


  1. Each row you’re inserting must be wrapped in parentheses:

    insert into table(a,b,c) 
    values (1,2,3);
    

    See SQL tutorial: INSERT.


    Two notes. First, don’t build SQL by concatenating values into the query string. That leaves you vulnerable to a visit from Little Bobby Tables, a.k.a. a SQL injection attack. Use parameter substitution instead.

    Second, you can insert multiple rows in a single INSERT, which makes bulk loading significantly faster. A psycopg2 sketch that combines both points follows the SQL example below.

    insert into table(a,b,c) values
      (1,2,3),
      (4,5,6),
      (7,8,9);
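
    Putting both notes together, here is a minimal sketch of what the insert could look like with psycopg2's execute_values helper (assuming a pandas DataFrame df like the one in the question, and that its header names match the table's column names; insert_dataframe is just a name made up for this sketch):

    from psycopg2.extras import execute_values

    def insert_dataframe(connection, cursor, df, tablename):
        # build one parameterized, multi-row INSERT instead of concatenating strings
        columns = ", ".join(df.columns)  # assumes the header matches the table columns
        query = f"INSERT INTO {tablename} ({columns}) VALUES %s;"
        # convert numpy scalars to plain Python values so psycopg2 can adapt them
        rows = [tuple(v.item() if hasattr(v, "item") else v for v in rec)
                for rec in df.itertuples(index=False, name=None)]
        execute_values(cursor, query, rows)  # expands %s into (...), (...), ... with proper quoting
        connection.commit()

    execute_values batches the rows into a single statement (100 rows per page by default), so the quoting is handled for you and the load is much faster than one commit per row.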
    
  2. To upload the CSV to PostgreSQL and avoid data type mismatches, pass the values to psycopg2 as query parameters and let it handle the quoting and casting (there is also a short note about empty cells after the code):

    import psycopg2 as pg
    import pandas as pd
    
    def execute_query(connection, cursor, query: str):    
        cursor.execute(query)
        connection.commit()
    
    def create_conn():
        try:
            connection = pg.connect("host='localhost' port='5432' dbname='abc' user='abc' password='abc'")
            cursor = connection.cursor()
            return connection, cursor
        except Exception as e:
            print(f"Connection failed: {e}")
    
    def read_sql_file(filename):
        with open(filename, 'r') as file:
            sql_queries = file.read()
        return sql_queries
    
    def import_csv_data(connection, cursor, csv_file, tablename):
        try:
            df = pd.read_excel(csv_file)
            
            for i, row in df.iterrows():
                # pass plain Python values (not numpy scalars) so psycopg2 can quote them itself
                row_values = [v.item() if hasattr(v, "item") else v for v in row]
                values_placeholder = ', '.join(['%s'] * len(row_values))
                query = f"INSERT INTO {tablename} VALUES ({values_placeholder});"
                cursor.execute(query, row_values)
                connection.commit()
    
        except Exception as e:
            print(f"Error: {e}")
    
    def close_connection(connection, cursor):
        cursor.close()
        connection.close()
    
    if __name__ == "__main__":    
        conn, curr = create_conn()
        if conn and curr:        
            import_csv_data(conn, curr, r"Datadata_sample_100_rows.xlsx", "player_behaviour")    
            close_connection(conn, curr)
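
    One more type-related point, offered as an aside: pandas reads empty cells as NaN, which PostgreSQL will not accept for the INT and VARCHAR columns in the question's table. A small sketch of mapping NaN to None (which psycopg2 sends as NULL), assuming the same df as in import_csv_data above:

    import pandas as pd

    df = pd.read_excel(csv_file)
    # turn NaN from empty cells into None so psycopg2 inserts NULL instead of a float NaN
    df = df.astype(object).where(pd.notna(df), None)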
    