skip to Main Content

This code is reading data in chunks between certain dates.
The problem is – once I stop the script, for instance with ctrl-c, and then restart, the cursor position continues at the same position / datetime where it stopped.
How could it be done so that every time the script will be started the query begins at the beginning of the date range. In this case 2023-03-01? At the moment I need to restart the postgres server to achieve the reset, which is obviously a bad solution

import psycopg2 
import pandas as pd
from datetime import datetime, date
import time
import sys
import os

for i in range(3,7):
    try:
        
        conn = psycopg2.connect(
                    host="localhost",
                    database="dbx",
                    port=5432,
                    user="whatever",
                    options="-c search_path=dbo,data",
                    password="xxxx")
        #cur = conn.cursor()
        cur = conn.cursor(name=f'cursor_{i}')
        start_date = date(2023, i, 1)
        end_date = date(2023, i+1, 1) if i < 12 else date(2024, 1, 1)
        
        chunk_size = 100
        cur.execute("SELECT * FROM data.table WHERE datetime >= %s AND datetime <= %s", (start_date, end_date))

        rows = cur.fetchmany(chunk_size)

      
        while rows:
            for row in rows:
                time.sleep(0.004)
                rmq_tx= {"...some db stuff here..."}

                print(rmq_tx)

            rows = cur.fetchmany(chunk_size)
    except KeyboardInterrupt:
        print('Interrupted')
        cur.close()
        conn.rollback()
        conn.commit() 
        try:
            sys.exit(0)
        except SystemExit:
            os._exit(0)
    finally:
        cur.close()
        conn.rollback()
        conn.commit() 
cur.close()
conn.close()

2

Answers


  1. Not really an answer but a simple test case that works for me:

    import os
    import sys
    import time
    import psycopg2
    
    
    try:
        con = psycopg2.connect(database='farm_db', user='farm_admin', host='localhost')
        cur = con.cursor('test_cur')
        eqpt_qry = "select * from eqpt_model_info"
        cur.execute(eqpt_qry)
        for row in cur:
            time.sleep(0.01)
            print(row)
    except KeyboardInterrupt:
        cur.close()
        con.commit() 
        try:
            sys.exit(0)
        except SystemExit:
            os._exit(0)
    finally:
        cur.close()
        con.commit() 
        con.close()
    

    Running it once:

    aklaver@ranger:~/software_projects> python3 close_cur.py 
    (1, 'trc', 'A-B Bolinder-Munktell', 'Volvo', 'T425', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (2, 'trc', 'A-B Bolinder-Munktell', 'Volvo', 'T55', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (3, 'trc', 'A.D. Baker Co.', 'Baker', '43-67', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (4, 'trc', 'AGCO', 'AGCO', 'DT160', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (5, 'trc', 'AGCO', 'AGCO', 'DT180', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (6, 'trc', 'AGCO', 'AGCO', 'LT75', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    ...
    ^C
    

    Running it second time:

    aklaver@ranger:~/software_projects> python3 close_cur.py 
    (1, 'trc', 'A-B Bolinder-Munktell', 'Volvo', 'T425', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (2, 'trc', 'A-B Bolinder-Munktell', 'Volvo', 'T55', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (3, 'trc', 'A.D. Baker Co.', 'Baker', '43-67', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (4, 'trc', 'AGCO', 'AGCO', 'DT160', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (5, 'trc', 'AGCO', 'AGCO', 'DT180', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (6, 'trc', 'AGCO', 'AGCO', 'LT75', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (7, 'trc', 'AGCO', 'AGCO', 'LT75A', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    (8, 'trc', 'AGCO', 'AGCO', 'LT90', None, datetime.datetime(2022, 3, 16, 11, 2, 50, 789198, tzinfo=datetime.timezone(datetime.timedelta(-1, 61200))), None, 'farm_admin', None)
    ...
    ^C
    
    Login or Signup to reply.
  2. Your query has no ORDER BY, so the database can return the rows in any order it pleases. In this case, its pleasure is to start up again roughly where the last one left off. The ‘missing’ rows will be returned at the end, if you read that far.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search