skip to Main Content

I have prepared a DB Fiddle for my question and I will also show my complete code below –

I am trying to store data in a vehicle_data table and each data record has an expires_at column:

-- Create table for customer IDs
CREATE TABLE customer_ids (
    id INTEGER PRIMARY KEY CHECK (id > 0),
    label TEXT NOT NULL CHECK (label ~ 'S')
);

-- Insert valid customer IDs
INSERT INTO customer_ids (id, label) VALUES
(1, 'Customer 1'),
(2, 'Customer 2'),
(3, 'Customer 3'),
(4, 'Customer 4'),
(5, 'Customer 5');

-- Create table for use case IDs
CREATE TABLE use_case_ids (
    id INTEGER PRIMARY KEY CHECK (id > 0),
    label TEXT NOT NULL CHECK (label ~ 'S')
);

-- Insert valid use case IDs
INSERT INTO use_case_ids (id, label) VALUES
(1, 'Use Case 1'),
(2, 'Use Case 2'),
(3, 'Use Case 3'),
(4, 'Use Case 4'),
(5, 'Use Case 5');

-- Create table for uploaded vehicle data
CREATE TABLE vehicle_data (
    -- the triple is counted when comparing against node_limit
    osm_node_id BIGINT NOT NULL CHECK (osm_node_id > 0),
    customer_id INTEGER NOT NULL,
    use_case_id INTEGER NOT NULL,

    container_id BIGINT NOT NULL CHECK (container_id > 0),
    expires_at TIMESTAMPTZ NOT NULL,

    FOREIGN KEY (customer_id) REFERENCES customer_ids(id),
    FOREIGN KEY (use_case_id) REFERENCES use_case_ids(id),
    id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY
);

-- Add a triple-column index to improve search performance
CREATE INDEX idx_vehicle_data ON vehicle_data (osm_node_id, customer_id, use_case_id);

To fill the vehicle_data table with data I have defined a simple stored procedure:

CREATE OR REPLACE FUNCTION store_vehicle_data(
    _container_id BIGINT,
    _osm_node_ids BIGINT[],
    _customer_id INTEGER,
    _use_case_id INTEGER,
    _node_limit INTEGER,
    _retention_time INTERVAL
)
RETURNS BOOLEAN AS $$
DECLARE
    _osm_node_id BIGINT;
    _row_count INTEGER;
    _should_send_pull_container BOOLEAN := TRUE;
BEGIN
    -- Delete records with expired retention time - NOT WORKING
    DELETE FROM vehicle_data
    WHERE NOW() > expires_at;

    -- Insert new records
    FOREACH _osm_node_id IN ARRAY _osm_node_ids LOOP
        BEGIN
            INSERT INTO vehicle_data (
                osm_node_id,
                customer_id, 
                use_case_id, 
                container_id, 
                expires_at
            ) VALUES (
                _osm_node_id, 
                _customer_id, 
                _use_case_id, 
                _container_id,
                NOW() + _retention_time
            );
        EXCEPTION WHEN foreign_key_violation THEN
            RAISE EXCEPTION 'Invalid customer_id % or use_case_id % for osm_node_id % container_id: %',
                _customer_id, _use_case_id, _osm_node_id, _container_id;
        END;

        -- Check if the number of records exceeds the node limit
        SELECT COUNT(*)
        INTO STRICT _row_count
        FROM vehicle_data
        WHERE osm_node_id = _osm_node_id
        AND customer_id = _customer_id
        AND use_case_id = _use_case_id;

        -- There is enough up-to-date vehicle data for this triple,
        -- so tell the vehicle not to send any PULL containers
        IF _row_count > _node_limit THEN
            _should_send_pull_container := FALSE;
        END IF;
    END LOOP;

    RETURN _should_send_pull_container;
END;
$$ LANGUAGE plpgsql;

Finally, I have prepared a smoke test for my code and run it twice:

CREATE OR REPLACE FUNCTION test_store_vehicle_data(
    num_runs INTEGER,
    OUT count_true INTEGER,
    OUT count_false INTEGER
)
RETURNS RECORD AS $$
DECLARE
    test_result BOOLEAN;
BEGIN
    count_true := 0;
    count_false := 0;

    FOR i IN 1..num_runs LOOP
        -- Store OSM node ids (2 are same, 2 are changing)
        -- with node limit 10 and retention time 5 seconds
        test_result := store_vehicle_data(
            100 + i, 
            ARRAY[1000, 2000, 3000 + i, 4000 + i],
            1, 
            5, 
            10, 
            INTERVAL '5 seconds'
        );

        IF test_result THEN
            count_true := count_true + 1;
        ELSE
            count_false := count_false + 1;
        END IF;
    END LOOP;

    RETURN;
END $$ LANGUAGE plpgsql;

-- Run 2 smoke tests
DO $$
DECLARE
    test_result RECORD;
BEGIN
    -- Test 1: store 15x4 records expiring in 5 seconds
    SELECT * INTO test_result FROM test_store_vehicle_data(15);
    IF test_result.count_true != 10 OR test_result.count_false != 5 THEN
        RAISE EXCEPTION 'Test 1 failed: expected 10 TRUE, 5 FALSE';
    END IF;

    -- Sleep for 10 seconds, so that all records in vehicle_data expire
    PERFORM pg_sleep(10);

    -- Test 2: store 15x4 records expiring in 5 seconds
    SELECT * INTO test_result FROM test_store_vehicle_data(15);
    IF test_result.count_true != 10 OR test_result.count_false != 5 THEN
        RAISE EXCEPTION 'Test 2 failed: expected 10 TRUE, 5 FALSE';
    END IF;
END $$;

-- Print all records in the vehicle_data table
SELECT expires_at < NOW() AS expired, * FROM vehicle_data ORDER BY container_id;

My problem is that the DELETE FROM vehicle_data WHERE NOW() > expires_at; statement in my store_vehicle_data() function does not delete anything.

And then the SELECT expires_at < NOW() AS expired, * FROM vehicle_data ORDER BY container_id; prints the records in the table and yes, they are all expired there.

I run the above SQL code (creating tables, creating functions, running smoke test) in a Dockerfile based on the official Postgres Dockerfile and the smoke test (the Test 2) just always fails:

FROM postgres:17-alpine3.21
RUN apk update && apk upgrade && apk add --no-cache pg_top

ARG PGUSER
ARG PGPASSWORD

# Tell docker-entrypoint.sh to create superuser "postgres"
# with password passed as build arg and database "postgres"
ENV POSTGRES_PASSWORD=$PGPASSWORD

# Tell docker-entrypoint.sh to change these params in postgresql.conf
ENV POSTGRES_INITDB_ARGS="--set max_connections=200 --set shared_buffers=16GB --set work_mem=8MB --set maintenance_work_mem=128MB --set effective_cache_size=8GB --set from_collapse_limit=24 --set join_collapse_limit=24 --set log_min_messages=notice --set log_min_duration_statement=1000"

ENV PGUSER=$PGUSER
ENV PGPASSWORD=$PGPASSWORD
ENV PGDATABASE=push_pull_database

# The files below are executed by the superuser "postgres"
# in alphabetical order after the database has been initialized
WORKDIR /docker-entrypoint-initdb.d
COPY ./01-create-database.sh .
COPY ./02-create-tables.sql .
COPY ./03-create-functions.sql .
COPY ./04-alter-owner.sh .
COPY ./05-smoke-tests.sql .
RUN chmod +x ./01-create-database.sh ./04-alter-owner.sh

Then I connect using psql to my docker container and run the DELETE command at the psql prompt and voila – it works there as expected and deletes all the expired records.

2

Answers


  1. Chosen as BEST ANSWER

    Thank you @Zegarek for the thorough explanation!

    I've decided to keep the NOW() in my stored function in the hope that it gives better performance and as the fix for the issue I have rewritten my smoke tests to run as plain SQL without any transaction -

    CREATE OR REPLACE FUNCTION test_store_vehicle_data(
        num_runs INTEGER,
        OUT count_true INTEGER,
        OUT count_false INTEGER
    )
    RETURNS RECORD AS $$
    DECLARE
        test_result BOOLEAN;
    BEGIN
        count_true := 0;
        count_false := 0;
    
        FOR i IN 1..num_runs LOOP
            -- Store OSM node ids (2 are same, 2 are changing)
            -- with node limit 10 and retention time 5 seconds
            test_result := store_vehicle_data(
                100 + i, 
                ARRAY[1000, 2000, 3000 + i, 4000 + i],
                1, 
                5, 
                10, 
                INTERVAL '5 seconds'
            );
    
            IF test_result THEN
                count_true := count_true + 1;
            ELSE
                count_false := count_false + 1;
            END IF;
        END LOOP;
    
        RETURN;
    END $$ LANGUAGE plpgsql;
    
    -- Test 1: Run the test_store_vehicle_data function
    -- to insert 15x4 records (node_limit 10, expiring
    -- in 5 seconds) into the vehicle_data table and
    -- finally store the result in a temporary table
    CREATE TEMP TABLE temp_test_result AS
    SELECT * FROM test_store_vehicle_data(15);
    
    -- The temp_test_result has just 1 record, check it
    SELECT 
        CASE 
            WHEN count_true != 10 OR count_false != 5 
            THEN 'Test 1 failed: expected 10 TRUE, 5 FALSE' 
            ELSE 'Test 1 passed' 
        END AS test_result
    FROM temp_test_result;
    
    -- Drop the temporary table to clean up
    DROP TABLE temp_test_result;
    
    -- Pause execution for 10 seconds so that all records expire
    SELECT pg_sleep(10);
    
    -- Test 2: Run the test_store_vehicle_data function
    -- to insert 15x4 records (node_limit 10, expiring
    -- in 5 seconds) into the vehicle_data table and
    -- finally store the result in a temporary table
    CREATE TEMP TABLE temp_test_result AS
    SELECT * FROM test_store_vehicle_data(15);
    
    -- The temp_test_result has just 1 record, check it
    SELECT 
        CASE 
            WHEN count_true != 10 OR count_false != 5 
            THEN 'Test 2 failed: expected 10 TRUE, 5 FALSE' 
            ELSE 'Test 2 passed' 
        END AS test_result
    FROM temp_test_result;
    
    -- Drop the temporary table to clean up
    DROP TABLE temp_test_result;
    
    -- Print all records in the vehicle_data table
    SELECT expires_at < NOW() AS is_expired, *
    FROM vehicle_data
    ORDER BY container_id;
    

  2. The result of now() doesn’t change between your calls, so your test thinks no time has passed yet. The now() function and current_timestamp are one and the same and they have a third, more self-explanatory alias, transaction_timestamp().

    Your whole do block runs in a single transaction unless you explicitly establish other transaction boundaries by issuing commits or use transaction-handling procedures inside it. This means your test keeps seeing the exact same time, each time it checks the clock:

    transaction_timestamp() is equivalent to CURRENT_TIMESTAMP, but is named to clearly reflect what it returns.(…) now() is a traditional PostgreSQL equivalent to transaction_timestamp().

    Use transactions to see changes in now() or switch to clock_timestamp():

    clock_timestamp() returns the actual current time, and therefore its value changes even within a single SQL command.

    demo at db<>fiddle

    do $d$
    begin insert into debug_ select now(), statement_timestamp(), clock_timestamp();
          perform pg_sleep(.1);
          insert into debug_ select now(), statement_timestamp(), clock_timestamp();
          perform pg_sleep(.1);
          insert into debug_ select now(), statement_timestamp(), clock_timestamp();
    end $d$;
    
    table debug_;
    
    now statement_timestamp clock_timestamp id
    2025-01-18 12:49:54.854183+00 2025-01-18 12:49:54.854183+00 2025-01-18 12:49:54.867843+00 1
    2025-01-18 12:49:54.854183+00 2025-01-18 12:49:54.854183+00 2025-01-18 12:49:54.968707+00 2
    2025-01-18 12:49:54.854183+00 2025-01-18 12:49:54.854183+00 2025-01-18 12:49:55.070274+00 3

    Meanwhile:

    do $d$
    begin insert into debug_ select now(), statement_timestamp(), clock_timestamp();
          perform pg_sleep(.1);
          commit;
          insert into debug_ select now(), statement_timestamp(), clock_timestamp();
          perform pg_sleep(.1);
          commit;
          insert into debug_ select now(), statement_timestamp(), clock_timestamp();
    end $d$;
    
    table debug_;
    
    now statement_timestamp
    this one’s fun
    clock_timestamp id
    2025-01-18 12:49:55.075149+00 2025-01-18 12:49:55.075149+00 2025-01-18 12:49:55.075404+00 1
    2025-01-18 12:49:55.176869+00 2025-01-18 12:49:55.075149+00 2025-01-18 12:49:55.177075+00 2
    2025-01-18 12:49:55.277518+00 2025-01-18 12:49:55.075149+00 2025-01-18 12:49:55.27782+00 3

    When in doubt, RAISE debug/log/info/notice messages to print out the runtime values you’re dealing with. If you only checked now() by running select now(); a few times from your client, you probably saw it keep returning the current time, changing with each call – that’s because most clients run in autocommit mode by default, so each of those selects was in a new, standalone transaction.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search