Postgresql - Why is DELETE FROM vehicle_data WHERE NOW() > expires_at; failing in my stored function?

AlexanderFarber
January 18, 2025
146 views
0 votes
2 Answers

I have prepared a DB Fiddle for my question and I will also show my complete code below –

I am trying to store data in a vehicle_data table and each data record has an expires_at column:

-- Create table for customer IDs
CREATE TABLE customer_ids (
    id INTEGER PRIMARY KEY CHECK (id > 0),
    label TEXT NOT NULL CHECK (label ~ 'S')
);

-- Insert valid customer IDs
INSERT INTO customer_ids (id, label) VALUES
(1, 'Customer 1'),
(2, 'Customer 2'),
(3, 'Customer 3'),
(4, 'Customer 4'),
(5, 'Customer 5');

-- Create table for use case IDs
CREATE TABLE use_case_ids (
    id INTEGER PRIMARY KEY CHECK (id > 0),
    label TEXT NOT NULL CHECK (label ~ 'S')
);

-- Insert valid use case IDs
INSERT INTO use_case_ids (id, label) VALUES
(1, 'Use Case 1'),
(2, 'Use Case 2'),
(3, 'Use Case 3'),
(4, 'Use Case 4'),
(5, 'Use Case 5');

-- Create table for uploaded vehicle data
CREATE TABLE vehicle_data (
    -- the triple is counted when comparing against node_limit
    osm_node_id BIGINT NOT NULL CHECK (osm_node_id > 0),
    customer_id INTEGER NOT NULL,
    use_case_id INTEGER NOT NULL,

    container_id BIGINT NOT NULL CHECK (container_id > 0),
    expires_at TIMESTAMPTZ NOT NULL,

    FOREIGN KEY (customer_id) REFERENCES customer_ids(id),
    FOREIGN KEY (use_case_id) REFERENCES use_case_ids(id),
    id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY
);

-- Add a triple-column index to improve search performance
CREATE INDEX idx_vehicle_data ON vehicle_data (osm_node_id, customer_id, use_case_id);

To fill the vehicle_data table with data I have defined a simple stored procedure:

CREATE OR REPLACE FUNCTION store_vehicle_data(
    _container_id BIGINT,
    _osm_node_ids BIGINT[],
    _customer_id INTEGER,
    _use_case_id INTEGER,
    _node_limit INTEGER,
    _retention_time INTERVAL
)
RETURNS BOOLEAN AS $$
DECLARE
    _osm_node_id BIGINT;
    _row_count INTEGER;
    _should_send_pull_container BOOLEAN := TRUE;
BEGIN
    -- Delete records with expired retention time - NOT WORKING
    DELETE FROM vehicle_data
    WHERE NOW() > expires_at;

    -- Insert new records
    FOREACH _osm_node_id IN ARRAY _osm_node_ids LOOP
        BEGIN
            INSERT INTO vehicle_data (
                osm_node_id,
                customer_id, 
                use_case_id, 
                container_id, 
                expires_at
            ) VALUES (
                _osm_node_id, 
                _customer_id, 
                _use_case_id, 
                _container_id,
                NOW() + _retention_time
            );
        EXCEPTION WHEN foreign_key_violation THEN
            RAISE EXCEPTION 'Invalid customer_id % or use_case_id % for osm_node_id % container_id: %',
                _customer_id, _use_case_id, _osm_node_id, _container_id;
        END;

        -- Check if the number of records exceeds the node limit
        SELECT COUNT(*)
        INTO STRICT _row_count
        FROM vehicle_data
        WHERE osm_node_id = _osm_node_id
        AND customer_id = _customer_id
        AND use_case_id = _use_case_id;

        -- There is enough up-to-date vehicle data for this triple,
        -- so tell the vehicle not to send any PULL containers
        IF _row_count > _node_limit THEN
            _should_send_pull_container := FALSE;
        END IF;
    END LOOP;

    RETURN _should_send_pull_container;
END;
$$ LANGUAGE plpgsql;

Finally, I have prepared a smoke test for my code and run it twice:

CREATE OR REPLACE FUNCTION test_store_vehicle_data(
    num_runs INTEGER,
    OUT count_true INTEGER,
    OUT count_false INTEGER
)
RETURNS RECORD AS $$
DECLARE
    test_result BOOLEAN;
BEGIN
    count_true := 0;
    count_false := 0;

    FOR i IN 1..num_runs LOOP
        -- Store OSM node ids (2 are same, 2 are changing)
        -- with node limit 10 and retention time 5 seconds
        test_result := store_vehicle_data(
            100 + i, 
            ARRAY[1000, 2000, 3000 + i, 4000 + i],
            1, 
            5, 
            10, 
            INTERVAL '5 seconds'
        );

        IF test_result THEN
            count_true := count_true + 1;
        ELSE
            count_false := count_false + 1;
        END IF;
    END LOOP;

    RETURN;
END $$ LANGUAGE plpgsql;

-- Run 2 smoke tests
DO $$
DECLARE
    test_result RECORD;
BEGIN
    -- Test 1: store 15x4 records expiring in 5 seconds
    SELECT * INTO test_result FROM test_store_vehicle_data(15);
    IF test_result.count_true != 10 OR test_result.count_false != 5 THEN
        RAISE EXCEPTION 'Test 1 failed: expected 10 TRUE, 5 FALSE';
    END IF;

    -- Sleep for 10 seconds, so that all records in vehicle_data expire
    PERFORM pg_sleep(10);

    -- Test 2: store 15x4 records expiring in 5 seconds
    SELECT * INTO test_result FROM test_store_vehicle_data(15);
    IF test_result.count_true != 10 OR test_result.count_false != 5 THEN
        RAISE EXCEPTION 'Test 2 failed: expected 10 TRUE, 5 FALSE';
    END IF;
END $$;

-- Print all records in the vehicle_data table
SELECT expires_at < NOW() AS expired, * FROM vehicle_data ORDER BY container_id;

My problem is that the DELETE FROM vehicle_data WHERE NOW() > expires_at; statement in my store_vehicle_data() function does not delete anything.

And then the SELECT expires_at < NOW() AS expired, * FROM vehicle_data ORDER BY container_id; prints the records in the table and yes, they are all expired there.

I run the above SQL code (creating tables, creating functions, running smoke test) in a Dockerfile based on the official Postgres Dockerfile and the smoke test (the Test 2) just always fails:

FROM postgres:17-alpine3.21
RUN apk update && apk upgrade && apk add --no-cache pg_top

ARG PGUSER
ARG PGPASSWORD

# Tell docker-entrypoint.sh to create superuser "postgres"
# with password passed as build arg and database "postgres"
ENV POSTGRES_PASSWORD=$PGPASSWORD

# Tell docker-entrypoint.sh to change these params in postgresql.conf
ENV POSTGRES_INITDB_ARGS="--set max_connections=200 --set shared_buffers=16GB --set work_mem=8MB --set maintenance_work_mem=128MB --set effective_cache_size=8GB --set from_collapse_limit=24 --set join_collapse_limit=24 --set log_min_messages=notice --set log_min_duration_statement=1000"

ENV PGUSER=$PGUSER
ENV PGPASSWORD=$PGPASSWORD
ENV PGDATABASE=push_pull_database

# The files below are executed by the superuser "postgres"
# in alphabetical order after the database has been initialized
WORKDIR /docker-entrypoint-initdb.d
COPY ./01-create-database.sh .
COPY ./02-create-tables.sql .
COPY ./03-create-functions.sql .
COPY ./04-alter-owner.sh .
COPY ./05-smoke-tests.sql .
RUN chmod +x ./01-create-database.sh ./04-alter-owner.sh

Then I connect using psql to my docker container and run the DELETE command at the psql prompt and voila – it works there as expected and deletes all the expired records.

Answers

Chosen as BEST ANSWER

Thank you @Zegarek for the thorough explanation!

I've decided to keep the NOW() in my stored function in the hope that it gives better performance and as the fix for the issue I have rewritten my smoke tests to run as plain SQL without any transaction -

CREATE OR REPLACE FUNCTION test_store_vehicle_data(
    num_runs INTEGER,
    OUT count_true INTEGER,
    OUT count_false INTEGER
)
RETURNS RECORD AS $$
DECLARE
    test_result BOOLEAN;
BEGIN
    count_true := 0;
    count_false := 0;

    FOR i IN 1..num_runs LOOP
        -- Store OSM node ids (2 are same, 2 are changing)
        -- with node limit 10 and retention time 5 seconds
        test_result := store_vehicle_data(
            100 + i, 
            ARRAY[1000, 2000, 3000 + i, 4000 + i],
            1, 
            5, 
            10, 
            INTERVAL '5 seconds'
        );

        IF test_result THEN
            count_true := count_true + 1;
        ELSE
            count_false := count_false + 1;
        END IF;
    END LOOP;

    RETURN;
END $$ LANGUAGE plpgsql;

-- Test 1: Run the test_store_vehicle_data function
-- to insert 15x4 records (node_limit 10, expiring
-- in 5 seconds) into the vehicle_data table and
-- finally store the result in a temporary table
CREATE TEMP TABLE temp_test_result AS
SELECT * FROM test_store_vehicle_data(15);

-- The temp_test_result has just 1 record, check it
SELECT 
    CASE 
        WHEN count_true != 10 OR count_false != 5 
        THEN 'Test 1 failed: expected 10 TRUE, 5 FALSE' 
        ELSE 'Test 1 passed' 
    END AS test_result
FROM temp_test_result;

-- Drop the temporary table to clean up
DROP TABLE temp_test_result;

-- Pause execution for 10 seconds so that all records expire
SELECT pg_sleep(10);

-- Test 2: Run the test_store_vehicle_data function
-- to insert 15x4 records (node_limit 10, expiring
-- in 5 seconds) into the vehicle_data table and
-- finally store the result in a temporary table
CREATE TEMP TABLE temp_test_result AS
SELECT * FROM test_store_vehicle_data(15);

-- The temp_test_result has just 1 record, check it
SELECT 
    CASE 
        WHEN count_true != 10 OR count_false != 5 
        THEN 'Test 2 failed: expected 10 TRUE, 5 FALSE' 
        ELSE 'Test 2 passed' 
    END AS test_result
FROM temp_test_result;

-- Drop the temporary table to clean up
DROP TABLE temp_test_result;

-- Print all records in the vehicle_data table
SELECT expires_at < NOW() AS is_expired, *
FROM vehicle_data
ORDER BY container_id;

(Edit)

The result of now() doesn’t change between your calls, so your test thinks no time has passed yet. The now() function and current_timestamp are one and the same and they have a third, more self-explanatory alias, transaction_timestamp().

Your whole do block runs in a single transaction unless you explicitly establish other transaction boundaries by issuing commits or use transaction-handling procedures inside it. This means your test keeps seeing the exact same time, each time it checks the clock:

transaction_timestamp() is equivalent to CURRENT_TIMESTAMP, but is named to clearly reflect what it returns.(…) now() is a traditional PostgreSQL equivalent to transaction_timestamp().

Use transactions to see changes in now() or switch to clock_timestamp():

clock_timestamp() returns the actual current time, and therefore its value changes even within a single SQL command.

_{demo at db<>fiddle}

do $d$
begin insert into debug_ select now(), statement_timestamp(), clock_timestamp();
      perform pg_sleep(.1);
      insert into debug_ select now(), statement_timestamp(), clock_timestamp();
      perform pg_sleep(.1);
      insert into debug_ select now(), statement_timestamp(), clock_timestamp();
end $d$;

table debug_;

now	statement_timestamp	clock_timestamp	id
2025-01-18 12:49:54.854183+00	2025-01-18 12:49:54.854183+00	2025-01-18 12:49:54.867843+00	1
2025-01-18 12:49:54.854183+00	2025-01-18 12:49:54.854183+00	2025-01-18 12:49:54.968707+00	2
2025-01-18 12:49:54.854183+00	2025-01-18 12:49:54.854183+00	2025-01-18 12:49:55.070274+00	3

Meanwhile:

do $d$
begin insert into debug_ select now(), statement_timestamp(), clock_timestamp();
      perform pg_sleep(.1);
      commit;
      insert into debug_ select now(), statement_timestamp(), clock_timestamp();
      perform pg_sleep(.1);
      commit;
      insert into debug_ select now(), statement_timestamp(), clock_timestamp();
end $d$;

table debug_;

now	statement_timestamp _{this one’s fun}	clock_timestamp	id
2025-01-18 12:49:55.075149+00	2025-01-18 12:49:55.075149+00	2025-01-18 12:49:55.075404+00	1
2025-01-18 12:49:55.176869+00	2025-01-18 12:49:55.075149+00	2025-01-18 12:49:55.177075+00	2
2025-01-18 12:49:55.277518+00	2025-01-18 12:49:55.075149+00	2025-01-18 12:49:55.27782+00	3

When in doubt, RAISE debug/log/info/notice messages to print out the runtime values you’re dealing with. If you only checked now() by running select now(); a few times from your client, you probably saw it keep returning the current time, changing with each call – that’s because most clients run in autocommit mode by default, so each of those selects was in a new, standalone transaction.

Please signup or login to give your own answer.

Click here to cancel reply.

Postgresql – Why is DELETE FROM vehicle_data WHERE NOW() > expires_at; failing in my stored function?

Answers