Postgresql - Split time range into range for whole days and the remaining hours before / after

PeteG
September 27, 2023
220 views
0 votes
2 Answers

In Postgres, given a collection of date ranges like:

meter_id	device_id	start_at	end_at
meter1	device1	2020-01-02 10:30	2025-01-02 14:00
meter2	device1	2020-01-02 10:30	2020-01-02 11:30
meter3	device1	2020-01-02 10:30	2020-01-03 11:30

I want to bucket the ranges into:

a range for the whole days
a range for the hours / mins before the whole days
a range for the hours / mins after the whole days
if the range already fits into a day then just leave it alone

So the above would end up as:

meter_id	device_id	start_at	end_at	remark
meter1	device1	2020-01-02 10:30	2020-01-03 00:00	hours at start of 1st row
meter1	device1	2020-01-03 00:00	2025-01-02 00:00	whole days from 1st row
meter1	device1	2025-01-02 00:00	2025-01-02 14:00	hours at end of 1st row
meter2	device1	2020-01-02 10:30	2020-01-02 11:30	2nd row left alone
meter3	device1	2020-01-02 10:30	2020-01-03 00:00	hours at start of 3rd row
meter3	device1	2020-01-03 00:00	2020-01-03 11:30	hours at end of 3rd row

I have written something which works but it’s pretty complex and ugly.

Is there a simpler way of doing this?

Table and data:

CREATE TABLE IF NOT EXISTS metering_ranges (
    metering_point_id text NOT NULL,
    device_id text NOT NULL,
    start_at timestamp(0) with time zone NOT NULL,
    end_at timestamp(0) with time zone NOT NULL
);

INSERT INTO metering_ranges( metering_point_id, device_id,  start_at, end_at)
VALUES 
    ('meter1', 'device1', '2020-01-02 10:30:00', '2025-01-02 14:00:00'),
    ('meter2', 'device1', '2020-01-02 10:30:00', '2020-01-02 11:30:00'),
    ('meter3', 'device1', '2020-01-02 10:30:00', '2020-01-03 11:30:00');

Existing (complicated) solution

with 

ranges_with_whole_days as (
    SELECT
        metering_point_id,
        device_id,
        start_at,
        date_trunc('day', start_at) + interval '1 d' as start_at_next_whole_day,
        date_trunc('day', end_at) as end_at_whole_day,
        end_at
    FROM
        metering_ranges
),

ranges as (
    SELECT
        metering_point_id,
        device_id,
        start_at,
        CASE
            WHEN start_at_next_whole_day <= end_at_whole_day THEN start_at_next_whole_day ELSE NULL
        END as start_at_next_day,
        CASE 
            WHEN end_at_whole_day >= start_at_next_whole_day THEN end_at_whole_day ELSE NULL
        END as end_at_prev_day,
        end_at
    FROM
        ranges_with_whole_days
),

ranges_bucketed AS (
    -- get hours before whole day
    SELECT metering_point_id, device_id, start_at, start_at_next_day as end_at
    FROM ranges m
    WHERE start_at_next_day IS NOT NULL
    
    UNION
    
    -- get whole day period
    SELECT metering_point_id, device_id, start_at_next_day as start_at, end_at_prev_day as end_at
    FROM  ranges m
    WHERE start_at_next_day IS NOT NULL AND end_at_prev_day IS NOT NULL AND start_at_next_day != end_at_prev_day
    
    UNION
    
    -- get hours after whole day
    SELECT metering_point_id, device_id, end_at_prev_day as start_at, end_at
    FROM ranges m
    WHERE end_at_prev_day IS NOT NULL
    
    UNION
    
    -- get existing record if it fits within a day
    SELECT  metering_point_id,  device_id, start_at, end_at
    FROM ranges m
    WHERE start_at_next_day IS NULL AND end_at_prev_day IS NULL
) 

SELECT *
FROM ranges_bucketed
ORDER BY metering_point_id, device_id, start_at

Answers

After a bit of tinkering, came up with this and wanted to share but it may be a more ugly solution though.

select *
from 
(
select metering_point_id, device_id, 
start_at, 
case when date(start_at)=date(end_at) then end_at else date(start_at+INTERVAL '1 day') end end_at
from  metering_ranges
union
select metering_point_id, device_id, 
case when date(start_at)=date(end_at) then null else date(start_at+INTERVAL '1 day') end, 
date(end_at)
from  metering_ranges
union
select metering_point_id, device_id, 
case when date(start_at)=date(end_at) then null else date(end_at) end, 
end_at
from  metering_ranges
) t 
where t.start_at is not null and t.start_at!=t.end_at
order by metering_point_id

Assuming data type timestamp (Going with your sample data, not the contradicting table definition.) Else, you have to do more. See:

Ignoring time zones altogether in Rails and PostgreSQL

A bit simpler:

WITH cte AS (
   SELECT *
        , date_trunc('day', start_at) + interval '1 day'  AS start_day
        , date_trunc('day', end_at)                       AS end_day
        , start_at::time                                  AS start_time
        , end_at::time                                    AS end_time
        , start_at::time = '0:0' AND end_at::time = '0:0' AS full_days
   FROM   metering_ranges
   )
-- full days
SELECT metering_point_id, device_id, start_at, end_at
FROM   cte
WHERE  full_days

-- core days
UNION ALL  -- not just UNION !
SELECT metering_point_id, device_id, start_day, end_day
FROM   cte
WHERE  end_day > start_day
AND    NOT full_days

-- leading range
UNION ALL
SELECT metering_point_id, device_id, start_at, LEAST(start_day, end_at)
FROM   cte
WHERE  start_time <> '0:0'

-- dangling range
UNION ALL
SELECT metering_point_id, device_id, end_day, end_at
FROM   cte
WHERE  end_time <> '0:0'
AND    end_day >= start_day  -- not already covered by "leading range"

ORDER  BY 1, 2, 3;  -- optional

fiddle

Use UNION ALL, not UNION. Faster.
Also, you can append ORDER BY to a UNION query once at the end. That applies to the whole result set.