Postgresql - Listing intervals with the most overlaps

BinilThomas
September 18, 2023
169 views
0 votes
2 Answers

I have a table like:

create table movement_logs (
  id integer NOT NULL,
  room_id integer NOT NULL,
  entry_time timestamp NOT NULL,
  exit_time timestamp,
  user_id integer NOT NULL
)

This models a domain where users enter rooms. This table logs timestamps when users enter and leave rooms.

I want query for the time windows where most users were in a room. I am using Postgres. Can this be done in SQL?

Tags: database postgresql

Answers

Yes, this is possible in SQL.
See example.

--test data
create table movement_logs (
  id integer NOT NULL,
  room_id integer NOT NULL,
  entry_time timestamp NOT NULL,
  exit_time timestamp,
  user_id integer NOT NULL
);
insert into movement_logs values
 (1,10,'2023-09-15 10:00:00','2023-09-15 11:00:00',1001)
,(2,10,'2023-09-15 10:20:00','2023-09-15 11:20:00',1002)
,(3,10,'2023-09-15 10:20:00','2023-09-15 11:21:00',1003)
,(4,10,'2023-09-15 11:10:00','2023-09-15 11:21:00',1001)
,(5,20,'2023-09-15 12:20:00','2023-09-15 12:21:00',1001)
,(6,10,'2023-09-15 12:20:00','2023-09-15 12:21:00',1005)
;
-- query
select room_id,event_time
     -- cumulative sum of events by time
  ,sum(qty)over(partition by room_id order by event_time) qty
from(-- group events by time - count of events in this time
  select room_id,event_time,sum(inout) qty
  from( -- all events in log entry=+1 exit=-1
    select room_id,entry_time event_time, 1 inout
    from movement_logs
    union all
    select room_id,exit_time event_time, -1 inout
    from movement_logs
   )x
 group by room_id,event_time
)y
order by room_id,event_time

Query result

room_id	event_time	qty
10	2023-09-15 10:00:00	1
10	2023-09-15 10:20:00	3
10	2023-09-15 11:00:00	2
10	2023-09-15 11:10:00	3
10	2023-09-15 11:20:00	2
10	2023-09-15 11:21:00	0
10	2023-09-15 12:20:00	1
10	2023-09-15 12:21:00	0
20	2023-09-15 12:20:00	1
20	2023-09-15 12:21:00	0

A slightly different approach would be the usage of the range data type, tsrange. When you have an entry time and exit time, this would be beneficial to your data model and application because of the potential constraints, operators and indexes you could use.

CREATE TABLE movement_logs
(
    id              INTEGER NOT NULL,
    room_id         INTEGER NOT NULL,
    entry_exit_time TSRANGE NOT NULL,
    user_id         INTEGER NOT NULL
);
INSERT INTO movement_logs
VALUES (1, 10, TSRANGE('2023-09-15 10:00:00', '2023-09-15 11:00:00'), 1001)
     , (2, 10, TSRANGE('2023-09-15 10:20:00', '2023-09-15 11:20:00'), 1002)
     , (3, 10, TSRANGE('2023-09-15 10:20:00', '2023-09-15 11:21:00'), 1003)
     , (4, 10, TSRANGE('2023-09-15 11:10:00', '2023-09-15 11:21:00'), 1001)
     , (5, 20, TSRANGE('2023-09-15 12:20:00', '2023-09-15 12:21:00'), 1001)
     , (6, 10, TSRANGE('2023-09-15 12:20:00', '2023-09-15 12:21:00'), 1005)
;

SELECT room_id
     , s::TIMESTAMP
     , COUNT(user_id)
FROM movement_logs
         RIGHT JOIN GENERATE_SERIES('2023-09-15 10:00' -- start
                        , '2023-09-15 13:00' -- end
                        , INTERVAL '15 minutes') g(s)
                    ON entry_exit_time && TSRANGE(s::TIMESTAMP, (s + INTERVAL '15 minutes')::TIMESTAMP)
GROUP BY 1, 2
ORDER BY 2;

The function tsrange() makes the ranges for you, and could leave the lower or upper range NULL. The operator && looks for overlapping ranges and generate_series() creates the different time windows you want to measure. In this example I use an interval of 15 minutes, you could change this to some other value.

Please signup or login to give your own answer.

Click here to cancel reply.

Postgresql – Listing intervals with the most overlaps

Answers