skip to Main Content

I have a jsonb field in a database table that holds the following data:

{"access": {
  "D2024.06.13": [{"qty": 1, "time": "12:05"}, {"qty": 3, "time": "12:32"}],
  "D2024.06.14": [{"qty": 1, "time": "08:37"}]
}}

I would like to group the data by date and by time in 10 min slots (00:00 to 23:50)

so

2024.06.13
12:00 : 1
12:10 : 0
12:20 : 0
12:30 : 3
12:40 : 0
...

{'2024.06.13' : {'12:00':1, '12:10':0, '12:20':0, '12:30':3, '12:40':0 ....}, '2024.06.14' : {'08:30':1 ....}}

so I can draw a timeline afterwards.

2

Answers


  1. In multiple steps:

    1. create table with sample data:
    -- Sample fixture: a single row whose jsonb column holds, under "access",
    -- one "D<yyyy.mm.dd>" key per day mapping to an array of {qty, time} entries.
    create table mytable as
    select
        1 as x,
        cast('{"access": {
      "D2024.06.13": [{"qty": 1, "time": "12:05"}, {"qty": 3, "time": "12:32"}],
      "D2024.06.14": [{"qty": 1, "time": "08:37"}]
    }}' as jsonb) as j;
    
    1. Get the number of days
    -- First day, last day and inclusive number of days covered by the "access" keys.
    with access_days as (
       -- keys look like "D2024.06.13": drop the leading letter and read the rest as a date
       select substr(day_entry.day_key, 2, 10)::date as access_date
       from mytable
       cross join lateral jsonb_each(mytable.j -> 'access') as day_entry(day_key, day_events)
    )
    ,nrofDays as (
       select
          min(access_date) as fromdate,
          max(access_date) as todate,
          max(access_date) - min(access_date) + 1 as count
       from access_days
    )
    select
       fromdate,
       todate,
       count
    from nrofDays
    
    fromdate todate count
    2024-06-13 2024-06-14 2
    1. Then select all 10-minute intervals:
    -- Every 10-minute slot start, beginning at midnight of the first day in the data.
    -- NOTE(review): generate_series includes its upper bound, so the list also
    -- ends with one extra slot at midnight of the day after the last day.
    with access_days as (
       -- keys look like "D2024.06.13": drop the leading letter and read the rest as a date
       select substr(day_entry.day_key, 2, 10)::date as access_date
       from mytable
       cross join lateral jsonb_each(mytable.j -> 'access') as day_entry(day_key, day_events)
    )
    ,nrofDays as (
       select
          min(access_date) as fromdate,
          max(access_date) as todate,
          max(access_date) - min(access_date) + 1 as count
       from access_days
    )
    ,timescale as (
       select slot.tim
       from nrofDays
       cross join lateral generate_series(
          nrofDays.fromdate::timestamp,
          nrofDays.fromdate::timestamp + interval '1 day' * nrofDays.count,
          interval '10 minutes'
       ) as slot(tim)
    )
    select tim from timescale
    
    tim
    2024-06-13 00:00:00.000
    2024-06-13 00:10:00.000
    2024-06-13 00:20:00.000
    2024-06-13 00:30:00.000
    2024-06-13 00:40:00.000
    2024-06-13 00:50:00.000
    2024-06-13 01:00:00.000
    2024-06-13 01:10:00.000
    2024-06-14 23:20:00.000
    2024-06-14 23:30:00.000
    2024-06-14 23:40:00.000
    2024-06-14 23:50:00.000
    2024-06-15 00:00:00.000
    1. Select your data in a normalized way (no JSON columns):
      -- Flatten the jsonb document: one row per logged entry, with the day taken
      -- from the "D<yyyy.mm.dd>" key and the time of day taken from the entry itself.
      select
         src.x,
         substring(day_entry.day_key, 2, 10)::date as dat,
         (entry.item ->> 'time')::time as tim,
         substring(day_entry.day_key, 2, 10)::date + (entry.item ->> 'time')::time as dt
      from mytable as src
      cross join lateral jsonb_each(src.j -> 'access') as day_entry(day_key, day_events)
      cross join lateral jsonb_array_elements(day_entry.day_events) as entry(item)
    
    x dat tim dt
    1 2024-06-13 12:05:00 2024-06-13 12:05:00.000
    1 2024-06-13 12:32:00 2024-06-13 12:32:00.000
    1 2024-06-14 08:37:00 2024-06-14 08:37:00.000
    1. Take the 10-minute intervals and LEFT JOIN the normalized data onto them:
    -- Total qty per 10-minute slot for every day between the first and the last
    -- day found in the data; slots without any entry report 0.
    with nrofDays as (
       -- first/last day among the "D<yyyy.mm.dd>" keys and the inclusive day count
       select 
          min(substr(key,2,10)::date) as fromdate, 
          max(substr(key,2,10)::date) as todate,
          max(substr(key,2,10)::date) - min(substr(key,2,10)::date)+1 as count
       from 
          mytable, 
          jsonb_each(j->'access')
    )
    ,timescale as (
       -- one row per slot start; generate_series includes its upper bound, so stop
       -- 10 minutes short of the full span to finish on 23:50 of the last day
       select ((select fromdate from nrofDays) + '00:00'::time + interval '1 minute' * t) as tim 
       from generate_series(0,(24*60*(select count from nrofDays)) - 10,10) t
    )
    ,events as (
       -- one row per logged entry: its full timestamp and its quantity
       select
          substring(day_entry.day_key,2,10)::date + (entry.item->>'time')::time as dt,
          (entry.item->>'qty')::integer as qty
       from mytable
       cross join lateral jsonb_each(mytable.j->'access') as day_entry(day_key, day_events)
       cross join lateral jsonb_array_elements(day_entry.day_events) as entry(item)
    )
    select 
       timescale.tim,
       -- sum the quantities (not the number of entries); empty slots become 0
       coalesce(sum(events.qty), 0) as qty
    from timescale
    left join events
       -- half-open range: an entry exactly on a boundary belongs to one slot only
       on  events.dt >= timescale.tim
       and events.dt <  timescale.tim + interval '10 minutes'
    group by timescale.tim
    order by timescale.tim
    

    NOTE: The selected days always start at '00:00', so you might need to do some tweaking if you wish them to start at '12:05' on your first day, but I leave that to you 😉

    see: DBFIDDLE

    Login or Signup to reply.
  2. Here is it using CTEs.

    -- Illustrative data
    
    -- Fixture: two documents, each holding per-day arrays of {qty, time} entries
    -- under "access", keyed by "D<yyyy.mm.dd>".
    create table raw_data (
        j jsonb
    );

    insert into raw_data (j)
    values
    ('{"access": {
      "D2024.06.13": [{"qty": 1, "time": "12:05"}, {"qty": 3, "time": "12:32"}],
      "D2024.06.14": [{"qty": 1, "time": "08:37"}]
    }}'::jsonb),
    ('{"access": {
      "D2024.06.15": [{"qty": 1, "time": "13:05"}, {"qty": 3, "time": "13:32"}],
      "D2024.06.16": [{"qty": 1, "time": "09:37"}]
    }}'::jsonb);
    
    • Flatten the JSON data (flat_data CTE)
    • Make a list of all time points for every raw data date (time_slots CTE)
    • Join them.
    -- For each day present in the data, build one JSON object that maps every
    -- 10-minute slot of that day to the total qty logged in it (0 when empty).
    with flat_data as 
    (
     -- one row per logged entry, with its timestamp floored to the 10-minute slot
     select substr(d, 2)::date event_date, 
            (l -> 'qty')::integer qty,
            date_bin(interval 'PT10M', substr(d, 2)::date + (l ->> 'time')::time, 'epoch') event_ts
     from raw_data,
     lateral jsonb_each (raw_data.j -> 'access') as t(d, v),
     lateral jsonb_array_elements(v) l
    ),
    slot_totals as 
    (
     -- add up entries that share a slot; without this the join below would feed
     -- duplicate keys to jsonb_object_agg, which keeps a single value per key
     select event_ts, sum(qty) as qty
     from flat_data
     group by event_ts
    ),
    days as 
    (
     select distinct event_date from flat_data
    ),
    time_slots as 
    (
     -- 00:00 .. 23:50 for every day; the 23:55 bound only caps the series
     select generate_series(event_date + time '00:00', event_date + time '23:55', interval 'PT10M') as time_slot
     from days
    )
    select  time_slot::date as event_date,
            jsonb_object_agg(time_slot::time, coalesce(qty, 0)) as events
    from time_slots 
    left join slot_totals on time_slot = event_ts
    group by time_slot::date
    order by time_slot::date;
    

    Demo

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search