Postgresql - Getting previous month's existing student count if there are no new adds this month

Sam
October 24, 2024
45 views
0 votes
2 Answers

I have a table with student data which includes their start and withdrawal dates. Not all students have withdrawal dates as they are still enrolled.
I am trying to calculate the number of new students per month, withdrawals per month, existing students per month. I have already completed counts of new and withdrawals per month, I am facing trouble in the calculation of existing students.

Data starts from January 2023.

This is the query I have come up with:

WITH student_activity AS 
(
    -- Convert start and withdrawal date keys to actual date format
    SELECT to_date(fe.start_date_key::text, 'YYYYMMDD') AS start_date,
    to_date(fe.withdrawal_date_key::text, 'YYYYMMDD') AS withdrawal_date,
    dp.product_name, dp.sku
    FROM fact_enrolment fe
    INNER JOIN dim_product dp ON fe.product_key = dp.product_key
)
SELECT date_trunc('month', month_series) AS month,
COUNT(*) AS existing_students,
sa.product_name
FROM (
    SELECT generate_series(
    (SELECT MIN(to_date(start_date_key::text, 'YYYYMMDD')) FROM fact_enrolment),'2100-12-31',INTERVAL '1 month') AS month_series) AS months
LEFT JOIN student_activity sa ON sa.start_date < month_series AND (sa.withdrawal_date IS NULL OR sa.withdrawal_date >= month_series)
GROUP BY month, sa.product_name

This query does get the existing counts correctly but I am facing an issue where if a month does not have students starting in it, then the month is not displayed at all in the result set. I need it to still display each month and if it does not have any new values to it, then it should use the previous month’s count as those students are still enrolled.

CREATE TABLE fact_enrolment (
    student_key int4 NULL,
    start_date_key int4 NULL,
    withdrawal_date_key int4 NULL,
    product_key int4 NULL);

CREATE TABLE dim_product (
    product_key int4 GENERATED ALWAYS AS IDENTITY( INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START 1 CACHE 1 NO CYCLE) NOT NULL,
    product_name varchar NULL,
    sku varchar NULL);

INSERT INTO dim_product (product_name , sku) VALUES ('Preschool' , 'ABC123');

INSERT INTO fact_enrolment (student_key, start_date_key, withdrawal_date_key, product_key) VALUES 
  (12, 20230105, 20230130, 1)
, (14, 20230106, 20230120, 1)
, (45, 20230405, 20230420, 1);
INSERT INTO fact_enrolment (student_key, start_date_key, product_key) VALUES 
  (17, 20230110, 1)
, (20, 20230120, 1)
, (21, 20230220, 1)
, (22, 20230202, 1)
, (23, 20230228, 1)
, (34, 20230206, 1)
, (44, 20230406, 1);

Calculations based on the sample given:

month	adds	withdrawals	existing students
2023-01	4	2	4
2023-02	4	0	6
2023-03	0	0	6
2023-04	2	1	8

Assuming that the withdrawals happen at the end of the month in the above table, they are subtracted in the next month’s count.

In the example to note 2023-03 is still shown with the previous month’s count.

I am using Postgres version 13.15.

Answers

You can use aggregate filter clause to count(*) different things in a single aggregated query. Window functions then let you run two stepping sums of those counts and subtract them to get current balance.
_{demo at db<>fiddle}

WITH student_activity AS (
    SELECT date_trunc('month',to_date(fe.start_date_key::text, 'YYYYMMDD'))::date AS start_month
         , date_trunc('month',to_date(fe.withdrawal_date_key::text, 'YYYYMMDD'))::date AS withdrawal_month
         , dp.product_name
         , dp.sku
    FROM fact_enrolment AS fe
    JOIN dim_product AS dp 
      ON fe.product_key = dp.product_key )
,calendar as (
    select month::date
    from (select min(start_month) as earliest_month
               , max(withdrawal_month) as latest_month
          from student_activity) as limits
    cross join lateral generate_series( earliest_month
                                       ,latest_month
                                       ,'1 month'::interval) as month)
,monthlies as (
    select month
         , count(*)filter(where month=start_month) as enrollments
         , count(*)filter(where month=withdrawal_month) as withdrawals
    from calendar
    left join student_activity as fe
    on month=any(array[start_month,withdrawal_month]) 
    group by month)
select*, sum(enrollments)over w1
        -sum(withdrawals)over w1 as "existing students"
from monthlies
window w1 as(order by month)
order by month;

I’m not sure why after 10 students enrolled and 3 withdrew, you expect to be left with 8. That’s 7:

month	enrollments	withdrawals	existing students
2023-01-01	4	2	2
2023-02-01	4	0	6
2023-03-01	0	0	6
2023-04-01	2	1	7

You need to create "date_spine" dataset that You can use as Your main table during the joins.

WITH 
date_spine AS (
  --this will generate date spine with all months from given range
  --this will be used as main table to join as we need all months from here
  SELECT 
    distinct 
      to_char(day, 'YYYY-MM') as spine_month
  FROM generate_series
          ( '2023-01-01'::timestamp, '2030-12-01'::timestamp, '1 day'::interval) day
),
student_activity AS (
  --cleans data table with student activities
  --converts dates to YYYY-MM format string, which will later be used to join to data spine
  SELECT
    student_key,
    to_char(to_date(fe.start_date_key::text, 'YYYYMMDD'), 'YYYY-MM') as start_month,
    coalesce(to_char(to_date(fe.withdrawal_date_key::text, 'YYYYMMDD'), 'YYYY-MM'), '2999-12') as withdrawal_month
  FROM fact_enrolment fe
),
adds AS (
  --calculates number of new students in given month
  select
    start_month,
    count(*) as adds
  from student_activity
  group by start_month
),
withdrawals AS (
  --calculates number of withdrawals in given month
  select
    withdrawal_month,
    count(*) as withdrawals
  from student_activity
  group by withdrawal_month
),
existing_students AS (
  --calculates number of 'active' students during each month of data spine
  --this assumes that if a student start and withdrawal is in same month such student will be counted as existing in given month
  select 
    count(student_activity.student_key) as existing_students,
    date_spine.spine_month
  from date_spine
  left join student_activity 
    on date_spine.spine_month between student_activity.start_month and student_activity.withdrawal_month
  group by date_spine.spine_month
),
joins AS (
  --joins data spine with adds, withdrawals and existing students
  select 
    date_spine.spine_month as month,
    coalesce(adds.adds, 0) as adds,
    coalesce(withdrawals.withdrawals, 0) as withdrawals,
    coalesce(existing_students.existing_students, 0) as existing_students
  from date_spine
  left join adds on date_spine.spine_month = adds.start_month
  left join withdrawals on date_spine.spine_month = withdrawals.withdrawal_month
  left join existing_students on date_spine.spine_month = existing_students.spine_month
)
select * from joins order by month

The result that You will get is like below:

 month  | adds | withdrawals | existing_students 
---------+------+-------------+-------------------
 2023-01 |    4 |           2 |                 4
 2023-02 |    4 |           0 |                 6
 2023-03 |    0 |           0 |                 6
 2023-04 |    2 |           1 |                 8
 2023-05 |    0 |           0 |                 7
 2023-06 |    0 |           0 |                 7
 2023-07 |    0 |           0 |                 7
 2023-08 |    0 |           0 |                 7
 2023-09 |    0 |           0 |                 7
 2023-10 |    0 |           0 |                 7
 2023-11 |    0 |           0 |                 7
 2023-12 |    0 |           0 |                 7
...
 2029-12 |    0 |           0 |                 7
 2030-01 |    0 |           0 |                 7
 2030-02 |    0 |           0 |                 7
 2030-03 |    0 |           0 |                 7
 2030-04 |    0 |           0 |                 7
 2030-05 |    0 |           0 |                 7
 2030-06 |    0 |           0 |                 7
 2030-07 |    0 |           0 |                 7
 2030-08 |    0 |           0 |                 7
 2030-09 |    0 |           0 |                 7
 2030-10 |    0 |           0 |                 7
 2030-11 |    0 |           0 |                 7
 2030-12 |    0 |           0 |                 7

Please signup or login to give your own answer.

Click here to cancel reply.

Postgresql – Getting previous month's existing student count if there are no new adds this month

Answers