Postgresql - Select counts from one table based in the results from another table

MarioOrozco
December 23, 2024
91 views
0 votes
2 Answers

I have two tables, lets call them t1 and t2. Fiddle

select setseed(.42);

create table t1(a,b,c,d)as
select (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
from generate_series(1,100);

create table t2(a,b,c,d)as
select (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
from generate_series(1,200);

I’m trying to filter t2 and count based on my results from a t1 selected query. This would be an inefficient working example (takes half an hour to finish):

SELECT *,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A) AS cnt1,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B) AS cnt2,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B AND t2.C = T1.C) AS cnt3,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B AND t2.C = T1.C AND t2.D = T1.D) AS cnt4,
   ... and so on ...

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

And this is what I’m trying to do using CTE:

SELECT *,
  (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),
   CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),
   CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C),
   CTE4 AS (SELECT COUNT (*) FROM CTE3 WHERE t2.D = T1.D)
     SELECT 
       (SELECT COUNT (*) FROM CTE) AS cnt1,
     SELECT 
       (SELECT COUNT (*) FROM CTE2) AS cnt2,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt3,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt4
   )

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

This only allows to return a single result, ex. cnt1. My guess is that maybe I can pass the result as an array like the example above, but I’m not sure how to achieve this.

SELECT *,
  (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),
   CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),
   CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C),
   CTE4 AS (SELECT COUNT (*) FROM CTE3 WHERE t2.D = T1.D)
     ARRAY (SELECT 
       (SELECT COUNT (*) FROM CTE) ,
     SELECT 
       (SELECT COUNT (*) FROM CTE2),
     SELECT 
       (SELECT COUNT (*) FROM CTE3),
     SELECT 
       (SELECT COUNT (*) FROM CTE3))
   ) AS CNT[]

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

EDIT: This would be a simplified example of my tables and expected
result.

fiddle

create table t1(id,a,b,c) as 
 values(1,2,1,1),
       (2,3,2,2),
       (3,1,3,3),
       (4,2,4,4),
       (5,3,5,5),
       (6,1,2,6),
       (7,2,2,7),
       (8,3,2,8),
       (9,4,2,9),
       (10,5,4,1);

create table t2(id,a,b,c) as 
 values(1,2,1,1),
       (2,3,2,2),
       (3,1,3,1),
       (4,2,4,4),
       (5,3,5,5),
       (6,1,2,6),
       (7,2,2,7),
       (8,3,2,8),
       (9,4,3,9),
       (10,5,4,1),
       (11,3,2,2),
       (12,4,4,4),
       (13,5,6,5),
       (14,3,2,3),
       (15,2,2,7),
       (16,1,2,8),
       (17,4,1,9),
       (18,5,2,5),
       (19,6,3,6),
       (20,7,4,7);

so updating my faulty query for this example:

SELECT *,   
 (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),    
  CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),   
  CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C)
     SELECT 
       (SELECT COUNT (*) FROM CTE) AS cnt1,
     SELECT 
       (SELECT COUNT (*) FROM CTE2) AS cnt2,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt3    
   ) 
 FROM t1 WHERE A>B AND C>1.5

Answers

The aggregate filter clause lets you do that in a single flat query, in a single pass.
_{demo at db<>fiddle}

select count(*)as cnt1
     , count(*)filter(where t2.b=t1.b)as cnt2 
     , count(*)filter(where t2.b=t1.b 
                        and t2.c=t1.c)as cnt3
     , count(*)filter(where t2.b=t1.b 
                        and t2.c=t1.c 
                        and t2.d=t1.d)as cnt4
from t1
join t2 using(a)
--where t1.a>t1.b 
--  and 1.5>t1.c 
--  and t1.d-t1.e>t1.a;

cnt1	cnt2	cnt3	cnt4
2031	225	17	2

If you’re fine with getting that in rows, not columns, you can make those matches/mismatches your grouping sets or a rollup.

select t1.b=t2.b as b
     , t1.c=t2.c as c
     , t1.d=t2.d as d
     , grouping(t1.b=t2.b,t1.c=t2.c,t1.d=t2.d)
     , count(*)
from t1
join t2 using(a)
group by rollup(t1.b=t2.b,t1.c=t2.c,t1.d=t2.d)
having null is not distinct from nullif((t1.b=t2.b and t1.c=t2.c and t1.d=t2.d),true)
order by grouping;

b	c	d	grouping	count
t	t	t	0	2
t	t	null	1	17
t	null	null	3	225
null	null	null	7	2031

The use of grouping() helps if you want to skip/hide the individual columns. Having is there to hide the counts of mismatches.

b	c	d	grouping	count
t	t	t	0	2
f	t	f	0	148
t	t	f	0	15
f	f	t	0	163
t	f	t	0	19
t	f	f	0	189
f	t	t	0	26
f	f	f	0	1469
t	f	null	1	208
t	t	null	1	17
f	f	null	1	1632
f	t	null	1	174
t	null	null	3	225
f	null	null	3	1806
null	null	null	7	2031

Note that if the columns are nullable, having would have to change and grouping() would be necessary to tell the difference between a grouping set where the column isn’t involved and one where the comparison actually yielded null.

Refering to your last sample data with expected result (the result seems to be wrong) maybe you could use something like below:

--      S a m p l e    D a t a : 
create table t1(id,a,b,c) as 
  values(1,2,1,1), (2,3,2,2), (3,1,3,3), (4,2,4,4), (5,3,5,5),
        (6,1,2,6), (7,2,2,7), (8,3,2,8), (9,4,3,9), -- it is (9,4,2,9)  in your fiddle but (9,4,3,9) in your result
        (10,5,4,1);

create table t2(id,a,b,c) as 
  values(1,2,1,1), (2,3,2,2), (3,1,3,1), (4,2,4,4), (5,3,5,5),
        (6,1,2,6), (7,2,2,7), (8,3,2,8), (9,4,3,9), (10,5,4,1),
        (11,3,2,2), (12,4,4,4), (13,5,6,5), (14,3,2,3), (15,2,2,7),
        (16,1,2,8), (17,4,1,9), (18,5,2,5), (19,6,3,6), (20,7,4,7);

1st – filter t1 to just rows of interest (id 2, 8 and 9)

WITH
  t1_filtered AS
    ( SELECT   t1.*
      FROM     t1 
      WHERE    a > b And c > 1.5
    )

… then cross join t2 with filtered data and use conditional aggregation to fetch the counts …

--      S Q L : 
Select     t.id, t.a, t.b, t.c,
           Count(Distinct Case When t2.a = t.a Then t2.id End) as cnt1,
           Count(Distinct Case When t2.a = t.a And t2.b = t.b Then t2.id End) as cnt2, 
           Count(Distinct Case When t2.a = t.a And t2.b = t.b And t2.c = t.c Then t2.id End) as cnt3
From       t1_filtered t
Cross Join t2 
Group By   t.id, t.a, t.b, t.c
Order By   t.id

R e s u l t :

id	a	b	c	cnt1	cnt2	cnt3
2	3	2	2	5	4	2
8	3	2	8	5	4	1
9	4	3	9	3	1	1

fiddle

… with randomly generated data it would be like in the fiddle

Please signup or login to give your own answer.

Click here to cancel reply.

Postgresql – Select counts from one table based in the results from another table

Answers