skip to Main Content

I have two tables, lets call them t1 and t2. Fiddle

select setseed(.42);

create table t1(a,b,c,d)as
select (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
from generate_series(1,100);

create table t2(a,b,c,d)as
select (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
from generate_series(1,200);

I’m trying to filter t2 and count based on my results from a t1 selected query. This would be an inefficient working example (takes half an hour to finish):

SELECT *,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A) AS cnt1,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B) AS cnt2,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B AND t2.C = T1.C) AS cnt3,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B AND t2.C = T1.C AND t2.D = T1.D) AS cnt4,
   ... and so on ...

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

And this is what I’m trying to do using CTE:

SELECT *,
  (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),
   CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),
   CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C),
   CTE4 AS (SELECT COUNT (*) FROM CTE3 WHERE t2.D = T1.D)
     SELECT 
       (SELECT COUNT (*) FROM CTE) AS cnt1,
     SELECT 
       (SELECT COUNT (*) FROM CTE2) AS cnt2,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt3,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt4
   )

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

This only allows to return a single result, ex. cnt1. My guess is that maybe I can pass the result as an array like the example above, but I’m not sure how to achieve this.

SELECT *,
  (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),
   CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),
   CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C),
   CTE4 AS (SELECT COUNT (*) FROM CTE3 WHERE t2.D = T1.D)
     ARRAY (SELECT 
       (SELECT COUNT (*) FROM CTE) ,
     SELECT 
       (SELECT COUNT (*) FROM CTE2),
     SELECT 
       (SELECT COUNT (*) FROM CTE3),
     SELECT 
       (SELECT COUNT (*) FROM CTE3))
   ) AS CNT[]

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

EDIT: This would be a simplified example of my tables and expected
result.

fiddle

create table t1(id,a,b,c) as 
 values(1,2,1,1),
       (2,3,2,2),
       (3,1,3,3),
       (4,2,4,4),
       (5,3,5,5),
       (6,1,2,6),
       (7,2,2,7),
       (8,3,2,8),
       (9,4,2,9),
       (10,5,4,1);

create table t2(id,a,b,c) as 
 values(1,2,1,1),
       (2,3,2,2),
       (3,1,3,1),
       (4,2,4,4),
       (5,3,5,5),
       (6,1,2,6),
       (7,2,2,7),
       (8,3,2,8),
       (9,4,3,9),
       (10,5,4,1),
       (11,3,2,2),
       (12,4,4,4),
       (13,5,6,5),
       (14,3,2,3),
       (15,2,2,7),
       (16,1,2,8),
       (17,4,1,9),
       (18,5,2,5),
       (19,6,3,6),
       (20,7,4,7);

so updating my faulty query for this example:

SELECT *,   
 (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),    
  CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),   
  CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C)
     SELECT 
       (SELECT COUNT (*) FROM CTE) AS cnt1,
     SELECT 
       (SELECT COUNT (*) FROM CTE2) AS cnt2,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt3    
   ) 
 FROM t1 WHERE A>B AND C>1.5 

2

Answers


  1. The aggregate filter clause lets you do that in a single flat query, in a single pass.
    demo at db<>fiddle

    select count(*)as cnt1
         , count(*)filter(where t2.b=t1.b)as cnt2 
         , count(*)filter(where t2.b=t1.b 
                            and t2.c=t1.c)as cnt3
         , count(*)filter(where t2.b=t1.b 
                            and t2.c=t1.c 
                            and t2.d=t1.d)as cnt4
    from t1
    join t2 using(a)
    --where t1.a>t1.b 
    --  and 1.5>t1.c 
    --  and t1.d-t1.e>t1.a;
    
    cnt1 cnt2 cnt3 cnt4
    2031 225 17 2

    If you’re fine with getting that in rows, not columns, you can make those matches/mismatches your grouping sets or a rollup.

    select t1.b=t2.b as b
         , t1.c=t2.c as c
         , t1.d=t2.d as d
         , grouping(t1.b=t2.b,t1.c=t2.c,t1.d=t2.d)
         , count(*)
    from t1
    join t2 using(a)
    group by rollup(t1.b=t2.b,t1.c=t2.c,t1.d=t2.d)
    having null is not distinct from nullif((t1.b=t2.b and t1.c=t2.c and t1.d=t2.d),true)
    order by grouping;
    
    b c d grouping count
    t t t 0 2
    t t null 1 17
    t null null 3 225
    null null null 7 2031

    The use of grouping() helps if you want to skip/hide the individual columns. Having is there to hide the counts of mismatches.

    b c d grouping count
    t t t 0 2
    f t f 0 148
    t t f 0 15
    f f t 0 163
    t f t 0 19
    t f f 0 189
    f t t 0 26
    f f f 0 1469
    t f null 1 208
    t t null 1 17
    f f null 1 1632
    f t null 1 174
    t null null 3 225
    f null null 3 1806
    null null null 7 2031

    Note that if the columns are nullable, having would have to change and grouping() would be necessary to tell the difference between a grouping set where the column isn’t involved and one where the comparison actually yielded null.

    Login or Signup to reply.
  2. Refering to your last sample data with expected result (the result seems to be wrong) maybe you could use something like below:

    --      S a m p l e    D a t a : 
    create table t1(id,a,b,c) as 
      values(1,2,1,1), (2,3,2,2), (3,1,3,3), (4,2,4,4), (5,3,5,5),
            (6,1,2,6), (7,2,2,7), (8,3,2,8), (9,4,3,9), -- it is (9,4,2,9)  in your fiddle but (9,4,3,9) in your result
            (10,5,4,1);
    
    create table t2(id,a,b,c) as 
      values(1,2,1,1), (2,3,2,2), (3,1,3,1), (4,2,4,4), (5,3,5,5),
            (6,1,2,6), (7,2,2,7), (8,3,2,8), (9,4,3,9), (10,5,4,1),
            (11,3,2,2), (12,4,4,4), (13,5,6,5), (14,3,2,3), (15,2,2,7),
            (16,1,2,8), (17,4,1,9), (18,5,2,5), (19,6,3,6), (20,7,4,7);
    

    1st – filter t1 to just rows of interest (id 2, 8 and 9)

    WITH
      t1_filtered AS
        ( SELECT   t1.*
          FROM     t1 
          WHERE    a > b And c > 1.5
        )
    

    … then cross join t2 with filtered data and use conditional aggregation to fetch the counts …

    --      S Q L : 
    Select     t.id, t.a, t.b, t.c,
               Count(Distinct Case When t2.a = t.a Then t2.id End) as cnt1,
               Count(Distinct Case When t2.a = t.a And t2.b = t.b Then t2.id End) as cnt2, 
               Count(Distinct Case When t2.a = t.a And t2.b = t.b And t2.c = t.c Then t2.id End) as cnt3
    From       t1_filtered t
    Cross Join t2 
    Group By   t.id, t.a, t.b, t.c
    Order By   t.id
    

    R e s u l t :

    id a b c cnt1 cnt2 cnt3
    2 3 2 2 5 4 2
    8 3 2 8 5 4 1
    9 4 3 9 3 1 1

    fiddle

    … with randomly generated data it would be like in the fiddle

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search