skip to Main Content

I’m not able to quite figure out how to capture the changes over time. I have the following:

db<>fiddle

CREATE TABLE term_changes (load_id int, merchant_id int, load_date date, terms varchar(50));

INSERT INTO term_changes (load_id, merchant_id, load_date, terms) VALUES 
  (1, 1, '2023-01-05', 'Roses are red'),
  (2, 2, '2023-01-05', 'Roses are blue'),
  (3, 1, '2023-01-06', 'Roses are red'),
  (4, 2, '2023-01-06', 'Roses are blue'),
  (5, 1, '2023-01-07', 'Roses are violet'),
  (6, 2, '2023-01-07', 'Roses are blue'),
  (7, 1, '2023-01-08', 'Roses are violet'),
  (8, 2, '2023-01-08', 'Roses are yellow');

WITH t1 AS (SELECT load_id, merchant_id, load_date, MD5(terms) AS terms
            FROM term_changes
            ORDER BY merchant_id, load_id),
t2 AS (SELECT load_id, 
              merchant_id, 
              load_date, 
              terms, 
              LAG(load_id, 1) OVER (PARTITION BY merchant_id 
                                    ORDER BY load_id) AS prev_load_id
       FROM t1)
SELECT *
FROM   t2 
       JOIN t1 ON t1.load_id = t2.prev_load_id
                  AND t1.merchant_id = t2.merchant_id
                  AND t1.terms != t2.terms

Which returns

load_id merchant_id load_date terms prev_load_id load_id merchant_id load_date terms
5 1 2023-01-07 84df2c2124ad3fc5c8cdf76ce1d7f3e3 3 3 1 2023-01-06 6becb043847fefb01e7989034cbdb136
8 2 2023-01-08 90b64ad67da829652ee622e0695748fc 6 6 2 2023-01-07 0e3eb8ff97b31e8874f7b51c23f242a2

The output I’m after captures the current and changed values by merchant_id:

load_id merchant_id load_date terms
1 1 2023-01-05 Roses are red
5 1 2023-01-07 Roses are violet
2 2 2023-01-05 Roses are blue
8 2 2023-01-08 Roses are yellow
  • Using load_id as there may be multiple entries per date

2

Answers


  1. Chosen as BEST ANSWER

    Looks like I just needed to change to a LEFT JOIN move the predicate:

    CREATE TABLE term_changes (load_id int, merchant_id int, load_date date, terms varchar(50));
    
    INSERT INTO term_changes (load_id, merchant_id, load_date, terms) VALUES 
      (1, 1, '2023-01-05', 'Roses are red'),
      (2, 2, '2023-01-05', 'Roses are blue'),
      (3, 1, '2023-01-06', 'Roses are red'),
      (4, 2, '2023-01-06', 'Roses are blue'),
      (5, 1, '2023-01-07', 'Roses are violet'),
      (6, 2, '2023-01-07', 'Roses are blue'),
      (7, 1, '2023-01-08', 'Roses are violet'),
      (8, 2, '2023-01-08', 'Roses are yellow'),
      (9, 3, '2023-01-05', 'Roses are green'),
      (10, 3, '2023-01-06', 'Roses are green'),
      (11, 3, '2023-01-07', 'Roses are green'),
      (12, 3, '2023-01-08', 'Roses are green');
    
    
    WITH t1 AS (SELECT load_id, 
                       merchant_id, 
                       load_date, 
                       terms, 
                       MD5(terms) AS terms_hashed
                FROM term_changes
                ORDER BY merchant_id, load_id),
    t2 AS (SELECT load_id, 
                  merchant_id, 
                  load_date, 
                  terms, 
                  terms_hashed, 
                  LAG(load_id, 1) OVER (PARTITION BY merchant_id 
                                        ORDER BY load_id) AS prev_load_id
           FROM t1)
    SELECT t2.load_id,
           t2.merchant_id,
           t2.load_date,
           t2.terms
    FROM   t2 
           LEFT JOIN t1 ON t1.load_id = t2.prev_load_id
                           AND t1.merchant_id = t2.merchant_id
    WHERE  ( t1.terms_hashed != t2.terms_hashed
             OR t2.prev_load_id IS NULL )
    
    load_id merchant_id load_date terms
    1 1 2023-01-05 Roses are red
    5 1 2023-01-07 Roses are violet
    2 2 2023-01-05 Roses are blue
    8 2 2023-01-08 Roses are yellow
    9 3 2023-01-05 Roses are green

    fiddle


  2. You can use this query:

    WITH t1 AS (
      SELECT 
        load_id, 
        merchant_id, 
        load_date, 
        terms, 
        LAG(terms) OVER (PARTITION BY merchant_id ORDER BY load_date) AS prev_terms
      FROM term_changes
    ),
    t2 AS (
      SELECT 
        load_id, 
        merchant_id, 
        load_date, 
        terms 
      FROM t1 
      WHERE prev_terms IS NULL OR prev_terms != terms
    )
    SELECT *
    FROM t2
    ORDER BY merchant_id, load_date;
    

    fiddle

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search