question

brendagrossnickle-6913 avatar image
0 Votes"
brendagrossnickle-6913 asked RyanAbbey-0701 commented

Need Help with SQL UPDATE where the Target and Source have a many to many relationship

I have a really large transaction table with billions of rows. Need to match up Authorization request row - tran_type1 'A', to a declined notice row - tran_type1 = 'P' and tran_type2 = 'O'. Besides the tran_type1 and tran_type2 a match must also have the same acct_nbr, same tran_amt, and the A and P,O rows are within 60 seconds of each other, with the A coming first.

The code below "works" except that the same target (Authorization) row gets updated multiple times or a single source row (decline notice) updates multiple target rows. Assume that I need a PARTITION OVER() ROW_NUMBER or RANK to join on so that the the Many to Many Update becomes a One to One. Can someone help me with any ideas or code examples? Thanks

In the result set below the *** below are problems. A single Authorization request should match a single decline notice

--ID-03 - matches ID-04
--ID-06 - matches ID-07
--ID-09 - matches ID-11
--ID-10 - matches ID-11 ID-10 should match ID-12
--ID-13 - matches ID-15
--ID-14 - matches ID-15
ID-14 should not be declined. ID-15 has already matched ID13.


 /****
    
 Need to match an Authorization request row - tran_type1 "A' to a declined notice row,
 tran_type1 = 'P' and tran_type2 = 'O'. Besides the tran_type1 and tran_type2 a match 
 must also have the same acct_nbr, same tran_amt, and the A and P,O row are within 60
  seconds of each other, with the A coming first.
    
    
 The *** below are problems. A single Authorization request should match a single decline notice
    
 --ID-03 - matches ID-04
 --ID-06 - matches ID-07 
 --ID-09 - matches ID-11
 --ID-10 - matches ID-11 *** ID-10 should match ID-12
 --ID-13 - matches ID-15 
 --ID-14 - matches ID-15 *** ID-14 should not be declined. ID-15 has already matched ID13. 
    
 ***/
    
    
    
 IF(OBJECT_ID('tempdb..#trans') IS NOT NULL) DROP TABLE #trans;
     create table #trans (acct_nbr varchar(128), tran_dt datetime, tran_amt decimal(38,2), tran_type1 varchar(128), tran_type2 varchar(128), tran_id varchar(128), tran_id_decline varchar(128))
    
 insert into #trans (acct_nbr, tran_dt, tran_amt, tran_type1, tran_type2, tran_id
 )
 values 
 ('111', '2022-05-08 11:08:47.257', '1.00', 'C', 'X', 'ID-01'),
 ('111', '2022-05-08 11:08:47.257', '1.00', 'A', '', 'ID-02'),
 ('111', '2022-05-09 11:10:47.257', '1.00', 'A', '', 'ID-03'),
 ('111', '2022-05-09 11:11:11.257', '1.00', 'P', 'O', 'ID-04'),
 ('111', '2022-05-09 13:11:33.123', '1.00', 'A', 'X', 'ID-05'),
 ('111', '2022-05-09 14:10:47.257', '1.00', 'A', '', 'ID-06'),
 ('111', '2022-05-09 14:11:17.257', '1.00', 'P', 'O', 'ID-07'),
 ('111', '2022-05-09 14:11:48.257', '1.00', 'P', 'O', 'ID-08') ,
 ('222', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-09'),
 ('222', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-10'),
 ('222', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-11'),
 ('222', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-12'),
 ('333', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-13'),
 ('333', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-14'),
 ('333', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-15');
    
     
 IF(OBJECT_ID('tempdb..#tran_decline_notice') IS NOT NULL) DROP TABLE #tran_decline_notice;
 select * into #tran_decline_notice from (select * from #trans where tran_type1 = 'P' and tran_type2 = 'O') a
    
 update  t set tran_id_decline = d.tran_id
 --select t.*, d.trn_id as tran_id_decline
     from  #trans t
     left join #tran_decline_notice d
     on t.acct_nbr = d.acct_nbr and t.tran_amt = d.tran_amt
     where DATEDIFF(ss,t.tran_dt,d.tran_dt) between 0 and 60 and t.tran_type1 = 'A' and d.tran_type1 = 'P' and d.tran_type2 = 'O'
    
 select * from #trans
sql-server-transact-sql
· 1
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

Hi,@brendagrossnickle-6913

Have you tried the solutions below ?Your needs have been achieved, do you need us to follow up.

Bert Zhou

0 Votes 0 ·
JingyangLi avatar image
0 Votes"
JingyangLi answered JingyangLi commented
   ;with mycte as (
   select t.acct_nbr    ,    t.tran_amt, d.tran_id as tran_id_decline
   , ROW_NUMBER()OVER (PARTITION by t.acct_nbr,    t.tran_amt  Order BY t.tran_type1  )  rn
      from  #trans t
      left join #tran_decline_notice d
      on t.acct_nbr = d.acct_nbr and t.tran_amt = d.tran_amt
      where DATEDIFF(ss,t.tran_dt,d.tran_dt) between 0 and 60 and t.tran_type1 = 'A' and d.tran_type1 = 'P' and d.tran_type2 = 'O')
    
      Merge #trans t
      using mycte m on m.acct_nbr=t.acct_nbr and m.tran_amt=t.tran_amt and rn=1
      WHen matched then
      Update 
      Set   tran_id_decline = m.tran_id_decline;
    
      select * from #trans
· 4
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

it is not always going to be rn=1.

The result set that i am looking for is the following. The differences from what i am currently getting in my example are highlighted in yellow

200298-result-set.jpg


0 Votes 0 ·
result-set.jpg (76.1 KiB)
JingyangLi avatar image JingyangLi brendagrossnickle-6913 ·

What is your expected result from your sample data?

0 Votes 0 ·

i just pasted the screen shot above

0 Votes 0 ·
Show more comments
NaomiNNN avatar image
1 Vote"
NaomiNNN answered JingyangLi commented

Try:

 IF (OBJECT_ID('tempdb..#trans') IS NOT NULL)
     DROP TABLE #trans;
 CREATE TABLE #trans
     (
         acct_nbr VARCHAR(128)
         , tran_dt DATETIME
         , tran_amt DECIMAL(38, 2)
         , tran_type1 VARCHAR(128)
         , tran_type2 VARCHAR(128)
         , tran_id VARCHAR(128)
         , tran_id_decline VARCHAR(128)
     );
    
 INSERT INTO
     #trans (acct_nbr, tran_dt, tran_amt, tran_type1, tran_type2, tran_id)
 VALUES
     ('111', '2022-05-08 11:08:47.257', '1.00', 'C', 'X', 'ID-01')
     , ('111', '2022-05-08 11:08:47.257', '1.00', 'A', '', 'ID-02')
     , ('111', '2022-05-09 11:10:47.257', '1.00', 'A', '', 'ID-03')
     , ('111', '2022-05-09 11:11:11.257', '1.00', 'P', 'O', 'ID-04')
     , ('111', '2022-05-09 13:11:33.123', '1.00', 'A', 'X', 'ID-05')
     , ('111', '2022-05-09 14:10:47.257', '1.00', 'A', '', 'ID-06')
     , ('111', '2022-05-09 14:11:17.257', '1.00', 'P', 'O', 'ID-07')
     , ('111', '2022-05-09 14:11:48.257', '1.00', 'P', 'O', 'ID-08')
     , ('222', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-09')
     , ('222', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-10')
     , ('222', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-11')
     , ('222', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-12')
     , ('333', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-13')
     , ('333', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-14')
     , ('333', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-15');
    
    
 IF (OBJECT_ID('tempdb..#tran_decline_notice') IS NOT NULL)
     DROP TABLE #tran_decline_notice;
 SELECT
      *
 INTO
      #tran_decline_notice
 FROM (SELECT * FROM #trans WHERE tran_type1 = 'P' AND tran_type2 = 'O') a;
    
 SELECT * FROM #tran_decline_notice ORDER BY #tran_decline_notice.tran_id;
    
    
 ;WITH cte AS
     (
     SELECT
         t.*
         , d.tran_dt AS decline_dt
         , d.tran_id AS tran_id_decline_ID
         , ROW_NUMBER() OVER (PARTITION BY
                                  t.acct_nbr
                                  , t.tran_amt
                                  , t.tran_type1
                                  , t.tran_id
                              ORDER BY
                                  t.tran_id
                             ) AS Rn
     FROM
         #trans t
         LEFT JOIN #tran_decline_notice d
             ON t.acct_nbr = d.acct_nbr
                AND t.tran_amt = d.tran_amt
                AND t.tran_dt BETWEEN DATEADD(SECOND, -60, d.tran_dt) AND d.tran_dt
                AND t.tran_type1 = 'A'
     )
       , cte2 AS
     (
     SELECT
         *
         , ROW_NUMBER() OVER (PARTITION BY
                                  cte.acct_nbr
                                  , cte.tran_amt
                                  , cte.tran_type1
                                  , cte.tran_id_decline_ID
                              ORDER BY
                                  cte.tran_id_decline_ID
                                  , Rn
                             ) AS Rn2
     FROM
         cte
     )
 MERGE #trans t
 USING cte2 AS src
 ON t.tran_id = src.tran_id
    AND src.Rn2 = 1
 WHEN MATCHED AND t.tran_type1 = 'A' THEN
     UPDATE SET
         t.tran_id_decline = src.tran_id_decline_ID;
    
    
 SELECT * FROM #trans;
· 5
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

Both solutions from NaomiNNN and JingyangLi are identical. So commenting on them both.

Looking at acct_nbr '222' - it works on one sql server, but not another. On one server ID-9 matches ID-11 and ID-10 matches ID-12. This is good. There are two separate Authorizations and two separate Decline Notices. But ... on another server, ID-10 Matches ID-11 and then ID-10 matches ID-12. ID-10 ends up with two Decline Notices and ID-09 has none.

First CTE gets this as a result set for '222'

![201110-image.png][1]


Now CTE2 has this ORDER BY. But that order by does not guarantee the order of the tran_id. So ID-10 could some first for Decline Notice ID-11 and ID-12.

                           ORDER BY
                               cte.tran_id_decline_ID
                               , Rn


I have played and played with the PARTITION and ORDER BY of the two CTEs and the ON condition in the MERON. But cannot get it to consistently work.

0 Votes 0 ·

Here is the result set mentioned in my previous Reply.


201157-cte-result-set.jpg


0 Votes 0 ·
cte-result-set.jpg (34.2 KiB)
JingyangLi avatar image JingyangLi brendagrossnickle-6913 ·

Do you have an updated source dataset ? I assume the above is what you are expecting.
Just to be clear, your tran_id column will have duplicate values now?

0 Votes 0 ·
Show more comments
JingyangLi avatar image
0 Votes"
JingyangLi answered
  IF(OBJECT_ID('tempdb..#trans') IS NOT NULL) DROP TABLE #trans;
      create table #trans (acct_nbr varchar(128), tran_dt datetime, tran_amt decimal(38,2), tran_type1 varchar(128), tran_type2 varchar(128), tran_id varchar(128), tran_id_decline varchar(128))
        
  insert into #trans (acct_nbr, tran_dt, tran_amt, tran_type1, tran_type2, tran_id
  )
  values 
  ('111', '2022-05-08 11:08:47.257', '1.00', 'C', 'X', 'ID-01'),
  ('111', '2022-05-08 11:08:47.257', '1.00', 'A', '', 'ID-02'),
  ('111', '2022-05-09 11:10:47.257', '1.00', 'A', '', 'ID-03'),
  ('111', '2022-05-09 11:11:11.257', '1.00', 'P', 'O', 'ID-04'),
  ('111', '2022-05-09 13:11:33.123', '1.00', 'A', 'X', 'ID-05'),
  ('111', '2022-05-09 14:10:47.257', '1.00', 'A', '', 'ID-06'),
  ('111', '2022-05-09 14:11:17.257', '1.00', 'P', 'O', 'ID-07'),
  ('111', '2022-05-09 14:11:48.257', '1.00', 'P', 'O', 'ID-08') ,
  ('222', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-09'),
  ('222', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-10'),
  ('222', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-11'),
  ('222', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-12'),
  ('333', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-13'),
  ('333', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-14'),
  ('333', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-15');
        
         
  IF(OBJECT_ID('tempdb..#tran_decline_notice') IS NOT NULL) DROP TABLE #tran_decline_notice;
  select * into #tran_decline_notice from (select * from #trans where tran_type1 = 'P' and tran_type2 = 'O') a
    
  ;with mycte as (
    
 select t.acct_nbr    ,t.tran_type1,    t.tran_amt, t.tran_id , d.tran_id as tran_id_decline
  , ROW_NUMBER()OVER ( PARTITION by t.acct_nbr,    t.tran_amt,t.tran_type1,t.tran_id Order BY t.tran_id  ) rn1 
     
    
      from  #trans t
      left join #tran_decline_notice d
      on t.acct_nbr = d.acct_nbr and t.tran_amt = d.tran_amt
      where DATEDIFF(ss,t.tran_dt,d.tran_dt) between 0 and 60 
      and t.tran_type1 = 'A' 
      and d.tran_type1 = 'P' 
      and d.tran_type2 = 'O'
      )
      ,mycte1 as (
      select  acct_nbr    , tran_type1, tran_amt,  tran_id ,  tran_id_decline, rn1,
   ROW_NUMBER()OVER ( PARTITION by  acct_nbr, tran_amt, tran_type1,tran_id_decline Order BY  cast(right(tran_id_decline,2) as int), rn1  ) rn2 
     
  from mycte
   )
  ,mycte2 as (
  select  acct_nbr    , tran_type1, tran_amt,  tran_id ,  tran_id_decline  
  from mycte1
  where rn2=1
 )
     
    
      Merge #trans t
       using mycte2 m on m.tran_id=t.tran_id  
       WHen matched then
       Update 
       Set   tran_id_decline = m.tran_id_decline;
    
  select * from  #trans
    
  /*
  acct_nbr    tran_dt    tran_amt    tran_type1    tran_type2    tran_id    tran_id_decline
 111    2022-05-08 11:08:47.257    1.00    C    X    ID-01    NULL
 111    2022-05-08 11:08:47.257    1.00    A        ID-02    NULL
 111    2022-05-09 11:10:47.257    1.00    A        ID-03    ID-04
 111    2022-05-09 11:11:11.257    1.00    P    O    ID-04    NULL
 111    2022-05-09 13:11:33.123    1.00    A    X    ID-05    NULL
 111    2022-05-09 14:10:47.257    1.00    A        ID-06    ID-07
 111    2022-05-09 14:11:17.257    1.00    P    O    ID-07    NULL
 111    2022-05-09 14:11:48.257    1.00    P    O    ID-08    NULL
 222    2022-05-10 11:09:47.257    1.00    A        ID-09    ID-11
 222    2022-05-10 11:09:47.257    1.00    A        ID-10    ID-12
 222    2022-05-10 11:09:57.257    1.00    P    O    ID-11    NULL
 222    2022-05-10 11:09:57.257    1.00    P    O    ID-12    NULL
 333    2022-05-10 11:09:47.257    1.00    A        ID-13    ID-15
 333    2022-05-10 11:09:47.257    1.00    A        ID-14    NULL
 333    2022-05-10 11:09:57.257    1.00    P    O    ID-15    NULL
    
  */
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

RyanAbbey-0701 avatar image
0 Votes"
RyanAbbey-0701 answered RyanAbbey-0701 commented

Taking a different approach... for each "P/O", identify the in-scope "A" records and how many "P/O" records there are between the "A" and the current "P/O"


 select 
 a.tran_id,  case when
  row_number() over (partition by po.tran_id order by a.tran_id)  = (select count(*) from #trans ppo where po.acct_nbr = ppo.acct_nbr and po.tran_amt = ppo.tran_amt and datediff(ss, ppo.tran_dt, po.tran_dt) between 0 and 60
   and ppo.tran_type1 = 'P' and ppo.tran_type2 = 'O' and ppo.tran_id <= po.tran_id and ppo.tran_id > a.tran_id) then po.tran_id end as DeclinedBy
 from #trans po
  inner join #trans a on po.acct_nbr = a.acct_nbr and po.tran_amt = a.tran_amt and datediff(ss, a.tran_dt, po.tran_dt) between 0 and 60 and a.tran_type1 = 'A'
 where po.tran_type1 = 'P' and po.tran_type2 = 'O'

Note, you will need to drop the null "DeclinedBy" records - your "ID-09" and "ID-10" will appear twice, one with a declined identifier and one without but it's the "with" that are relevant

How often are you planning on running? Hitting a billion rows would have me slightly worried with multiple scans so hopefully a one-off update with good indexing!

· 5
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

The tran_id will not always be in sequential order. It is a random value. I just made it sequential for this example so that it was easy to review the results. I tried substituting tran_dt for tran_id in the <= >but that did not work - "ppo.tran_dt <= po.tran_dt and ppo.tran_dt > a.tran_dt) then po.tran_id end as DeclinedBy". Not sure why it did not work. I could add a seq number by datetime, but not sure what that would accomplish. Any thoughts?

0 Votes 0 ·
JingyangLi avatar image JingyangLi brendagrossnickle-6913 ·

We have to find a column we can define sequence for the dataset. Can you think about it, whether it is the tran_id( not the case any more) or something else in your business case. trans_dt along with tran_types to construct a sequence.
If we cannot define a sequence for the dataset within to be used in ORDER BY clause, it would be hard to reach your goal.
Can you modify your source data to see whether we can find another way? (not depending on tran_id sequence any more), mostly the trans_dt column values in a meaningful way.
Thanks.

0 Votes 0 ·
RyanAbbey-0701 avatar image RyanAbbey-0701 brendagrossnickle-6913 ·

You will need some kind of sequencing in order to target single rows for update, is there anything in the table that provides this?

The date field won't work as you have multiple rows with the same date

0 Votes 0 ·

Transaction_ID is unique. So it can be used for update. But transaction_id is not in sorted order by transaction_dt. So I was confused by this code "ppo.tran_id <= po.tran_id and ppo.tran_id > a.tran_id" thought that i could use tran_dt in place of tran_id, but that did not work.

0 Votes 0 ·
Show more comments
JingyangLi avatar image
0 Votes"
JingyangLi answered

--I have seen the issue you pointed out.
--With your original dataset, we create a rowNumber to track a sequence as tie-breaker.

--Here is the modified version. Not tested for billion rows!
IF (OBJECT_ID('tempdb..#trans') IS NOT NULL)
DROP TABLE #trans;
CREATE TABLE #trans
(
acct_nbr VARCHAR(128)
, tran_dt DATETIME
, tran_amt DECIMAL(38, 2)
, tran_type1 VARCHAR(128)
, tran_type2 VARCHAR(128)
, tran_id VARCHAR(128)
, tran_id_decline VARCHAR(128)
);

  INSERT INTO
      #trans (acct_nbr, tran_dt, tran_amt, tran_type1, tran_type2, tran_id)
  VALUES
      ('111', '2022-05-08 11:08:47.257', '1.00', 'C', 'X', 'ID-01')
      , ('111', '2022-05-08 11:08:47.257', '1.00', 'A', '', 'ID-02')
      , ('111', '2022-05-09 11:10:47.257', '1.00', 'A', '', 'ID-03')
      , ('111', '2022-05-09 11:11:11.257', '1.00', 'P', 'O', 'ID-04')
      , ('111', '2022-05-09 13:11:33.123', '1.00', 'A', 'X', 'ID-05')
      , ('111', '2022-05-09 14:10:47.257', '1.00', 'A', '', 'ID-06')
      , ('111', '2022-05-09 14:11:17.257', '1.00', 'P', 'O', 'ID-07')
      , ('111', '2022-05-09 14:11:48.257', '1.00', 'P', 'O', 'ID-08')
      , ('222', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-09')    
      , ('222', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-10')
      , ('222', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-11')
      , ('222', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-12')
      , ('333', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-13')
      , ('333', '2022-05-10 11:09:47.257', '1.00', 'A', '', 'ID-14')
      , ('333', '2022-05-10 11:09:57.257', '1.00', 'P', 'O', 'ID-15');
        
        
  IF(OBJECT_ID('tempdb..#tran_decline_notice') IS NOT NULL) DROP TABLE #tran_decline_notice;
   select *, IDENTITY( int ) seq into #tran_decline_notice from (select  * from #trans where tran_type1 = 'P' and tran_type2 = 'O') a
        
   ;with mycte as (
        
  select  t.acct_nbr    ,t.tran_type1,    t.tran_amt, t.tran_id , d.tran_id as tran_id_decline  
   , ROW_NUMBER()OVER ( PARTITION by t.acct_nbr,    t.tran_amt,t.tran_type1,t.tran_id Order BY t.tran_id  ) rn0 
         
  , ROW_NUMBER()OVER (Order BY t.tran_id,d.tran_id ) seq  ----added
       from  #trans t
       left join #tran_decline_notice d
       on t.acct_nbr = d.acct_nbr and t.tran_amt = d.tran_amt
       where DATEDIFF(ss,t.tran_dt,d.tran_dt) between 0 and 60 
       and t.tran_type1 = 'A' 
       and d.tran_type1 = 'P' 
       and d.tran_type2 = 'O'
       )
       ,mycte1 as (
       select  acct_nbr    , tran_type1, tran_amt,  tran_id ,  tran_id_decline, rn0 ,seq 
       , ROW_NUMBER()OVER ( PARTITION by acct_nbr,tran_amt,tran_type1,tran_id Order BY tran_id_decline  ) rn1 
    ,ROW_NUMBER()OVER ( PARTITION by  acct_nbr, tran_amt, tran_type1,tran_id_decline 
    Order BY  tran_id_decline, rn0, seq  ) rn2 
      
         
   from mycte
    )
    
   ,mycte2 as (
   select  acct_nbr, tran_type1, tran_amt,  tran_id ,  tran_id_decline  
   from mycte1
   where rn1-rn2=0
  )
    
        
       Merge #trans t
        using mycte2 m on m.tran_id=t.tran_id  
        WHen matched then
        Update 
        Set   tran_id_decline = m.tran_id_decline;
        
  select * from  #trans
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.