Group by column over partition ordered by date

diego gutierrez 161 Reputation points
2022-07-05T22:23:28.217+00:00

Hi Everyone.

I need a script to get the first and second null value from a some records grouped by a column and ordered by date.

217818-sqlscenario.jpg

I need the 311888 and 123477 itinerary groups because they have the first and second LegSegment as null value ordered by CreationDate, something like:
select * from Test group by Itinerary order by CreationDate where (first LegSegment and secondLegSegment are null ordered by CreationDate)

This is a query example to build the scenario:

CREATE TABLE [dbo].[Test](  
	[Id] [int] IDENTITY(1,1) NOT NULL,  
	[Itinerary] [int] NULL,  
	[LegSegment] [int] NULL,  
	[CreationDate] [datetime] NULL,  
 CONSTRAINT [PK_Test2] PRIMARY KEY CLUSTERED   
(  
	[Id] ASC  
))  
  
INSERT INTO [dbo].[Test]  
           ([Itinerary]  
           ,[LegSegment]  
           ,[CreationDate])  
     VALUES (311888,NULL,'2022-05-20 12:43:34.090'),(311888,NULL,'2022-05-21 12:43:34.090'), (311888,817430,'2022-05-22 12:43:34.090')  
	 , (271888,NULL,'2022-05-20 12:43:34.090'),(271888,256321,'2022-05-21 12:43:34.090'), (271888,NULL,'2022-05-22 12:43:34.090')  
	 , (312288,NULL,'2022-05-20 12:43:34.090'),(312288,345689,'2022-05-21 12:43:34.090'), (312288,817430,'2022-05-22 12:43:34.090')  
	 , (123456,345678,'2022-05-20 12:43:34.090'),(123456,344657,'2022-05-21 12:43:34.090'), (123456,817430,'2022-05-22 12:43:34.090')  
	 , (777555,NULL,'2022-05-20 12:43:34.090'),(777555,555778,'2022-05-21 12:43:34.090'), (777555,456345,'2022-05-22 12:43:34.090')  
	 , (123477,NULL,'2022-05-20 12:43:34.090'),(123477,NULL,'2022-05-21 12:43:34.090'), (123477,567897,'2022-05-22 12:43:34.090')  
	 , (123477,567846,'2022-05-23 12:43:34.090')  
GO  

Thanks in advance.

Developer technologies Transact-SQL
{count} votes

Accepted answer
  1. Jingyang Li 5,896 Reputation points Volunteer Moderator
    2022-07-06T03:48:18.34+00:00
     CREATE TABLE [dbo].[Test](  
         [Id] [int] IDENTITY(1,1) NOT NULL,  
         [Itinerary] [int] NULL,  
         [LegSegment] [int] NULL,  
         [CreationDate] [datetime] NULL,  
      CONSTRAINT [PK_Test2] PRIMARY KEY CLUSTERED   
     (  
         [Id] ASC  
     ))  
          
     INSERT INTO [dbo].[Test]  
                ([Itinerary]  
                ,[LegSegment]  
                ,[CreationDate])  
          VALUES (311888,NULL,'2022-05-20 12:43:34.090'),(311888,NULL,'2022-05-21 12:43:34.090'), (311888,817430,'2022-05-22 12:43:34.090')  
          , (271888,NULL,'2022-05-20 12:43:34.090'),(271888,256321,'2022-05-21 12:43:34.090'), (271888,NULL,'2022-05-22 12:43:34.090')  
          , (312288,NULL,'2022-05-20 12:43:34.090'),(312288,345689,'2022-05-21 12:43:34.090'), (312288,817430,'2022-05-22 12:43:34.090')  
          , (123456,345678,'2022-05-20 12:43:34.090'),(123456,344657,'2022-05-21 12:43:34.090'), (123456,817430,'2022-05-22 12:43:34.090')  
          , (777555,NULL,'2022-05-20 12:43:34.090'),(777555,555778,'2022-05-21 12:43:34.090'), (777555,456345,'2022-05-22 12:43:34.090')  
          , (123477,NULL,'2022-05-20 12:43:34.090'),(123477,NULL,'2022-05-21 12:43:34.090'), (123477,567897,'2022-05-22 12:43:34.090')  
          , (123477,567846,'2022-05-23 12:43:34.090')  
     GO  
      
      
     select * from  [dbo].[Test]  
      
     ;with mycte as (  
      
     select *,row_number() Over(partition by  Itinerary  
     Order by CreationDate) rn from Test   
    )  
    ,mycte2 as (  
    select Itinerary , Sum(case when rn=1 and LegSegment  is null then 1  
     when rn=2 and LegSegment is null then 1 else 0 end) cnt   
     from mycte  
     Group by Itinerary)  
      
      select * from test where Itinerary IN    
      (select Itinerary from mycte2 where cnt=2)  
       
       
      
      
       
     drop TABLE [dbo].[Test]  
    
    1 person found this answer helpful.

2 additional answers

Sort by: Most helpful
  1. Bert Zhou-msft 3,436 Reputation points
    2022-07-06T03:40:15.21+00:00

    Hi,@diego gutierrez
    Try this solution:

     ;with cte as  
      (  
        select  [Id] ,[Itinerary],[LegSegment] ,[CreationDate] ,  
    	rn=ROW_NUMBER()over(partition by [Itinerary] order by [CreationDate] asc)  
    	from dbo.Test  
    	 where  [Itinerary]=123477 or [Itinerary]=311888  
    	  
      )  
      select[Id] ,[Itinerary],[LegSegment] ,[CreationDate]  from cte   
     where  rn<=2 and [LegSegment] is null   
    

    217928-image.png

    Bert Zhou

    0 comments No comments

  2. Viorel 122.5K Reputation points
    2022-07-06T05:05:56.36+00:00

    If the third value must not be null, then try this query:

    ;  
    with Q as  
    (  
        select *,  
            row_number() over (partition by Itinerary order by CreationDate) rn,  
            lag(LegSegment) over (partition by Itinerary order by CreationDate) p,  
            lead(LegSegment) over (partition by Itinerary order by CreationDate) n1,  
            lead(LegSegment, 2) over (partition by Itinerary order by CreationDate) n2  
        from Test   
    )  
    select Id, Itinerary, LegSegment, CreationDate  
    from Q  
    where (rn = 1 and LegSegment is null and n1 is null and n2 is not null)   
    or    (rn = 2 and LegSegment is null and p is null and n1 is not null)  
    order by Itinerary, CreationDate  
    

    You can simplify it if the third row does not matter.

    0 comments No comments

Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.