Aggregation weekly data from daily field

milan i 81 Reputation points
2021-10-12T06:00:36.057+00:00
CREATE TABLE #MYSRC
(
ID_1 VARCHAR(10),
ID_2 int,
ID_3 VARCHAR(10),
Pay1 varchar(6),
Pay2 varchar(6),
Status varchar(10),
FileDate DATE
)

--- SOME MORE detail INFO about data
--- WE HAVE ABOUT 2 YEARS WORTH OF DAILY FEEDS DATA LOADED TO ONETABLE(MYSRC) last column FileDate shows what day is the file, the granularity here is daily.
--- USUALLY EACH DAY FILE HAVE ABOUT 30K RECORDS, IN WHICH IT INCLUDES OLD RECORDS AS IS, NEW ENTRIES ADDED (by considering id1,id2,id3 combination) , FOR SOME MEMBERS IT MAY CHANGE OTHER COLUMN VALUES.
--- WE FOUND MEMBER BASED ON THREE id FIELDS, IF ID_1,2,3 CHANGES THEN IT IS CONSIDER AS NEW MEMBER.
--- HERE WOULD LIKE TO KNOW HOW MANY NEWMEMBERS WERE ADDED PER EACH week (weekly AGGREGATION)

---Inserts 5 records in that file, 3 old 2 new (in 2 one is just id change one is
INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep
INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '09/20/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Sep
INSERT INTO #MYSRC VALUES ('7120', 250, '3241', '120.75','0.00','EN-50', '09/03/2021') --New RECORDS because of ID1 change,
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '09/03/2021') ---Old RECORDS/member
INSERT INTO #MYSRC VALUES ('1122', 250, '3243', '20.75','0.00','EN-25', '09/02/2021') --old record
INSERT INTO #MYSRC VALUES ('1123', 252, '3244', null,null,'EN-25', '09/02/2021') --old record/member
INSERT INTO #MYSRC VALUES ('1122', 27, '3243', '20.75','0.00','EN-25', '09/01/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for SEP
INSERT INTO #MYSRC VALUES ('1123', 252, '3244', null,null,'EN-25', '09/01/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for SEP

---- August Data below - FileDate (last column)
INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '08/31/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for Aug
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for Aug
INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '08/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for Aug
INSERT INTO #MYSRC VALUES ('3120', 250, '777', '120.75','0.00','EN-25', '08/30/2021') --New Set of ID1,2,3 not exist in earlier data so count it as new member for Aug
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/30/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','EN-25', '08/29/2021') -- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '08/18/2021') ---same set (id1,2,3) not exist earlier, appeared first in Aug 2021. so count it
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '08/17/2021')--- exist same set (id1,2,3) in earlier data, so not a new member for Aug
INSERT INTO #MYSRC VALUES ('1121', 37, '3242', null,'3.00','FU-25', '08/11/2021') --- New Set of ID1,2,3 not exist in earlier data so count it as new member for AUG
INSERT INTO #MYSRC VALUES ('3122', 36, '100', '20.75','0.00','EN-25', '08/11/2021') ---New Set of ID1,2,3 not exist in earlier data so count it as new member for AUG

---- July Data below
INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '07/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '07/31/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('9122', 250, '3243', '20.75','0.00','FU-25', '07/31/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July
INSERT INTO #MYSRC VALUES ('3120', 250, '777', '120.75','0.00','FU-25', '07/20/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '07/20/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July
INSERT INTO #MYSRC VALUES ('6121', 251, '3242', null,'3.00','FU-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July
INSERT INTO #MYSRC VALUES ('3122', 250, '3243', '20.75','0.00','FU-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July
INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '07/05/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('5121', 251, '3242', null,'3.00','FU-25', '07/05/2021')-- New Set of ID1,2,3 not exist in earlier data so count it as new member for July
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','FU-25', '07/01/2021') --- exist same set (id1,2,3) in earlier data, so not a new member for July
INSERT INTO #MYSRC VALUES ('4122', 250, '100', '20.75','0.00','EN-25', '07/01/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

---- Jun
INSERT INTO #MYSRC VALUES ('3120', 250, '888', '120.75','0.00','EN-25', '06/05/2021')
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','EN-25', '06/05/2021')
INSERT INTO #MYSRC VALUES ('1121', 251, '3242', null,'3.00','FU-25', '06/01/2021')
INSERT INTO #MYSRC VALUES ('3122', 250, '100', '20.75','0.00','EN-25', '06/01/2021') --- assume jun 2021 is Starting population

expected output as below

first column week values (for every week current to min(filedate) week. i just choose a format but at your convenience pleas choose anyway you like.,

second column is New numbers as explained above only counts new numbers byid1, id2, id3 fields values not exist earlier

third column out of second column (new members) how many were first two characters of status column = "FU" then count else 0

Forth column out of second column (new members) how many were first two characters of status column = "EN" then count else 0

Week                                                 NewMembers           FUMembersCounts                 EUMembersCount
Oct-03-2021 To Oct-09-2021            0                                      0                                         0
Sep-26-2021 To Oct-02-2021            0                                      0                                         0
Sep-19-2021 To Sep-25-2021           0                                       0                                         0
Sep-12-2021 To Sep-18-2021           0                                       0                                         0
Sep-05-2021 To Sep-11-2021           0                                       0                                         0
Aug-30-2021 To Sep-04-2021          5                                        3                                         2

please help me

Thank you
Asiti

SQL Server
SQL Server
A family of Microsoft relational database management and analysis systems for e-commerce, line-of-business, and data warehousing solutions.
12,693 questions
Transact-SQL
Transact-SQL
A Microsoft extension to the ANSI SQL language that includes procedural programming, local variables, and various support functions.
4,552 questions
{count} votes

Accepted answer
  1. Guoxiong 8,126 Reputation points
    2021-10-14T15:54:45.313+00:00

    Hi @milan i ,

    I updated the script based on your further requirements. I use the month name based on the week start date, but you can change it to the week end date if you prefer it. You can also remove the LEFT() function from the month name to get the whole month name, i.e., 2021-September instead of 2021-Sep:

    DECLARE @startDate datetime;  
    DECLARE @endDate datetime;  
    SELECT @startDate = MIN(FileDate), @endDate = MAX(FileDate) FROM #MYSRC;  
      
    ;WITH CTE_Calendar_Date AS (  
    	SELECT @startDate AS [Date]  
    	UNION ALL  
    	SELECT DATEADD(day, 1, [Date]) AS [Date]  
    	FROM CTE_Calendar_Date  
    	WHERE [Date] <= @endDate  
    ),  
    CTE_Calendar_WeekNumber AS (  
    	SELECT [Year], WeekNumber, MIN([Date]) AS WeekStart, MAX([Date]) AS WeekEnd  
    	FROM (  
    		SELECT YEAR([Date]) AS [Year], DATEPART(WEEK, [Date]) AS WeekNumber, [Date]  
    		FROM CTE_Calendar_Date  
    	) AS d  
    	GROUP BY [Year], WeekNumber  
    ),  
      
    CTE_New_Number AS (  
    	SELECT *   
    	FROM (  
    		SELECT ID_1,   
    			   ID_2,   
    			   ID_3,   
    			   [Status],   
    			   Year(FileDate) AS [Year],   
    			   DATEPART(WEEK, FileDate) AS WeekNumber,   
    			   ROW_NUMBER() OVER(PARTITION BY ID_1, ID_2, ID_3 ORDER BY FileDate) AS Ranked  
    		FROM #MYSRC   
    	) AS t  
    	WHERE t.Ranked = 1  
    )  
      
    -- Month name is based on WeekStart date which can be changed to WeekEnd  
    -- Remove LEFT() function to get whole month name  
    SELECT CAST(c1.[Year] AS char(4)) + '-' + LEFT(DATENAME(month, c1.WeekStart), 3) AS [Year-MonthName],   
    	   REPLACE(REPLACE(CONVERT(varchar(12), c1.WeekStart, 107), ', ', '-'), ' ', '-') + ' To ' + REPLACE(REPLACE(CONVERT(varchar(12), c1.WeekEnd, 107), ', ', '-'), ' ', '-') AS [Week],   
    	   ISNULL(c2.NewMembers, 0) AS NewMembers,   
    	   ISNULL(FUMembersCounts, 0) AS FUMembersCounts,   
    	   ISNULL(EUMembersCount, 0) AS EUMembersCount   
    FROM CTE_Calendar_WeekNumber AS c1  
    LEFT JOIN (  
    	SELECT [Year],  
    		   WeekNumber,   
    		   COUNT(*) AS NewMembers,   
    		   SUM(CASE WHEN LEFT([Status], 2) = 'FU' THEN 1 ELSE 0 END) AS FUMembersCounts,  
    		   SUM(CASE WHEN LEFT([Status], 2) = 'EN' THEN 1 ELSE 0 END) AS EUMembersCount  
    	FROM CTE_New_Number   
    	GROUP BY [Year], WeekNumber  
    ) AS c2 ON c1.[Year] = c2.[Year] AND c1.WeekNumber = c2.WeekNumber  
    ORDER BY c1.[Year] DESC, c1.WeekNumber DESC  
    OPTION (MAXRECURSION 0);  
    

4 additional answers

Sort by: Most helpful
  1. milan i 81 Reputation points
    2021-10-12T13:15:03.793+00:00

    Good Morning @EchoLiu-MSFT

    Apologize for not being clear in first post.

    here is updates for your questions

    1) it need to calculate all the way till min(Filedate) covered week. in the table. for example min (file date) is Feb-07-2018 so my last row will be Feb-04-2018 To Feb-10-2018.

    2) earlier means with combination of ID1, ID2, Id3 columns first appeared week will count it. for example in below record 7120, 250, 3241 (id1,2,3) combination appeared first on Sep 3rd 2021. so new member count for this week (Aug 29 2021 To Sep 4 2021).

    so in other words just when ever ID1,ID2,Id3 added first t

    INSERT INTO #MYSRC VALUES ('7120', 250, '3241', '120.75','0.00','EN-50', '09/03/2021') --New RECORDS because of ID1 change,

    another example combination (3120, 250, 3241) appeared multiple times but need to consider only first appearance so on July 10. count this new member to July 10 fallen weeek, remain all other ones not countable.

    INSERT INTO #MYSRC VALUES ('3120', 250, '3241', '120.75','0.00','EN-25', '07/10/2021') -- New Set of ID1,2,3 not exist in earlier data so count it as new member for July

    Hope I explained, please let me know if you wish me to clarify further.

    Thank you much in advance
    asiti
    Thank you
    Asiti


  2. Guoxiong 8,126 Reputation points
    2021-10-12T18:22:40.97+00:00

    Try this:

    DECLARE @startDate datetime;
    DECLARE @endDate datetime;
    SELECT @startDate = MIN(FileDate), @endDate = MAX(FileDate) FROM #MYSRC;
    
    ;WITH CTE_Calendar_Date AS (
        SELECT @startDate AS [Date]
        UNION ALL
        SELECT DATEADD(day, 1, [Date]) AS [Date]
        FROM CTE_Calendar_Date
        WHERE [Date] <= @endDate
    ),
    CTE_Calendar_WeekNumber AS (
        SELECT [Year], WeekNumber, MIN([Date]) AS WeekStart, MAX([Date]) AS WeekEnd
        FROM (
            SELECT YEAR([Date]) AS [Year], DATEPART(WEEK, [Date]) AS WeekNumber, [Date]
            FROM CTE_Calendar_Date
        ) AS d
        GROUP BY [Year], WeekNumber
    ),
    CTE_New_Number AS (
        SELECT * 
        FROM (
            SELECT ID_1, 
                   ID_2, 
                   ID_3, 
                   [Status], 
                   Year(FileDate) AS [Year], 
                   DATEPART(WEEK, FileDate) AS WeekNumber, 
                   ROW_NUMBER() OVER(PARTITION BY ID_1, ID_2, ID_3 ORDER BY FileDate) AS Ranked
            FROM #MYSRC 
        ) AS t
        WHERE t.Ranked = 1
    )
    
    SELECT c1.[Year], REPLACE(REPLACE(CONVERT(varchar(12), c1.WeekStart, 107), ' ', '-'), ',', '-') + ' To ' + REPLACE(REPLACE(CONVERT(varchar(12), c1.WeekEnd, 107), ' ', '-'), ',', '-') AS [Week], 
           ISNULL(c2.NewMembers, 0) AS NewMembers, 
           ISNULL(FUMembersCounts, 0) AS FUMembersCounts, 
           ISNULL(EUMembersCount, 0) AS EUMembersCount 
    FROM CTE_Calendar_WeekNumber AS c1
    LEFT JOIN (
        SELECT [Year],
               WeekNumber, 
               COUNT(*) AS NewMembers, 
               SUM(CASE WHEN LEFT([Status], 2) = 'FU' THEN 1 ELSE 0 END) AS FUMembersCounts,
               SUM(CASE WHEN LEFT([Status], 2) = 'EN' THEN 1 ELSE 0 END) AS EUMembersCount
        FROM CTE_New_Number 
        GROUP BY [Year], WeekNumber
    ) AS c2 ON c1.[Year] = c2.[Year] AND c1.WeekNumber = c2.WeekNumber
    ORDER BY c1.[Year] DESC, c1.WeekNumber DESC
    OPTION (MAXRECURSION 0);
    

  3. milan i 81 Reputation points
    2021-10-12T23:38:34.52+00:00

    Thank you
    @Guoxiong

    much appreciate your help, I am getting error when i am running with my data

    "The Statement terminated the maximum recursion 100 has been exhausted before statement completion"

    I have data from 2019 june onwards each day about 80K records.

    Can you please help, Thank you


  4. milan i 81 Reputation points
    2021-10-17T08:40:30.167+00:00

    Thank you again for your help
    @Guoxiong

    0 comments No comments