Share via

data profiling stored procedure errors

Laura Ijewere 41 Reputation points
2020-11-05T15:29:45.87+00:00

CREATE PROCEDURE stm_DataProfiling
@report TINYINT , --1 = 'ColumnDataProfiling', 2 = 'ColumnUniqueValues'
@SchemaName NVARCHAR(MAX) = N'stg',--this is schema
@ObjectlisttoSearch NVARCHAR(MAX)=N'Agency,Calender,Calender Dates,Fare Attributes,FareRules,FeedInfo,Frequencies,Locations,Routes,Shapes,stops,Stoptimes,Trips',-- these are the tables I want to profile
@ExcludeTables NVARCHAR(MAX) = N'',
@ExcludeColumns NVARCHAR(MAX) = N'',
@ExcludeDataType NVARCHAR(100) = N'',
@RestrictCharlength INT,
@RestrictNoOfUniqueValues INT
AS

BEGIN

SET NOCOUNT ON;
SET ANSI_WARNINGS OFF;
SET ANSI_NULLS ON;

SELECT @RestrictCharlength = IIF(@RestrictCharlength IS NULL OR @RestrictCharlength = '',100,@RestrictCharlength)
SELECT @RestrictNoOfUniqueValues = IIF(@RestrictNoOfUniqueValues IS NULL OR @RestrictNoOfUniqueValues = '',50,@RestrictNoOfUniqueValues)

DECLARE @TableColList TABLE (Id INT IDENTITY(1,1),Tbl NVARCHAR(128),colname NVARCHAR(200),ColType NVARCHAR(150))

IF ISNULL(@SchemaName,'stg') <> '' OR ISNULL(@ObjectlisttoSearch,'Agency,Calender,Calender Dates,Fare Attributes,FareRules,FeedInfo,Frequencies,Locations,Routes,Shapes,stops,Stoptimes,Trips') <> ''
BEGIN

INSERT @TableColList
SELECT DISTINCT CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name) TableName
,C.name
,CASE WHEN TY.is_user_defined = 1 THEN (SELECT name FROM sys.types
WHERE system_type_id = user_type_id
AND system_type_id = TY.system_type_id)
ELSE TY.name
END
FROM Sys.tables T
JOIN sys.columns C
ON T.object_id = C.object_id
JOIN sys.types TY
ON C.[user_type_id] = TY.[user_type_id]
-- Ignore the datatypes that are not required
WHERE TY.name NOT IN ('geography','varbinary','binary','text', 'ntext', 'image', 'hierarchyid', 'xml', 'sql_variant')
AND (Schema_name(T.schema_id) IN (SELECT value FROM STRING_SPLIT(@SchemaName, ','))
OR CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name) IN (SELECT value FROM STRING_SPLIT(@ObjectlisttoSearch, ',')))
AND (TY.name NOT IN (SELECT value FROM STRING_SPLIT(@ExcludeDataType, ','))
AND TY.name = TY.name)
AND (C.name NOT IN (SELECT value FROM STRING_SPLIT(@ExcludeColumns, ','))
AND C.name = C.name)
AND (CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name) NOT IN (SELECT value FROM STRING_SPLIT(@ExcludeTables, ','))
AND CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name) = CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name))

END ELSE

BEGIN

INSERT @TableColList
SELECT DISTINCT CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name) TableName
,C.name
,CASE WHEN TY.is_user_defined = 1 THEN (SELECT name FROM sys.types
WHERE system_type_id = user_type_id
AND system_type_id = TY.system_type_id)
ELSE TY.name
END
FROM Sys.tables T
JOIN sys.columns C
ON T.object_id = C.object_id
JOIN sys.types TY
ON C.[user_type_id] = TY.[user_type_id]
-- Ignore the datatypes that are not required
WHERE TY.name NOT IN ('geography','varbinary','binary','text', 'ntext', 'image', 'hierarchyid', 'xml', 'sql_variant')
AND (TY.name NOT IN (SELECT value FROM STRING_SPLIT(@ExcludeDataType, ','))
AND TY.name = TY.name)
AND (C.name NOT IN (SELECT value FROM STRING_SPLIT(@ExcludeColumns, ','))
AND C.name = C.name)
AND (CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name) NOT IN (SELECT value FROM STRING_SPLIT(@ExcludeTables, ','))
AND CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name) = CONCAT(SCHEMA_NAME(T.SCHEMA_ID),'.',T.name))

END

DROP TABLE IF EXISTS #Final
CREATE TABLE #Final (Id BIGINT IDENTITY(1,1),TableName NVARCHAR(128),ColumnName NVARCHAR(200),ColumnType NVARCHAR(150),ColumnUniqueValues NVARCHAR(MAX),UniqueValueOccurance BIGINT,MissingDataRowCount BIGINT,MinValue NVARCHAR(MAX),MaxValue NVARCHAR(MAX),SpecialCharacters BIGINT,LeadingTrailingSpaces BIGINT,MinFieldValueLen BIGINT,MaxFieldValueLen BIGINT,Comment NVARCHAR(MAX))

DROP TABLE IF EXISTS #temp
CREATE TABLE #temp (Id BIGINT IDENTITY(1,1),TableName NVARCHAR(128),ColumnName NVARCHAR(200),Cnt BIGINT,MaxLen BIGINT,MinLen BIGINT,MissingDataCount BIGINT,MinValue NVARCHAR(MAX),MaxValue NVARCHAR(MAX),SpecialCharacters BIGINT,LeadingTrailingSpaces BIGINT)

DECLARE @I INT = 1
,@alenzi NVARCHAR(MAX) = N''
,@tblname NVARCHAR(128)
,@Colname NVARCHAR(200)
,@ColType NVARCHAR(150)
,@CNT BIGINT
,@MaxLen BIGINT
,@MinLen BIGINT
,@MissingData BIGINT
,@MaxVal NVARCHAR(MAX) = N''
,@MinVal NVARCHAR(MAX) = N''
,@MinMAxSQL NVARCHAR(MAX) = N''
,@SpecialCharacters BIGINT
,@LeadingTrailingSpaces BIGINT

WHILE @I <= (SELECT MAX(Id) FROM @TableColList)
BEGIN

SELECT @Colname = QUOTENAME(colname),@tblname = Tbl,@ColType = ColType FROM @TableColList
WHERE Id = @I

SELECT @MinMAxSQL = CASE WHEN @ColType IN ('date','datetime','datetime2','datetimeoffset','time','timestamp')
THEN CONCAT(' FORMAT (MIN(',@Colname,'), ''yyyy-MM-dd,hh:mm:ss'') MinValue,FORMAT (MAX(',@Colname,'), ''yyyy-MM-dd,hh:mm:ss'') MAXValue')
WHEN @ColType = 'bit'
THEN '0 AS MinValue,1 AS MaxValue'
ELSE CONCAT('CASE WHEN EXISTS (SELECT 1 FROM ',@tblname,' WHERE ISNUMERIC(',@Colname,') = 0)','THEN NULL ELSE MIN(',@Colname,') END MinValue
,CASE WHEN EXISTS (SELECT 1 FROM ',@tblname,' WHERE ISNUMERIC(',@Colname,') = 0)','THEN NULL ELSE MAX(',@Colname,') END MAXValue')
END

EXEC (';WITH CTE AS (
SELECT COUNT_BIG(DISTINCT '+@Colname+') Cnt
,MAX(LEN('+@Colname+')) MaxLen
,MIN(LEN('+@Colname+')) MinLen
,SUM(CASE WHEN '+@Colname+' IS NULL OR CAST('+@Colname+' AS VARCHAR(MAX)) = '''' THEN 1 ELSE 0 END) MissingData
,'+@MinMAxSQL+'
,CASE WHEN '''+@ColType+''' IN (''nvarchar'',''varchar'',''nchar'',''char'')
THEN SUM(CASE WHEN '+@Colname+' LIKE ''%[^a-zA-Z0-9 ]%'' THEN 1 ELSE 0 END)
ELSE NULL END SpecialCharacters
,CASE WHEN '''+@ColType+''' IN (''nvarchar'',''varchar'',''nchar'',''char'')
THEN SUM(CASE WHEN ISNULL(DATALENGTH('+@Colname+'),'''') = ISNULL(DATALENGTH(RTRIM(LTRIM('+@Colname+'))),'''') THEN 0 ELSE 1 END)
ELSE NULL END LeadingTrailingSpaces
FROM '+@tblname+' )
INSERT #temp(TableName,ColumnName,Cnt,MaxLen,MinLen,MissingDataCount,MinValue,MaxValue,SpecialCharacters,LeadingTrailingSpaces)
SELECT '''+@tblname+''','''+@Colname+''',Cnt,ISNULL(MaxLen,0) MaxLen,ISNULL(MinLen,0) MinLen,ISNULL(MissingData,0) MissingData,MinValue,MAXValue
,ISNULL(SpecialCharacters,0) SpecialCharacters,ISNULL(LeadingTrailingSpaces,0) LeadingTrailingSpaces FROM CTE')

SELECT @CNT = Cnt,@MaxLen = MaxLen,@MinLen = MinLen,@MissingData = MissingDataCount,@MinVal=MinValue,@MaxVal=MAXValue
,@SpecialCharacters = SpecialCharacters ,@LeadingTrailingSpaces = LeadingTrailingSpaces
FROM #temp
WHERE Id = @I AND TableName = @tblname AND ColumnName = @Colname

IF ISNULL(@MaxLen ,'') < @RestrictCharlength AND ISNULL(@CNT ,'') < @RestrictNoOfUniqueValues
BEGIN

  SET @SQL = CONCAT('SELECT ''',@tblname,''',''',@Colname,''',''',@ColType,''',',@Colname,',COUNT_BIG(',@Colname,'),',@MissingData,',''',@MinVal,''',''',@MaxVal,''',',@SpecialCharacters,',',@LeadingTrailingSpaces,',',@MinLen,',',@MaxLen,',','''','This field has Unique values = ',@Cnt,'''',' FROM ',@tblname,' GROUP BY ',@Colname)  
  INSERT #Final (TableName,ColumnName,ColumnType,ColumnUniqueValues,UniqueValueOccurance,MissingDataRowCount,MinValue,MaxValue,SpecialCharacters,LeadingTrailingSpaces,MinFieldValueLen,MaxFieldValueLen,Comment)  
  EXEC(@SQL)  
 
  END  

ELSE
BEGIN

  INSERT #Final (TableName,ColumnName,ColumnType,MissingDataRowCount,MinValue,MaxValue,SpecialCharacters,LeadingTrailingSpaces,MinFieldValueLen,MaxFieldValueLen,Comment)  
  SELECT @tblname,@Colname,@ColType,@MissingData,@MinVal,@MaxVal,@SpecialCharacters,@LeadingTrailingSpaces,@MinLen,@MaxLen,CONCAT('This field has Unique values = ',@Cnt)  
  END  

SET @I = @I + 1
END

IF @report = 1
BEGIN

SELECT DISTINCT TableName,ColumnName,ColumnType,MissingDataRowCount,MinValue,MaxValue,SpecialCharacters
,LeadingTrailingSpaces,MinFieldValueLen,MaxFieldValueLen,Comment
FROM #Final
ORDER BY TableName,ColumnName

END

IF @report = 2
BEGIN

SELECT TableName,ColumnName,ColumnUniqueValues,UniqueValueOccurance,Comment
FROM #Final
ORDER BY TableName,ColumnName

END

END

Developer technologies | Transact-SQL
Developer technologies | Transact-SQL

A Microsoft extension to the ANSI SQL language that includes procedural programming, local variables, and various support functions.


2 answers

Sort by: Most helpful
  1. Guoxiong 8,221 Reputation points
    2020-11-07T00:17:26.607+00:00

    I think the problem is in the following code:

    IF ISNULL(@MaxLen ,'') < @RestrictCharlength AND ISNULL(@CNT ,'') < @RestrictNoOfUniqueValues

    Both variables @MaxLen and @CNT are the big integers, but when you use ISNULL() function, you try to use an empty string to replace NULL for the integer. That was why you got the error.

    Was this answer helpful?


  2. Erland Sommarskog 134.7K Reputation points MVP Volunteer Moderator
    2020-11-06T22:17:04.877+00:00

    It seems to be this line:

    DROP TABLE IF EXISTS #Final
    

    This syntax was added in SQL 2016, so I would guess you are an older version. What does SELECT @@version report?

    I don't know why you added this statement here; I rarely precede my CREATE TABLE statemetns for temp tables with DROP statements. So if you don't have any special reason to have it there, just remove it.

    Was this answer helpful?

    0 comments No comments

Your answer

Answers can be marked as 'Accepted' by the question author and 'Recommended' by moderators, which helps users know the answer solved the author's problem.