Statistiche & FULLSCAN

Le statistiche sono importanti. SQL Server, se opportunamente configurato, crea e manutiene le statistiche automaticamente. Questo bellissimo articolo tratta in maniera esaustiva le statistiche: MSDN: Statistics Used by the Query Optimizer in Microsoft SQL Server 2008.

Ma come fare ad identificare le statistiche sottocampionate?

Potete provare con questo script che analizza tutti i DB di una istanza:

 SET NOCOUNT ON

IF (OBJECT_ID('tempdb..#tmpstat') IS NOT NULL)
    DROP TABLE #tmpstat
IF (OBJECT_ID('tempdb..#cmd') IS NOT NULL)
    DROP TABLE #cmd
IF (OBJECT_ID('tempdb..#tmpMismatch') IS NOT NULL)
    DROP TABLE #tmpMismatch
IF (OBJECT_ID('tempdb..#tmpstatDatabase') IS NOT NULL)
    DROP TABLE #tmpstatDatabase
        
CREATE TABLE #tmpstat(
    Name VARCHAR(255), 
    Updated DATETIME,
    [Rows] INT,
    [Rows Sampled] INT,
    [Steps] INT,
    [Density] INT,
    [Average key length] FLOAT,
    [String Index Filter] VARCHAR(255),
    [Expression] VARCHAR(255),
    [Unfiltered Rows] INT)
    
CREATE TABLE #tmpstatDatabase(
    DatabaseName VARCHAR(255),
    [Object] VARCHAR(255),
    Statistic VARCHAR(255), 
    Updated DATETIME,
    [Rows] INT,
    [Rows Sampled] INT,
    [Steps] INT,
    [Density] INT,
    [Average key length] FLOAT,
    [String Index Filter] VARCHAR(255),
    [Expression] VARCHAR(255),
    [Unfiltered Rows] INT)
    
SELECT * INTO #tmpMismatch FROM #tmpstatDatabase;    

CREATE TABLE #cmd(TsqlFullScan NVARCHAR(MAX))

DECLARE @dbname SYSNAME;
DECLARE @statement NVARCHAR(MAX);

DECLARE cDatabase CURSOR FOR
SELECT name FROM sys.databases
WHERE name NOT IN('master', 'msdb', 'tempdb', 'model')
ORDER BY name;

OPEN cDatabase;

FETCH NEXT FROM cDatabase INTO @dbname;

WHILE (@@FETCH_STATUS = 0)
BEGIN
    PRINT 'Working on ' + @dbname + '.';
    SET @statement =
        'DECLARE cStats CURSOR FOR 
SELECT 
SCH.name AS Sch,
O.name AS Tab, 
S.name AS Obj
FROM [' + @dbname + '].sys.stats S
INNER JOIN [' + @dbname + '].sys.objects O ON S.object_id = O.object_id
INNER JOIN [' + @dbname + '].sys.schemas SCH ON O.schema_id = SCH.schema_id
WHERE O.type = ''U''
ORDER BY S.name

DECLARE @sch VARCHAR(255)
DECLARE @tab VARCHAR(255)
DECLARE @obj VARCHAR(255)
DECLARE @cmd NVARCHAR(MAX)


OPEN cStats
FETCH NEXT FROM cStats INTO @sch, @tab, @obj

WHILE(@@FETCH_STATUS = 0)
BEGIN
SET @cmd = 
''INSERT #tmpstat '' +
''EXEC(''''DBCC SHOW_STATISTICS(
''''''''[' + @dbname + '].['' +
@sch + ''].['' + @tab + '']'''''''', ['' + 
@obj + '']) WITH STAT_HEADER, NO_INFOMSGS'''')''
--PRINT @cmd;
EXEC sp_executeSQL @cmd;

IF EXISTS(
SELECT * FROM #tmpstat
WHERE
[Name] = @obj
AND 
ISNULL([Rows], 0) <> ISNULL([Rows Sampled], 0)
)
BEGIN
INSERT INTO #cmd
SELECT ''UPDATE STATISTICS '' + 
''[' + @dbname + '].'' +
''['' + @sch + ''].['' + @tab + ''] WITH FULLSCAN''

END

INSERT INTO #tmpstatDatabase
SELECT ''' + @dbname + ''', ''['' + @sch + ''].['' + @tab + '']'', * FROM #tmpstat

INSERT INTO #tmpMismatch 
SELECT ''' + @dbname + ''', ''['' + @sch + ''].['' + @tab + '']'', * FROM #tmpstat
WHERE
[Name] = @obj
AND 
ISNULL([Rows], 0) <> ISNULL([Rows Sampled], 0) 

TRUNCATE TABLE #tmpstat;

FETCH NEXT FROM cStats INTO @sch, @tab, @obj 
END 

CLOSE cStats;
DEALLOCATE cStats;'
        
        --PRINT @statement;
        
        EXEC master..sp_ExecuteSQL @statement;
        
        FETCH NEXT FROM cDatabase INTO @dbname;
END

CLOSE cDatabase;
DEALLOCATE cDatabase;

--SELECT * FROM #tmpstatDatabase;
SELECT * FROM #tmpMismatch;
SELECT DISTINCT * FROM #cmd;

DROP TABLE #tmpstat;
DROP TABLE #cmd;
DROP TABLE #tmpMismatch;
DROP TABLE #tmpstatDatabase;
GO

L'output sara' l'elenco di statistiche sottocampionate. Il secondo recordset e' l'elenco dei comandi T-SQL per correggere la situazione.

Happy coding,

Francesco Cogno