Insert dummy data in tables with relationship

Torno spesso sull’argomento relativo ai test di carico / performance dei database e sull’importanza di poter profilare l’utilizzo che avranno con una mole di dati quantomeno simile allo scenario di produzione.

Diventa importante, quindi, avere un qualsivoglia meccanismo di generazione automatica di dati in modo tale da demandare ad un processo automatico il popolamento delle nostre tabelle.

Ho già parlato qui di una possibile stored procedure che consenta di inserire dati “finti” sulla base del tipo di dati con cui è modellata ciascuna colonna.

 

Il problema, di questa implementazione, è l’impossibilità di inserire valori su colonne che dipendono da valori di una tabella padre.

Prendiamo, ad esempio, due ipotetiche tabelle che consentono di memorizzare l’intestazione e le righe di fatture.

Qualcosa come:

image

 

La necessità, sulla tabella “figlia”, è quella di inserire, nelle colonne di foreign key, soltanto valori che siano presenti nelle colonne primary key della tabella “padre”.

Effettuo il setup delle tabelle come mostrate in figura:

 /* start setup database for test */
USE demo;
GO

if object_id('dbo.invoiceRow') is not null
begin
  drop table dbo.invoiceRow
end
GO

if object_id('dbo.invoiceHeader') is not null
begin
  drop table dbo.invoiceHeader
end
GO


create table invoiceHeader
(
    idInvoiceHeader smallint identity(-32000,1),
    idInvoiceHeader2 smallint not null,
    invoiceDate date,
    idCustomer int
)
alter table invoiceHeader add constraint PKInvoiceHeader primary key (idInvoiceHeader, idInvoiceHeader2)
GO

create table invoiceRow
(
    idInvoiceRow smallint primary key identity(-32000,1),
    idInvoiceHeader smallint, 
    idInvoiceHeader2 smallint,
    idProduct int,
    qty smallint,
    price money
)
alter table invoiceRow add CONSTRAINT FKInvoiceHeader 
    FOREIGN KEY (idInvoiceHeader, idInvoiceHeader2) 
    REFERENCES InvoiceHeader (idInvoiceHeader, idInvoiceHeader2)

GO
/* end setup database for test */

 

Di seguito, invece, una possibile implementazione della procedura in grado di inserire valori in entrambe le tabelle:

 IF EXISTS (SELECT * FROM sys.objects WHERE type = 'P' AND name = 'up_insertDummyDataWithFK')
DROP PROCEDURE dbo.up_insertDummyDataWithFK
GO

CREATE PROCEDURE dbo.up_insertDummyDataWithFK
(
    @schemaName nvarchar( 250 ) , 
    @tableName nvarchar( 250 ) , 
    @numberOfRows int 
)
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @i int = 0, @tsql varchar( max ) = '';
    WHILE (@i < @numberOfRows)
        BEGIN

          /* check if exists relationship in the table */
          IF EXISTS (SELECT TOP 1 1 FROM sys.foreign_keys WHERE parent_object_id = OBJECT_ID( @tableName ))
          BEGIN
             declare @TSqlFromForeignKeyValues nvarchar(max)
             declare @nrFK tinyint
             declare @x tinyint = 0
             declare @PrimaryKeyValues table (columnName varchar(100), value nvarchar(100), tsqlStatement nvarchar(max))

             /* retrieve primary keys from parent table */
             INSERT @PrimaryKeyValues (columnName, tsqlStatement) 
             SELECT DISTINCT CU.COLUMN_NAME, 'SELECT TOP 1 @v = ' + CU.COLUMN_NAME + ' FROM ' + PK.TABLE_NAME + ' ORDER BY NEWID()' as tSQL
               FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS C 
                INNER JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS FK ON C.CONSTRAINT_NAME = FK.CONSTRAINT_NAME
                INNER JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS PK ON C.UNIQUE_CONSTRAINT_NAME = PK.CONSTRAINT_NAME
                INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE CU ON C.CONSTRAINT_NAME = CU.CONSTRAINT_NAME
                INNER JOIN
                ( 
                    SELECT i1.TABLE_NAME , i2.COLUMN_NAME
                    FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS i1 
                       INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE i2 ON i1.CONSTRAINT_NAME = i2.CONSTRAINT_NAME
                    WHERE i1.CONSTRAINT_TYPE = 'PRIMARY KEY' 
                ) PT ON PT.TABLE_NAME = PK.TABLE_NAME
                WHERE FK.TABLE_NAME = @tableName AND FK.TABLE_SCHEMA = @schemaName;

                set @nrFK = (select @@rowcount)
                --> select @nrFK as numberOfForeignKeys

                declare @v nvarchar(100)
                declare @cName varchar(100) = ''

                /* for each primary key retrieve a valid value from parent table */
                while (@x < @nrFK)
                begin
                    
                    set @TSqlFromForeignKeyValues = 
                          (
                             select top 1 [@PrimaryKeyValues].tSQLStatement 
                             from @PrimaryKeyValues 
                             where [@PrimaryKeyValues].[value] is null 
                             order by [@PrimaryKeyValues].tsqlStatement
                          )
                    
                    if (@x > 0)
                    begin
                       declare @tsqlModified nvarchar(max)
                       set @tsqlModified = replace(@TSqlFromForeignKeyValues,'ORDER BY NEWID()',' WHERE ' + @cName + ' = ' + @v + ' ORDER BY NEWID()')
                       exec sp_executesql @tsqlModified, N'@v nvarchar(100) out', @v out   
                    end
                    else
                    begin
                       exec sp_executesql @TSqlFromForeignKeyValues, N'@v nvarchar(100) out', @v out
                    end

                    set @cName = (select top 1 [@PrimaryKeyValues].columnName from @PrimaryKeyValues where [@PrimaryKeyValues].[value] is null order by [@PrimaryKeyValues].tsqlStatement)
                    
                    update @PrimaryKeyValues set [@PrimaryKeyValues].[value] = @v where [@PrimaryKeyValues].tsqlStatement = @TSqlFromForeignKeyValues
                    set @x += 1
                end
                
          END

          declare @FKColumnName nvarchar(max) = ''
          declare @FKValues nvarchar(max) = ''

          if exists(select top 1 1 from @PrimaryKeyValues)
          begin
             select 
                @FKColumnName = @FKColumnName + [@PrimaryKeyValues].columnName + ', ' , 
                @FKValues = @FKValues + [@PrimaryKeyValues].[value] + ', ' from @PrimaryKeyValues

             set @FKColumnName = left(@FKColumnName, len(@FKColumnName)-1)
             set @FKColumnName = ', ' + @FKColumnName
             
             set @FKValues = left(@FKValues, len(@FKValues)-1)
             set @FKValues = ', ' + @FKValues
          end

          delete from @PrimaryKeyValues
       
       /* retrieve column list without foreign keys */
       declare @columnListWhitoutFK nvarchar(max)
       set @columnListWhitoutFK = ''
       SELECT @columnListWhitoutFK = @columnListWhitoutFK + T.columnName + ', ' FROM
       (
          SELECT C.name as columnName
          FROM sys.columns C
          JOIN sys.tables T on t.OBJECT_ID = c.OBJECT_ID
          WHERE T.name = @tableName AND T.schema_id = schema_id(@schemaName) AND
             C.is_identity = 0
       
          EXCEPT
    
          SELECT COL_NAME(fc.parent_object_id, fc.parent_column_id) AS FKColumnName
          FROM sys.foreign_keys AS f
          INNER JOIN sys.foreign_key_columns AS fc ON f.OBJECT_ID = fc.constraint_object_id
          where OBJECT_NAME(f.parent_object_id) = @tableName and f.schema_id = schema_id(@schemaName)
       ) T

       set @columnListWhitoutFK = left(@columnListWhitoutFK, len(@columnListWhitoutFK)-1)

          /* generate random value for each column (no foreign keys) */
            SELECT @tsql = @tsql + CASE
                                 WHEN c.column_id = 1 THEN 'INSERT ' + @schemaName + '.[' + t.name + '] ' + 
                             '(' + @columnListWhitoutFK + @FKColumnName + ') VALUES ( '
                                     ELSE ''
                                 END + 
                          CASE
                          WHEN c.is_identity = 0 THEN CASE
                            WHEN y.name IN ( 'bit' , 'bigint' , 'int' , 'smallint' , 'tinyint' , 'float' , 'decimal' , 
                             'numeric' , 'money' , 'smallmoney' , 'real' )
                          THEN SUBSTRING( CAST(1000*RAND() AS VARCHAR(50)) , 1 , c.max_length)
                            WHEN y.name IN ( 'binary' , 'varbinary' ) 
                          THEN SUBSTRING( '0x546869732069732044756D6D792044617461' , 1, c.max_length )
                            WHEN y.name IN ( 'varchar' , 'char' , 'text', 'nchar' , 'nvarchar' , 'ntext' ) 
                          THEN '''' + SUBSTRING( 'some data some data some data' , 1 , c.max_length /2 ) + ''''
                            WHEN y.name IN ( 'date' , 'time' , 'datetime' , 'datetime2' , 'smalldatetime' , 'datetimeoffset' ) 
                          THEN '''' + CONVERT( varchar( 25 ) , GETDATE( ) , 121 ) + ''''
                            WHEN y.name IN ( 'uniqueidentifier' ) 
                          THEN '''' + CAST( NEWID( ) AS varchar( 36 )) + ''''
                       ELSE '' END + CASE
                                WHEN c.column_id = (SELECT MAX( column_id ) FROM sys.columns WHERE OBJECT_ID = c.OBJECT_ID )THEN @FKValues + ');'
                                    ELSE ','
                                END
                              ELSE ''
                          END 
        FROM sys.tables AS t 
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
        INNER JOIN sys.columns c ON t.OBJECT_ID = c.OBJECT_ID
        INNER JOIN sys.types AS y ON c.user_type_id = y.user_type_id
        WHERE 
          t.name = @tableName AND 
            s.name = @schemaName AND
          c.name IN
          (
             SELECT 
                COLUMN_NAME as columnName
             FROM INFORMATION_SCHEMA.COLUMNS
             WHERE 
                TABLE_NAME = @tableName AND 
                TABLE_SCHEMA = @schemaName
       
             EXCEPT
    
             SELECT 
                COL_NAME( fc.parent_object_id , fc.parent_column_id )AS FKColumnName
             FROM sys.foreign_keys AS f 
             INNER JOIN sys.foreign_key_columns AS fc ON f.OBJECT_ID = fc.constraint_object_id
             WHERE 
                OBJECT_NAME( f.parent_object_id ) = @tableName AND 
                f.schema_id = SCHEMA_ID( @schemaName )
          )
              
          SET @i+=1;

        /* insert the values */
        --> SELECT @tsql
          EXEC ( @tsql );
          SET @tsql = '';
        END
END;
GO

Un esempio di caricamento dati (attenzione: naturalmente va popolata prima la tabella “padre”):

 /* test the procedure (before the parent, next the child table) */
EXEC dbo.up_insertDummyDataWithFK @schemaName = 'dbo', @tableName = 'invoiceHeader' , @numberOfRows = 10;
EXEC dbo.up_insertDummyDataWithFK @schemaName = 'dbo', @tableName = 'invoiceRow' , @numberOfRows = 50;
GO

Vedo qualche dato:

 /* select the dummy data */
select count(*) as nrHeaders, (select count(*) from invoiceRow) as nrRows from invoiceHeader
select top 2 * from invoiceHeader 
select top 2 * from invoiceRow 
GO
 

image