merge chemicalhash h
using fmdchemical f on h.strsubstances=f.StrSubstance and h.strmass=f.strmass and h.strcasnumber=f.strcasnumber
when matched then
update set h.chemicalid=f.chemicalid ;
slow execution WHEN update chemical id on table chemicalhash so how to enhance it ?
I work on sql server 2017 i need update chemical id on table chemicalhash based on 3 columns compare to table fmd chemical
compare 3 columns strSubstances,strMass,strCASNumber from table chemicalhash
with 3 columns strSubstances,strMass,strCASNumber from table fmdchemical to get chemical id
but issue slow so how to enhance it
CREATE TABLE [dbo].[chemicalhash](
[id] [int] IDENTITY(1,1) NOT NULL,
[ChemicalID] [int] NULL,
[strSubstances] [nvarchar](max) NULL,
[strMass] [nvarchar](max) NULL,
[strCASNumber] [nvarchar](max) NULL
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
SET IDENTITY_INSERT [dbo].[chemicalhash] ON
INSERT [dbo].[chemicalhash] ([id], [ChemicalID], [strSubstances], [strMass], [strCASNumber]) VALUES (1, NULL, N'3P04,Al2O3,C,C,Cr203,Cu,Cu,Fe,Mn,Others,Others,Others,Others,Pb,Si02,SiC,Silicone Resin,SiO2,Sn,Sn,Ti02', N'0.0004,0.0096,0.336,0.3992,0.5904,0.764,0.7904,1.5408,1.6176,1.6808,1.6808,2.9344,5.2392,7.2,7.9608,19.2072,32.08,65.3584,148.3384,232.7992,269.472', N'592910,592960,592961,593043,593043,593259,593290,593306,593306,593495,593501,593501,593505,593505,621442,621442,621442,621442,622089,624020,650559')
INSERT [dbo].[chemicalhash] ([id], [ChemicalID], [strSubstances], [strMass], [strCASNumber]) VALUES (2, NULL, N'3P04,Al2O3,C,C,Cr203,Cu,Cu,Fe,Mn,Others,Others,Others,Others,Pb,Si02,SiC,Silicone Resin,SiO2,Sn,Sn,Ti02', N'0.0004,0.0096,0.336,0.3992,0.5904,0.764,0.7904,1.5408,1.6176,1.6808,1.6808,2.9344,5.2392,7.2,7.9608,19.2072,32.08,65.3584,148.3384,232.7992,269.472', N'592910,592960,592961,593043,593043,593259,593290,593306,593306,593495,593501,593501,593505,593505,621442,621442,621442,621442,622089,624020,650559')
SET IDENTITY_INSERT [dbo].[chemicalhash] OFF
Second table
CREATE TABLE [dbo].[fmdchemical](
[id] [int] IDENTITY(1,1) NOT NULL,
[chemicalid] [int] NULL,
[StrSubstance] [nvarchar](3500) NULL,
[strmass] [nvarchar](3500) NULL,
[strcasnumber] [nvarchar](3500) NULL
) ON [PRIMARY]
GO
SET IDENTITY_INSERT [dbo].[fmdchemical] ON
INSERT [dbo].[fmdchemical] ([id], [chemicalid], [StrSubstance], [strmass], [strcasnumber]) VALUES (826807, 748787, N'3P04,AL2O3,C,C,Cr203,Cu,Cu,Fe,Mn,Others,others,others,Others,Pb,Si02,SiC,Silicone Resin,SiO2,Sn,Sn,Ti02', N'0.0004,0.0096,0.336,0.3992,0.5904,0.764,0.7904,1.5408,1.6176,1.6808,1.6808,2.9344,5.2392,7.2,7.9608,19.2072,32.08,65.3584,148.3384,232.7992,269.472', N'592910,592960,592961,593043,593043,593259,593290,593306,593306,593495,593501,593501,593505,593505,621442,621442,621442,621442,622089,624020,650559')
SET IDENTITY_INSERT [dbo].[fmdchemical] OFF
slow on this update statment
update h set h.chemicalid=f.chemicalid from chemicalhash h
inner join fmdchemical f on h.strsubstances=f.StrSubstance and h.strmass=f.strmass and h.strcasnumber=f.strcasnumber
4 answers
Sort by: Most helpful
-
Jingyang Li 5,891 Reputation points
2022-05-17T01:39:03.923+00:00 -
Bert Zhou-msft 3,421 Reputation points
2022-05-17T03:07:08.093+00:00 Hi,@ahmed salah
Welcome to Microsoft T-SQL Q&A Forum!Have you tried creating a nonclustered index for the linked column? Like this:
CREATE NONCLUSTERED INDEX IX_Index ON [dbo].[fmdchemical](chemicalid)INCLUDE(StrSubstance,strmass,strcasnumber)
Best regards,
Bert Zhou
If the answer is the right solution, please click "Accept Answer" and kindly upvote it. If you have extra questions about this answer, please click "Comment".
Note: Please follow the steps in our Documentation to enable e-mail notifications if you want to receive the related email notification for this thread. -
https://www.ChemicalSuppliers.com 0 Reputation points
2024-03-15T13:59:06.8266667+00:00 You can employ several strategies to enhance the execution speed of your update statement in SQL Server 2017. These optimizations focus on reducing the time it takes to match records between
chemicalhash
andfmdchemical
tables based on thestrSubstances
,strMass
, andstrCASNumber
columns.Optimization Strategies:
Indexes: Ensure both
chemicalhash
andfmdchemical
tables have appropriate indexes on the columns used in the join (strSubstances
,strMass
,strCASNumber
). This can dramatically speed up the matching process.sqlCopy code CREATE
Batch Processing: If the tables are very large, consider updating the
chemicalid
in batches to reduce lock contention and improve overall performance.Optimize Data Types: Make sure the data types for
strSubstances
,strMass
, andstrCASNumber
are as compact as possible without losing precision. This can reduce the amount of data that SQL Server needs to process and move around.- Review Execution Plan: Generate and review the execution plan for your update statement to identify potential bottlenecks or improvement areas.
Sample Revised Update Statement
Here's how you might implement batch processing:
sqlCopy code DECLARE
Implementing these SQL optimizations streamlines your database interactions, like Chemical Suppliers streamlines connections between chemical suppliers and buyers. Our platform efficiently finds suppliers or buyers for your specific chemical needs, leveraging powerful search and matching technology. It's all about making connections as efficiently as possible, whether it's data in your database or participants in the global chemical industry.
-
Bruce (SqlWork.com) 55,196 Reputation points
2024-03-15T20:15:56.3733333+00:00 there are two performance issues with your query.
first is the cost of the join, if there is not an index on the lookup table. you can fix this by creating a covered index with the four columns. you could even temporarily create the index and drop after use.
the second is updating 6 million rows in one transaction. you could use a loop and update subsets of the rows at a time.
declare @startid int = 0 declare @batchsize int = 10000 declare @batchids table (id int) -- prime the pump insert @batchids select top (@batchsize) id from chemicalhash order by id -- do batch updates while @@rowcount > 0 begin -- update update chemicalhash set chemicalid=f.chemicalid from chemicalhash h inner join @batchids b on h.id = b.id inner join fmdchemical f on h.strsubstances=f.StrSubstance and h.strmass=f.strmass and h.strcasnumber=f.strcasnumber -- next batch select @startid = max(id) from @batchids delete from @batchids insert @batchids select top (@batchsize) id from chemicalhash where id > @startid order by id end
if you only need to update the null rows you can simplify
declare @batchsize int = 1 declare @rowcount int = 1 while @rowcount > 0 begin update top(@batchsize) chemicalhash set chemicalid=f.chemicalid from chemicalhash h inner join fmdchemical f on h.strsubstances=f.StrSubstance and h.strmass=f.strmass and h.strcasnumber=f.strcasnumber where h.chemicalid is null select @rowcount = @@rowcount end