Kind of an optimisation question and kind of sharing in the hope it might be of use to someone else.
PROBLEM
I needed to work out where the nodes were in an XML download so that I knew what I was looking for. Maybe there's a T-SQL solution to this already but on a brief Google search it wasn't obvious so I had to come up with something. Inadvertently I ended up shredding the XML at the same time but at least I got the data I was after. Ideally, I would have kept it to varchar(8000) and 1 page but I cannot do that because the XML is a blob so it had to go varchar(max). As you can imagine that is VERY dangerous performance-wise and being a table function that essentially is a loop within a loop scanning sequentially through each cell that can go parallel is a recipe for disaster. So, the function is to be used with caution, preferably only on 1 row of data or a few rows of data at a time to get an idea of what nodes are present. If you can keep the total data length under a page (8000 bytes) then so much the better. The function is inspired by the split-string function from the legend that is Jeff Moden - thanks, Jeff! If there's a more efficient way of doing this from T-SQL then I'm all ears.
MY SOLUTION / BODGIT.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[tfnFindXMLnodes] (@XML xml)
-- Table-valued user-defined function - TVF
/*
AUTHOR JCEH
CREDIT TO JEFF MODEN FOR HIS INSPIRATIONAL SPLITSTRING FUNCTION. I just jazzed it up a little.
*/
RETURNS @TableList TABLE(Seq int,ElementOrAttribute char(1) COLLATE Latin1_General_CS_AS,NodeLevel int,NodePath varchar(1000),Element varchar(255),AttributeOrValue varchar(255),StringX varchar(255))
BEGIN
DECLARE @XMLstring varchar(max) = CAST(@XML as varchar(max)) -- XML input as string to analyse
DECLARE @XMLattrib varchar(8000)
DECLARE @xcl int = 0,@NodePath varchar(1024)='',@StringNode varchar(255),@type char(1),@StringAttribute varchar(255),@StringElement varchar(255),@level int=0, @smallint smallint = 1-- Outputs
DECLARE @StartString varchar(5)='<',@EndString varchar(5)='>' ,@StartPointer int,@EndPointer int, @EndSpace int -- Start and end points
DECLARE @StartStringA varchar(5)=' ',@EndStringA varchar(5)='="',@StartPointerA int,@EndPointerA int -- Start and end points of attributes
--- Find starting point
SELECT @StartPointer = CHARINDEX(@StartString, @XMLstring) --- Find first opening bracket
,@EndPointer = CHARINDEX(@EndString, @XMLstring,CHARINDEX(@StartString, @XMLstring)) -- Find first closing bracket
,@EndSpace = CHARINDEX(@EndStringA, @XMLstring,CHARINDEX(@StartString, @XMLstring)) -- Find first =" after opening in case of attributes present
---------------------------------------------------------------------------------------------------
WHILE (@StartPointer < LEN(@XMLstring) + 1)
BEGIN -- while loop 1
SET @int = @int+1
IF @EndPointer = 0 SET @EndPointer = LEN(@XMLstring)+1
IF @EndSpace = 0 SET @EndSpace = LEN(@XMLstring)+1
--- Are we looking at an element on its own or are there also attributes?
SET @type = CASE
WHEN @EndPointer<@EndSpace
THEN 'E'
ELSE 'a'
END
SET @StringNode = LTRIM(RTRIM(SUBSTRING(@XMLstring, @StartPointer, @EndPointer-@StartPointer+1)))
SET @level = CASE WHEN @StringNode LIKE '%</%' THEN @level-1 ELSE @level+1 END
SET @StringElement= CASE WHEN @type='E' THEN SUBSTRING(@StringNode,CHARINDEX(@StartString,@StringNode,1),255) ELSE SUBSTRING(@StringNode,CHARINDEX(@StartString,@StringNode,1),CHARINDEX(' ',@StringNode,1)) END
SET @StringAttribute = CASE WHEN @StringNode LIKE '%</%' THEN LEFT(@StringNode,CHARINDEX('</',@StringNode,1)-1) ELSE '' END
SET @NodePath = CASE
WHEN @StringNode LIKE '%</%' THEN LEFT(@NodePath,LEN(@NodePath)-CHARINDEX('/',REVERSE(@NodePath),1)) -- Closing part
ELSE @NodePath+CASE WHEN @level=1 THEN '' ELSE '/'END+CASE WHEN @StringElement LIKE '<%/>' THEN SUBSTRING(@StringElement,2,LEN(@StringElement)-3) WHEN @StringElement LIKE '<%>' THEN RTRIM(SUBSTRING(@StringElement,2,LEN(@StringElement)-2)) ELSE RTRIM(SUBSTRING(@StringElement,2,255)) END
END
INSERT INTO @TableList(Seq,ElementOrAttribute,NodeLevel,NodePath,Element,AttributeOrValue,StringX)
VALUES(@int,CASE WHEN @type='a' THEN 'e' WHEN @StringAttribute='' THEN 'E' ELSE 'V' END,@level
--,@NodePath on lines below!
,CASE
WHEN @StringElement LIKE '</%>' AND RIGHT(@NodePath,CHARINDEX('/',REVERSE(@NodePath),1))<>'/'+CASE WHEN @StringElement LIKE '</%>' THEN SUBSTRING(@StringElement,3,LEN(@StringElement)-3) WHEN @StringElement LIKE '<%>' THEN RTRIM(SUBSTRING(@StringElement,2,LEN(@StringElement)-2)) ELSE RTRIM(SUBSTRING(@StringElement,2,255)) END THEN @NodePath+'/'+CASE WHEN @StringElement LIKE '</%>' THEN SUBSTRING(@StringElement,3,LEN(@StringElement)-3) WHEN @StringElement LIKE '<%>' THEN RTRIM(SUBSTRING(@StringElement,2,LEN(@StringElement)-2)) ELSE RTRIM(SUBSTRING(@StringElement,2,255)) END
ELSE @NodePath END
/*Element*/,CASE WHEN @StringElement LIKE '</%>' THEN SUBSTRING(@StringElement,3,LEN(@StringElement)-3) WHEN @StringElement LIKE '<%>' THEN RTRIM(SUBSTRING(@StringElement,2,LEN(@StringElement)-2)) ELSE RTRIM(SUBSTRING(@StringElement,2,255)) END
,@StringAttribute,@StringNode)
---------------------------------------------------------------------------------------------------
--- If attributes present then setup sub-loop to obtain each attribute in turn
IF(@type='a')
BEGIN -- Attributes present
SET @XMLattrib = SUBSTRING(@XMLstring --- Need substring to split with all attributes
,CHARINDEX(@StartString, @XMLstring,@StartPointer)+1 -- starting at opening bracket
,CHARINDEX(@EndString, @XMLstring,@StartPointer)-CHARINDEX(@StartString, @XMLstring,@StartPointer) -- ending where closing angle bracket.
)
SET @StartStringA = ' '
SET @EndStringA = '="'
SELECT @StartPointerA = CHARINDEX(@StartStringA,@XMLattrib,1) --- Find first attribute start (assume space before it)
,@EndPointerA = CHARINDEX(@EndStringA,@XMLattrib,CHARINDEX(@StartStringA, @XMLattrib,1)+1) -- Find first attribute value start after the space
WHILE (@StartPointerA<LEN(@XMLattrib))
BEGIN -- while loop 2
SET @int = @int+1 -- For seq number
IF @EndPointerA = 0 SET @EndPointerA = LEN(@XMLattrib)+1 -- if no end then use whole string
SET @StringAttribute = SUBSTRING(@XMLattrib,@StartPointerA,@EndPointerA-@StartPointerA)
INSERT INTO @TableList(Seq,ElementOrAttribute,NodeLevel,NodePath,Element,AttributeOrValue,StringX)
VALUES(@int,CASE WHEN @StringAttribute LIKE '%"%"%' THEN 'v' ELSE @type END,@level,@NodePath,RTRIM(SUBSTRING(@StringElement,2,255)),CASE WHEN @StringAttribute LIKE '"%"/>' THEN SUBSTRING(@StringAttribute,2,LEN(@StringAttribute)-4) WHEN @StringAttribute LIKE '"%">' THEN SUBSTRING(@StringAttribute,2,LEN(@StringAttribute)-3) WHEN @StringAttribute LIKE '"%"' THEN SUBSTRING(@StringAttribute,2,LEN(@StringAttribute)-2) ELSE @StringAttribute END,@XMLattrib)
--- Go to next value
IF(@smallint>0)
BEGIN -- >0 then space is currently starting point and needs switching
SET @StartPointerA = @EndPointerA + 1
SET @EndPointerA = CHARINDEX(@StartStringA, @XMLattrib, @StartPointerA)
END -- >0 then space is currently starting point and needs switching
--- Go to next attribute
IF(@smallint<0)
BEGIN -- <0 then =" is currently starting point and needs switching
SET @StartPointerA = @EndPointerA + 1
SET @EndPointerA = CHARINDEX(@EndStringA, @XMLattrib, @StartPointerA)
END -- <0 then space is currently starting point and needs switching
SET @smallint=@smallint*CAST(-1 as smallint) -- Flip start and end delimiters
END -- while loop 2
SET @level = CASE WHEN @StringAttribute LIKE '%/>' THEN @level-1 ELSE @level END
SET @NodePath = CASE WHEN @StringAttribute LIKE '%/>' THEN LEFT(@NodePath,LEN(@NodePath)-CHARINDEX('/',REVERSE(@NodePath),1)) ELSE @NodePath END
END -- Attributes present
---------------------------------------------------------------------------------------------------
SET @StartPointer = @EndPointer + 1
SET @EndPointer = CHARINDEX(@EndString, @XMLstring, @StartPointer)
SET @EndSpace = CHARINDEX(@EndStringA, @XMLstring,@StartPointer)
END -- while loop 1
---------------------------------------------------------------------------------------------------
RETURN
---------------------------------------------------------------------------------------------------
END -- FUNCTION
GO