Amit Amit - 1 month ago 13
SQL Question

Parent child hierarchy path without using CTE

Hi I have following tables:

create table Features
(
FeatureId bigint,
FeatureName varchar(255),
ParentId bigint
)

insert into Features values(10, 'Feature 1', 1);
insert into Features values(11, 'Feature 2', 10);
insert into Features values(12, 'Feature 3', 11);
insert into Features values(13, 'Feature 4', 2);
insert into Features values(14, 'Feature 5', 13);

insert into Features values(15, 'Feature 6', 3);
insert into Features values(16, 'Feature 7', 15);
insert into Features values(17, 'Feature 8', 16);
insert into Features values(18, 'Feature 9', 17);
insert into Features values(19, 'Feature 10', 18);
insert into Features values(20, 'Feature 11', 19);
insert into Features values(21, 'Feature 12', 12);


create table Scenarios
(
ScenarioId bigint,
ParentId bigint,
ScenarioTitle varchar(25)
)

insert into Scenarios values(1, 0, 'Scenario 1')
insert into Scenarios values(2, 0, 'Scenario 2')
insert into Scenarios values(3, 0, 'Scenario 3')


Here, a feature can have either another feature as parent or a scenario as parent. For scenario, parent id can either be 0, or another scenario.

I would like to get path of each feature as follows:

FeatureId ParentId FeatureName PathString PathLength
10 1 Feature 1 1 0
11 10 Feature 2 1/10 1
12 11 Feature 3 1/10/11 2
13 2 Feature 4 2 0
14 13 Feature 5 2/13 1
15 3 Feature 6 3 0
16 15 Feature 7 3/15 1
17 16 Feature 8 3/15/16 2
18 17 Feature 9 3/15/16/17 3
19 18 Feature 10 3/15/16/17/18 4
20 19 Feature 11 3/15/16/17/18/19 5
21 12 Feature 12 1/10/11/12 3


Since I would like to collect this result in a temp table for further processing, I tried
select into
and Azure SQL DW throws
Using SELECT INTO statement is not supported in Parallel Data Warehouse. Modify the statement and re-try executing it.


Here is my query (may not be in great shape as I am still figuring out recursive sql)

drop table FeaturesWithPath;
;WITH FeaturePaths (FeatureId, ParentId, FeatureName, PathString)
AS
(
-- Anchor member definition
SELECT g.FeatureId, g.ParentId, g.FeatureName, cast(CAST(g.ParentId as nvarchar(max)) as varchar(max)) as PathString
FROM dbo.Features AS g
UNION ALL
-- Recursive member definition
SELECT g.FeatureId, g.ParentId, g.FeatureName, PathString + '/' + cast(g.ParentId as varchar(max))
FROM dbo.Features AS g
INNER JOIN FeaturePaths AS gp
ON g.ParentId = gp.FeatureId
)

SELECT FeatureId, ParentId, FeatureName, PathString into FeaturesWithPath FROM FeaturePaths;
--select * from FeaturesWithPath order by FeatureId;

drop table FeaturesWithPathLength;
select *, LEN(PathString) - LEN(REPLACE(PathString, '/', '')) as PathLength into FeaturesWithPathLength from FeaturesWithPath
--select * from FeaturesWithPathLength order by FeatureId

drop table MaxFeaturePathLenghtRowTable;

select * into MaxFeaturePathLenghtRowTable
from FeaturesWithPathLength
where PathLength = (select max(PathLength) from FeaturesWithPathLength as f where f.FeatureId = FeaturesWithPathLength.FeatureId)
or PathLength = (select max(PathLength) from FeaturesWithPathLength as f where f.FeatureId = FeaturesWithPathLength.FeatureId
and PathLength > (select max(PathLength) from FeaturesWithPathLength as f2 where f2.FeatureId = FeaturesWithPathLength.FeatureId));

--select * from MaxFeaturePathLenghtRowTable order by FeatureId

drop table FeaturesPerParentTable
select FeatureId, [value] as NewParentId, FeatureName, COALESCE(NULLIF(SUBSTRING(PathString, 0, CHARINDEX('/', PathString)), ''), [value]) AS ScenarioId into FeaturesPerParentTable
from MaxFeaturePathLenghtRowTable
cross apply STRING_SPLIT (PathString, '/') cs order by FeatureId

select * from FeaturesPerParentTable order by FeatureId;


I tried to convert the CTE to use CTAS which did not work either.

This is how I tried CTAS:

;WITH FeaturePaths (FeatureId, ParentId, FeatureName, PathString)
AS
(
-- Anchor member definition
SELECT g.FeatureId, g.ParentId, g.FeatureName, cast(CAST(g.ParentId as nvarchar(max)) as varchar(max)) as PathString
FROM dbo.Features AS g
--WHERE parentId=0
UNION ALL
-- Recursive member definition
SELECT g.FeatureId, g.ParentId, g.FeatureName, PathString + '/' + cast(g.ParentId as varchar(max))
FROM dbo.Features AS g
INNER JOIN FeaturePaths AS gp
ON g.ParentId = gp.FeatureId
)

CREATE TABLE #tmp_fct
WITH
(
DISTRIBUTION = ROUND_ROBIN
)
AS
SELECT FeatureId, ParentId, FeatureName, PathString
FROM FeaturePaths;


Now I am wondering if there is a way to get path for each Feature on
Azure SQL DW
and collect result in to a table.

-- UPDATE --

For solution in SQL see this

Here is solution in C#

void Main()
{
var scenarios = new List<Scenario> {
new Scenario{Id = 1, Title = "Scenario 1", ParentId = 0},
new Scenario{Id = 2, Title = "Scenario 2", ParentId = 0},
new Scenario{Id = 3, Title = "Scenario 3", ParentId = 0},
};

var features = new List<Feature> {
new Feature{Id =10, Title = "Feature 1", ParentId =1},
new Feature{Id =11, Title = "Feature 2", ParentId =10},
new Feature{Id =12, Title = "Feature 3", ParentId =11},
new Feature{Id =13, Title = "Feature 4", ParentId =2},
new Feature{Id =14, Title = "Feature 5", ParentId =13},

new Feature{Id =15, Title = "Feature 6", ParentId =3},
new Feature{Id =16, Title = "Feature 7", ParentId =15},
new Feature{Id =17, Title = "Feature 8", ParentId =16},
new Feature{Id =18, Title = "Feature 9", ParentId =17},
new Feature{Id =19, Title = "Feature 10", ParentId =18},
new Feature{Id =20, Title = "Feature 11", ParentId =19},
new Feature{Id =21, Title = "Feature 12", ParentId =12}
};

var scenarioIds = new HashSet<long>(scenarios.Select(x => x.Id));

//get path
IList<Feature> withPath = features.Select(x => { x.Path = GetPath(x, features, scenarioIds); return x; }).ToList().Dump("With path");
}

private string GetPath(Feature f, IList<Feature> features, HashSet<long> scenarioIds)
{
if (scenarioIds.Contains(f.ParentId))
{
return f.ParentId.ToString();
}
else
{
var parent = features.First(d => d.Id == f.ParentId);
return GetPath(parent, features, scenarioIds) + "/" + f.ParentId;
}
}

public class Scenario
{
public long Id { get; set; }
public string Title { get; set; }
public long ParentId { get; set; }
}

public class Feature
{
public long Id { get; set; }
public string Title { get; set; }
public long ParentId { get; set; }
public string Path { get; set; } //temp
}

Answer

As Azure SQL Data Warehouse does not support recursive CTEs or cursors at this time, you could do this with a good old-fashioned loop, eg:

-- Loop thru Features
DECLARE @counter INT = 1;

-- Insert first record where no parent exists
IF OBJECT_ID('tempdb..#features') IS NOT NULL DROP TABLE #features;

CREATE TABLE #features
WITH
    (
    DISTRIBUTION = HASH ( FeatureId ),
    LOCATION = USER_DB
    )
AS
WITH cte AS
(
SELECT 1 AS xlevel, p.FeatureId, p.ParentId, p.FeatureName, CAST( p.ParentId AS VARCHAR(255) ) AS PathString, 0 AS PathLength
FROM dbo.Features p
WHERE NOT EXISTS 
    (
    SELECT *
    FROM dbo.Features c
    WHERE p.ParentId = c.FeatureId
    )
)
SELECT *
FROM cte;


SELECT 'before' s, * FROM #features ORDER BY FeatureId;

-- Loop recursively through the child records
WHILE EXISTS (
SELECT *
    FROM #features p
        INNER JOIN dbo.features c ON p.FeatureId = c.ParentId
    WHERE p.xlevel = @counter
    )
BEGIN

    -- Insert next level
    INSERT INTO #features ( xlevel, FeatureId, ParentId, FeatureName, PathString, PathLength )
    SELECT @counter + 1 AS xlevel, c.FeatureId, c.ParentId, c.FeatureName, p.PathString + '/' + CAST( c.ParentId AS VARCHAR(255) ) AS PathString, @counter AS PathLength
    FROM #features p
        INNER JOIN dbo.features c ON p.FeatureId = c.ParentId
    WHERE p.xlevel = @counter;

    SET @counter += 1;

    -- Loop safety
    IF @counter > 99
    BEGIN 
        RAISERROR( 'Too many loops!', 16, 1 ) 
        BREAK 
    END;

END


SELECT 'after' s, *  FROM #features ORDER BY FeatureId;

Full code including setup is available here.

My results: My results

Hope that helps.