Anna Bokovskaya Anna Bokovskaya - 9 days ago 5
SQL Question

Collect all Similar Persons to One Group

I have a person with several Id's.
Some of them in Column Id1 and Some of them in Id2.
I want to collect all the same persons Id's to one group.

If id1=10, is in the same row with id2=20. so it's mean that person with id1=10 he is the same person like id2=20.

The Input and Output example:

Input



Id1 Id2
--- ---
10 20
10 30
30 30
10 40

50 70
60 50
70 70


Output



NewId OldId
----- -----
1 10
1 20
1 30
1 40

2 50
2 60
2 70

Answer

I suspect this could be done with recursive CTEs, but here is a less elegent solution.

-- CREATE Temps
CREATE TABLE #Table (id1 INT, id2 INT)
CREATE TABLE #NewTable (NewID INT, OldID INT)
CREATE TABLE #AllIDs (ID INT)

-- Insert Test data
INSERT #Table
        ( id1, id2 )
VALUES  (  10, 20 ),
        (  10, 30 ),
        (  30, 20 ),
        (  10, 40 ),

        (  50, 70 ),
        (  60, 50 ),
        (  70, 70 ),
        (  110, 120 ),
        (  120, 130 ),
        (  140, 130 )

-- Assemble all possible OldIDs
INSERT INTO #AllIDs
    SELECT id1 FROM #Table
    UNION
    SELECT id2 FROM #Table

DECLARE @NewID INT = 1,
        @RowCnt int

-- Insert seed OldID
INSERT #NewTable
    SELECT TOP 1 @NewID, id
    FROM #AllIDs
    WHERE id NOT IN (SELECT OldID FROM #NewTable)
    ORDER BY 2

SET @RowCnt = @@ROWCOUNT

WHILE @RowCnt > 0
BEGIN   
    WHILE @RowCnt > 0
    BEGIN
        -- Check for id2 that match current OldID
        INSERT #NewTable
            SELECT DISTINCT @NewID, id2
            FROM #Table t
                INNER JOIN #NewTable nt ON t.id1 = nt.OldID
            WHERE nt.[NewID] = @NewID
                AND t.id2 NOT IN (SELECT OldID FROM #NewTable WHERE [NewID] = @NewID)

        SELECT @RowCnt = @@ROWCOUNT

        -- Check for id1 that match current OldID
        INSERT #NewTable
            SELECT DISTINCT @NewID, id1
            FROM #Table t
                INNER JOIN #NewTable nt ON t.id2 = nt.OldID
            WHERE nt.[NewID] = @NewID
                AND t.id1 NOT IN (SELECT OldID FROM #NewTable WHERE [NewID] = @NewID)

        SELECT @RowCnt = @RowCnt + @@ROWCOUNT
    END

    SET @NewID = @NewID + 1

    -- Add another seed OldID if any left
    INSERT #NewTable
        SELECT TOP 1 @NewID, id
        FROM #AllIDs
        WHERE id NOT IN (SELECT OldID FROM #NewTable)
        ORDER BY 2

    SELECT @RowCnt = @@ROWCOUNT
END

-- Get Results
SELECT * FROM #NewTable ORDER BY [NewID], OldID