colonelsanders91 colonelsanders91 - 23 days ago 15
SQL Question

How to parse a complicated string via MSSQL

I'm trying to figure out the best way to parse a complex JSON object stored as a string via MSSQL.

My table has the following information:

LogID | Content
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
55271413 | {"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5218912","CarrierScac":"XYZ","Latitude":33.595555,"Longitude":-85.854722,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}

55271414 | {"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5218944","CarrierScac":"XYZ","Latitude":37.996666,"Longitude":-78.314444,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}

55271415 | {"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5219079","CarrierScac":"YZB","Latitude":34.027500,"Longitude":-117.522222,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}

55271416 | {"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5219020","CarrierScac":"XYZ","Latitude":37.754722,"Longitude":-121.144166,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}

55271417 | {"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5218911","CarrierScac":"XYZ","Latitude":40.585833,"Longitude":-91.425000,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}

55271418 | {"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5218785","CarrierScac":"XYZ","Latitude":30.747500,"Longitude":-85.270277,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}

55271426 | {"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5219044","CarrierScac":"XYZ","Latitude":33.598333,"Longitude":-97.936388,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}


I'm trying to parse each string and place it inside a new column with the name of the JSON attribute as the column name, and the corresponding value as the row value.

For example, here would be the results I'm looking for below for each row:

LogID | LicensePlate | FreightHaulerProviderXId | FreightProviderReferenceNumber | CarrierScac | Latitude | Longitude | StreetAddress1 | StreetAddress2 | City | State | PostalCode | Country
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
55271413 | | ABC | 5218912 | XYZ | 33.595555 | -85.854722 | | | | | |


I've tried parsing it using some probably pretty bad SQL logic. Essentially I look for the entire string, grab a sub-string, and then manually assign a column name. This isn't a very good solution for scaleability and performance.

SELECT DISTINCT
SUBSTRING(lcon.Content, CHARINDEX('CarrierScac', lcon.Content)+14, CHARINDEX('City',lcon.Content) - CHARINDEX('CarrierScac', lcon.Content) + Len('City')-21) as 'CarrierScac',
SUBSTRING(lcon.Content, CHARINDEX('Latitude', lcon.Content)+10, CHARINDEX('Longitude',lcon.Content) - CHARINDEX('Latitude', lcon.Content) + Len('Longitude')-21) as 'Latitude',
SUBSTRING(lcon.Content, CHARINDEX('Longitude', lcon.Content)+11, CHARINDEX('PositionEventType',lcon.Content) - CHARINDEX('Longitude', lcon.Content) + Len('"PositionEventType')-31) as 'Longitude'
FROM
acg.LogContext lcon
WHERE
lcon.Content LIKE '%XYZ%'


Any help would be appreciated as I seem to be completely stumped even after researching techniques for the better half of the day.

Thanks!

Answer

With the help of a parse function and two cross applies...

Declare @YourTable table (LogID int,Content varchar(max))
Insert Into @YourTable values
(55271413,'{"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5218912","CarrierScac":"XYZ","Latitude":33.595555,"Longitude":-85.854722,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}'),
(55271414,'{"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5218944","CarrierScac":"XYZ","Latitude":37.996666,"Longitude":-78.314444,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}'),
(55271415,'{"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5219079","CarrierScac":"YZB","Latitude":34.027500,"Longitude":-117.522222,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}'),
(55271416,'{"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5219020","CarrierScac":"XYZ","Latitude":37.754722,"Longitude":-121.144166,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}'),
(55271417,'{"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5218911","CarrierScac":"XYZ","Latitude":40.585833,"Longitude":-91.425000,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}'),
(55271418,'{"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5218785","CarrierScac":"XYZ","Latitude":30.747500,"Longitude":-85.270277,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}'),
(55271426,'{"LicensePlate":"","FreightHaulerProviderXId":"ABC","FreightProviderReferenceNumber":"5219044","CarrierScac":"XYZ","Latitude":33.598333,"Longitude":-97.936388,"StreetAddress1":"","StreetAddress2":"","City":"","State":"","PostalCode":"","Country":""}')


Select LogID
      ,LicensePlate = max(case when Item='LicensePlate' then Value else null end)
      ,FreightHaulerProviderXId = max(case when Item='FreightHaulerProviderXId' then Value else null end)
      ,FreightProviderReferenceNumber = max(case when Item='FreightProviderReferenceNumber' then Value else null end)
      ,CarrierScac = max(case when Item='CarrierScac' then Value else null end) 
      ,Latitude = max(case when Item='Latitude' then Value else null end)
      ,Longitude = max(case when Item='Longitude' then Value else null end)
      ,StreetAddress1 = max(case when Item='StreetAddress1' then Value else null end)
      ,StreetAddress2 = max(case when Item='StreetAddress2' then Value else null end)
      ,City = max(case when Item='City' then Value else null end)
      ,State = max(case when Item='State' then Value else null end)
      ,PostalCode = max(case when Item='PostalCode' then Value else null end)
      ,Country = max(case when Item='Country' then Value else null end)
 From ( 
        Select LogID
              ,Item  = max(case when RetSeq=1 then RetVal else null end)
              ,Value = max(case when RetSeq=2 then RetVal else null end)
        From (
                Select A.LogID
                      ,Grp = B.RetSeq
                      ,C.*
                From  @YourTable A
                Cross Apply (Select RetSeq,RetVal=Replace(Replace(Replace(RetVal,'"',''),'{',''),'}','') From [dbo].[udf-Str-Parse](A.Content,',') ) B
                Cross Apply (Select * From [dbo].[udf-Str-Parse](B.RetVal,':') ) C
            ) N Group By LogID,Grp
      ) F
 Group By LogID

Returns

enter image description here


The UDF if needed

CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@Delimiter varchar(10))
Returns Table 
As
Return (  
    Select RetSeq = Row_Number() over (Order By (Select null))
          ,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
    From (Select x = Cast('<x>'+ Replace(@String,@Delimiter,'</x><x>')+'</x>' as xml).query('.')) as A 
    Cross Apply x.nodes('x') AS B(i)
);
--Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
Comments