Shubham Mehta Shubham Mehta - 5 months ago 10
SQL Question

How to import this XML data into a MySQL database table?

I'm trying to import data (that was exported to me by an admin) from this XML file into my phpMyAdmin MySQL database table for local development purposes. Except that I don't know how to do it.

I looked into the

LOAD XML INFILE
query to load XML into one of the tables I built in my local database. However when I tried to do so either by the query

LOAD XML INFILE 'D://Secondary/wamp64/tmp/Part_Info.xml'
INTO TABLE dbtest.part_no
ROWS IDENTIFIED BY '<Row>'


OR

LOAD XML INFILE 'D://Secondary/wamp64/tmp/Part_Info.xml'
INTO TABLE dbtest.part_no
ROWS IDENTIFIED BY '<Data>'


the table got filled with NULL values everywhere.
I assumed this data is not in the format required by
LOAD XML INFILE
to work. (as referred from this question here).

How can I then import this data into my MySQL database table?

Here's the data that needs to be imported.

<?xml version="1.0"?>
<?mso-application progid="Excel.Sheet"?>
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:html="http://www.w3.org/TR/REC-html40">
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
<Author>ANKUR PRAKASH</Author>
<LastAuthor>Shubham Mehta</LastAuthor>
<Created>2016-06-10T10:48:33Z</Created>
<LastSaved>2016-06-10T16:23:58Z</LastSaved>
<Version>16.00</Version>
</DocumentProperties>
<OfficeDocumentSettings xmlns="urn:schemas-microsoft-com:office:office">
<AllowPNG/>
</OfficeDocumentSettings>
<ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
<WindowHeight>4455</WindowHeight>
<WindowWidth>15345</WindowWidth>
<WindowTopX>0</WindowTopX>
<WindowTopY>0</WindowTopY>
<ProtectStructure>False</ProtectStructure>
<ProtectWindows>False</ProtectWindows>
</ExcelWorkbook>
<Styles>
<Style ss:ID="Default" ss:Name="Normal">
<Alignment ss:Vertical="Bottom"/>
<Borders/>
<Font ss:FontName="Calibri" x:Family="Swiss" ss:Size="11" ss:Color="#000000"/>
<Interior/>
<NumberFormat/>
<Protection/>
</Style>
<Style ss:ID="s62">
<Alignment ss:Vertical="Bottom"/>
<Borders/>
<Font ss:FontName="Calibri" x:Family="Swiss" ss:Size="11" ss:Color="#000000"/>
<Interior/>
<NumberFormat ss:Format="0"/>
<Protection/>
</Style>
<Style ss:ID="s63">
<Alignment ss:Vertical="Bottom"/>
<Borders/>
<Font ss:FontName="Calibri" x:Family="Swiss" ss:Size="11" ss:Color="#000000"/>
<Interior/>
<NumberFormat ss:Format="@"/>
<Protection/>
</Style>
<Style ss:ID="s64">
<Alignment ss:Vertical="Bottom"/>
<Borders/>
<Font ss:FontName="Arial" ss:Bold="1"/>
<Interior/>
<NumberFormat/>
<Protection/>
</Style>
<Style ss:ID="s65">
<Font ss:FontName="Calibri" x:Family="Swiss" ss:Size="11" ss:Color="#000000"
ss:Bold="1"/>
</Style>
</Styles>
<Worksheet ss:Name="mltvwallinfotbl">
<Table ss:ExpandedColumnCount="12" ss:ExpandedRowCount="2001" x:FullColumns="1"
x:FullRows="1" ss:DefaultRowHeight="15">
<Column ss:StyleID="s62" ss:AutoFitWidth="0"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="92.25"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="102"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="52.5"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="180.75"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="84.75"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="35.25"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="143.25"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="67.5"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="72"/>
<Column ss:StyleID="s63" ss:AutoFitWidth="0" ss:Width="90.75"/>
<Column ss:AutoFitWidth="0" ss:Width="81.75"/>
<Row ss:AutoFitHeight="0">
<Cell ss:StyleID="s64"><Data ss:Type="String">#</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Part #</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Mfr. Part #</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Mfr. #</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Part Description</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Prime Part #</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">PMA</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Key Word</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Part Type</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Planning Type</Data></Cell>
<Cell ss:StyleID="s64"><Data ss:Type="String">Reference Status</Data></Cell>
<Cell ss:StyleID="s65"><Data ss:Type="String">Inventory Status</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">1</Data></Cell>
<Cell><Data ss:Type="String">444-00522-700</Data></Cell>
<Cell><Data ss:Type="String">444-00522-700</Data></Cell>
<Cell><Data ss:Type="String">50010000</Data></Cell>
<Cell><Data ss:Type="String">INDICATOR,OXYG. HIGH PRESSURE</Data></Cell>
<Cell><Data ss:Type="String">444-00522-700</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Component</Data></Cell>
<Cell><Data ss:Type="String">None</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">0</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">2</Data></Cell>
<Cell><Data ss:Type="String">444-01095-000</Data></Cell>
<Cell><Data ss:Type="String">444-01095-000</Data></Cell>
<Cell><Data ss:Type="String">50011658</Data></Cell>
<Cell><Data ss:Type="String">BELLOWS</Data></Cell>
<Cell><Data ss:Type="String">444-01095-000</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Expendable</Data></Cell>
<Cell><Data ss:Type="String">Reorder Level</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">0</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">3</Data></Cell>
<Cell><Data ss:Type="String">444-01096-000</Data></Cell>
<Cell><Data ss:Type="String">444-01096-000</Data></Cell>
<Cell><Data ss:Type="String">50011658</Data></Cell>
<Cell><Data ss:Type="String">UNION</Data></Cell>
<Cell><Data ss:Type="String">444-01096-000</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Expendable</Data></Cell>
<Cell><Data ss:Type="String">Reorder Level</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">87</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">4</Data></Cell>
<Cell><Data ss:Type="String">444-01298-400</Data></Cell>
<Cell><Data ss:Type="String">444-01298-400</Data></Cell>
<Cell><Data ss:Type="String">50010000</Data></Cell>
<Cell><Data ss:Type="String">ELECTRONIC PR SENSOR MODULE</Data></Cell>
<Cell><Data ss:Type="String">444-01298-400</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Component</Data></Cell>
<Cell><Data ss:Type="String">None</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">58</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">5</Data></Cell>
<Cell><Data ss:Type="String">444-01298-800</Data></Cell>
<Cell><Data ss:Type="String">444-01298-800</Data></Cell>
<Cell><Data ss:Type="String">50011658</Data></Cell>
<Cell><Data ss:Type="String">PR.SENSR</Data></Cell>
<Cell><Data ss:Type="String">444-01298-400</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Component</Data></Cell>
<Cell><Data ss:Type="String">None</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">69</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">6</Data></Cell>
<Cell><Data ss:Type="String">444-01300-300</Data></Cell>
<Cell><Data ss:Type="String">444-01300-300</Data></Cell>
<Cell><Data ss:Type="String">50011658</Data></Cell>
<Cell><Data ss:Type="String">VALVE</Data></Cell>
<Cell><Data ss:Type="String">444-01300-300</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Expendable</Data></Cell>
<Cell><Data ss:Type="String">None</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">22</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">7</Data></Cell>
<Cell><Data ss:Type="String">443W5809JA078</Data></Cell>
<Cell><Data ss:Type="String">443W5809JA078</Data></Cell>
<Cell><Data ss:Type="String">50010001</Data></Cell>
<Cell><Data ss:Type="String">CARPET</Data></Cell>
<Cell><Data ss:Type="String">443W5809JA078</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Expendable</Data></Cell>
<Cell><Data ss:Type="String">None</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">48</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">8</Data></Cell>
<Cell><Data ss:Type="String">443W5809JA079</Data></Cell>
<Cell><Data ss:Type="String">443W5809JA079</Data></Cell>
<Cell><Data ss:Type="String">50010001</Data></Cell>
<Cell><Data ss:Type="String">CARPET</Data></Cell>
<Cell><Data ss:Type="String">443W5809JA079</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Expendable</Data></Cell>
<Cell><Data ss:Type="String">None</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">98</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">9</Data></Cell>
<Cell><Data ss:Type="String">443W7000-7VG</Data></Cell>
<Cell><Data ss:Type="String">443W7000-7VG</Data></Cell>
<Cell><Data ss:Type="String">50010001</Data></Cell>
<Cell><Data ss:Type="String">KICKSTRIP</Data></Cell>
<Cell><Data ss:Type="String">443W7000-7VG</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Expendable</Data></Cell>
<Cell><Data ss:Type="String">None</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">25</Data></Cell>
</Row>
<Row ss:AutoFitHeight="0">
<Cell><Data ss:Type="Number">10</Data></Cell>
<Cell><Data ss:Type="String">443W7000-7WD</Data></Cell>
<Cell><Data ss:Type="String">443W7000-7WD</Data></Cell>
<Cell><Data ss:Type="String">50010001</Data></Cell>
<Cell><Data ss:Type="String">KICKSTRIP</Data></Cell>
<Cell><Data ss:Type="String">443W7000-7WD</Data></Cell>
<Cell><Data ss:Type="String">No</Data></Cell>
<Cell ss:Index="9"><Data ss:Type="String">Expendable</Data></Cell>
<Cell><Data ss:Type="String">None</Data></Cell>
<Cell><Data ss:Type="String">Active</Data></Cell>
<Cell ss:Formula="=RANDBETWEEN(0,100)"><Data ss:Type="Number">2</Data></Cell>
</Row>
</Table>
<WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
<Unsynced/>
<Print>
<ValidPrinterInfo/>
<PaperSizeIndex>9</PaperSizeIndex>
<HorizontalResolution>600</HorizontalResolution>
<VerticalResolution>600</VerticalResolution>
</Print>
<Selected/>
<TopRowVisible>75</TopRowVisible>
<LeftColumnVisible>2</LeftColumnVisible>
<Panes>
<Pane>
<Number>3</Number>
<ActiveRow>107</ActiveRow>
<ActiveCol>4</ActiveCol>
</Pane>
</Panes>
<ProtectObjects>False</ProtectObjects>
<ProtectScenarios>False</ProtectScenarios>
</WorksheetOptions>
</Worksheet>
<ss:DocumentProperties>
<ss:Author>Ramco Systems Ltd</ss:Author>
<ss:LastAuthor>Ramco Systems Ltd</ss:LastAuthor>
<ss:Created>Friday, June 10, 2016</ss:Created>
</ss:DocumentProperties>
</Workbook>


NOTE: I'm not looking for a database table to specifically fit this data in. I just want to store the data in this .XML file to my local SQL database table. You are free to assume any table structure for the answer that suits you and solves the problem. However just to show my own efforts at solving it, here's the table structure I used:

DROP TABLE IF EXISTS `part_no`;

CREATE TABLE IF NOT EXISTS `part_no` (
`#` varchar(5) NOT NULL,
`Part #` varchar(50) DEFAULT NULL,
`Mfr. Part #` varchar(50) DEFAULT NULL,
`Mfr. #` varchar(50) DEFAULT NULL,
`Part Description` varchar(50) DEFAULT NULL,
`Prime Part #` varchar(50) DEFAULT NULL,
`PMA` varchar(50) DEFAULT NULL,
`Key Word` varchar(50) DEFAULT NULL,
`Part Type` varchar(50) DEFAULT NULL,
`Planning Type` varchar(50) DEFAULT NULL,
`Reference Status` varchar(50) DEFAULT NULL,
`Inventory Status` varchar(4) DEFAULT NULL,
PRIMARY KEY (`#`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

Answer

Consider running XSLT to transform the original XML into the format MySQL requires of LOAD XML INFILE. Your original format has quite a bit of namespaces and other nested elements that my interfere with the MySQL upload. Like most general purpose languages, PHP comes equipped with an XSLT 1.0 processor.

One important item is to rename the database field names, specifically removing spaces, #, and periods to properly fit xml tag names (which even follows best design practices in RDBMS).

XSLT Script (save as .xsl file to be referenced below)

<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"               
               xmlns:s="urn:schemas-microsoft-com:office:spreadsheet"
               exclude-result-prefixes="s">
<xsl:output version="1.0" encoding="UTF-8" indent="yes" />
<xsl:strip-space elements="*"/>

  <xsl:template match="s:Workbook">
    <data>
      <xsl:apply-templates select="s:Worksheet/s:Table/s:Row[position() &gt; 1]"/>
    </data>
  </xsl:template>

  <xsl:template match="s:Worksheet/s:Table/s:Row[position() &gt; 1]">
    <row>        
        <No><xsl:value-of select="s:Cell[1]/s:Data"/></No>
        <PartNo><xsl:value-of select="s:Cell[2]/s:Data"/></PartNo>
        <MfrPartNo><xsl:value-of select="s:Cell[3]/s:Data"/></MfrPartNo>
        <MfrNo><xsl:value-of select="s:Cell[4]/s:Data"/></MfrNo>
        <PartDescription><xsl:value-of select="s:Cell[5]/s:Data"/></PartDescription>
        <PrimePart><xsl:value-of select="s:Cell[6]/s:Data"/></PrimePart>
        <PMA><xsl:value-of select="s:Cell[7]/s:Data"/></PMA>
        <KeyWord><xsl:value-of select="s:Cell[8]/s:Data"/></KeyWord>
        <PartType><xsl:value-of select="s:Cell[9]/s:Data"/></PartType>
        <PlanningType><xsl:value-of select="s:Cell[10]/s:Data"/></PlanningType>
        <ReferenceStatus><xsl:value-of select="s:Cell[11]/s:Data"/></ReferenceStatus>
        <InventoryStatus><xsl:value-of select="s:Cell[12]/s:Data"/></InventoryStatus>
    </row>
  </xsl:template>
</xsl:transform>

PHP Script

// LOAD XML AND XSL FILES
$xml = new DOMDocument('1.0', 'UTF-8');
$xml->load('Input.xml');

$xslfile = new DOMDocument('1.0', 'UTF-8');
$xslfile->load('XSLTScript.xsl');

// TRANSFORM XML with XSLT
$proc = new XSLTProcessor;
$proc->importStyleSheet($xslfile); 
$newXml = $proc->transformToXML($xml);

// OUTPUT TO FILE
file_put_contents('Output.xml', $newXml);

XML Output (use this output file to load into MySQL, again rename columns accordingly)

<?xml version="1.0" encoding="UTF-8"?>
<data>
  <row>
    <No>1</No>
    <PartNo>444-00522-700</PartNo>
    <MfrPartNo>444-00522-700</MfrPartNo>
    <MfrNo>50010000</MfrNo>
    <PartDescription>INDICATOR,OXYG. HIGH PRESSURE</PartDescription>
    <PrimePart>444-00522-700</PrimePart>
    <PMA>No</PMA>
    <KeyWord>Component</KeyWord>
    <PartType>None</PartType>
    <PlanningType>Active</PlanningType>
    <ReferenceStatus>0</ReferenceStatus>
    <InventoryStatus/>
  </row>
  <row>
    <No>2</No>
    <PartNo>444-01095-000</PartNo>
    <MfrPartNo>444-01095-000</MfrPartNo>
    <MfrNo>50011658</MfrNo>
    <PartDescription>BELLOWS</PartDescription>
    <PrimePart>444-01095-000</PrimePart>
    <PMA>No</PMA>
    <KeyWord>Expendable</KeyWord>
    <PartType>Reorder Level</PartType>
    <PlanningType>Active</PlanningType>
    <ReferenceStatus>0</ReferenceStatus>
    <InventoryStatus/>
  </row>
  <row>
    <No>3</No>
    <PartNo>444-01096-000</PartNo>
    <MfrPartNo>444-01096-000</MfrPartNo>
    <MfrNo>50011658</MfrNo>
    <PartDescription>UNION</PartDescription>
    <PrimePart>444-01096-000</PrimePart>
    <PMA>No</PMA>
    <KeyWord>Expendable</KeyWord>
    <PartType>Reorder Level</PartType>
    <PlanningType>Active</PlanningType>
    <ReferenceStatus>87</ReferenceStatus>
    <InventoryStatus/>
  </row>
...
</data>