Search code examples
sql-server xml xmi

How to fix XML parsing error: Reference to undeclared namespace prefix: 'Matrix'


I am trying to read an XML (XMI) file in SQL Server.

I wrote this script, but I am getting an error:

-- Reads an XMI file from disk and tries to shred it into rows with OPENXML.
--   @XML       : the file content converted to the XML data type
--   @hDoc      : document handle returned by sp_xml_preparedocument
--   @rootxmlns : namespace declarations passed to sp_xml_preparedocument
--   @SQL       : declared but not used anywhere in this snippet
DECLARE @XML AS XML, @hDoc AS INT, @SQL NVARCHAR (MAX), @rootxmlns varchar(1000)    

-- Load the whole file as a single binary value and convert it to XML.
-- NOTE(review): the document shown below uses the Matrix: prefix on elements
-- and attributes, but its root element carries no xmlns:Matrix declaration.
SELECT @XML=CONVERT(XML, BulkColumn) 
FROM OPENROWSET(BULK 'F:\__Files\Done\Output\20201223133856\2027.txt.xmi', SINGLE_BLOB) AS x;

-- Only the prefix "soap" is declared here (bound to the XMI namespace URI).
-- The row pattern further down uses the prefix "Matrix", which this
-- declaration does not define.
SET @rootxmlns = '<root xmlns:soap="http://www.omg.org/XMI"/>'
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML, @rootxmlns

-- Row pattern selects Matrix:Customer nodes; no WITH clause is supplied.
-- NOTE(review): no sp_xml_removedocument call for @hDoc is visible in this
-- snippet - confirm the handle is released elsewhere.
SELECT *
FROM OPENXML(@hDoc, 'Matrix:Customer')

I checked many links, and they all proposed declaring the namespace with xmlns. I tried many of those suggestions, but none of them work; they all fail with the same error:

XML parsing error: Reference to undeclared namespace prefix: 'Matrix'.

Here is my XMI

<?xml version="1.0" encoding="UTF-8"?>
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI"
        xmlns:util="http:///org/apache/ctakes/typesystem/type/util.ecore"
        xmlns:tcas="http:///uima/tcas.ecore"
        xmlns:cas="http:///uima/cas.ecore"
        xmlns:type10="http:///org/cleartk/syntax/constituent/type.ecore"
        xmlns:ne="http:///org/cleartk/type/ne.ecore"
        xmlns:textsem="http:///org/apache/ctakes/typesystem/type/textsem.ecore"
        xmlns:types2="http:///org/apache/ctakes/assertion/zoner/types.ecore"
        xmlns:type6="http:///org/apache/ctakes/smokingstatus/i2b2/type.ecore"
        xmlns:refsem="http:///org/apache/ctakes/typesystem/type/refsem.ecore"
        xmlns:type11="http:///org/cleartk/syntax/dependency/type.ecore"
        xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore"
        xmlns:type14="http:///org/cleartk/util/type.ecore"
        xmlns:assertion="http:///org/apache/ctakes/typesystem/type/temporary/assertion.ecore"
        xmlns:type8="http:///org/cleartk/score/type.ecore"
        xmlns:syntax="http:///org/apache/ctakes/typesystem/type/syntax.ecore"
        xmlns:type9="http:///org/cleartk/srl/type.ecore"
        xmlns:type2="http:///org/apache/ctakes/constituency/parser/uima/type.ecore"
        xmlns:types="http:///org/apache/ctakes/assertion/medfacts/types.ecore"
        xmlns:type7="http:///org/apache/ctakes/smokingstatus/type.ecore"
        xmlns:relation="http:///org/apache/ctakes/typesystem/type/relation.ecore"
        xmlns:type12="http:///org/cleartk/timeml/type.ecore"
        xmlns:type5="http:///org/apache/ctakes/sideeffect/type.ecore"
        xmlns:type15="http:///org/cleartk/ne/type.ecore"
        xmlns:type13="http:///org/cleartk/token/type.ecore"
        xmlns:structured="http:///org/apache/ctakes/typesystem/type/structured.ecore"
        xmlns:textspan="http:///org/apache/ctakes/typesystem/type/textspan.ecore"
        xmlns:libsvm="http:///org/apache/ctakes/smokingstatus/type/libsvm.ecore"
        xmlns:type3="http:///org/apache/ctakes/coreference/type.ecore"
        xmlns:type4="http:///org/apache/ctakes/drugner/type.ecore"
        xmi:version="2.0">
    <cas:NULL xmi:id="0"/>
    <tcas:DocumentAnnotation xmi:id="8"
                            sofa="1"
                            begin="0"
                            end="16975"
                            language="x-unspecified"/>
    <structured:DocumentID xmi:id="13"
                            documentID="1"/>
    <structured:DocumentIdPrefix xmi:id="15"
                                documentIdPrefix=""/>
    <structured:Metadata xmi:id="17"
                        patientIdentifier="1"
                        patientID="0"
                        sourceData="23"/>
    <structured:DocumentPath xmi:id="35"
                            documentPath="D:\__Files\Input\1\1.txt"/>
    <Matrix:Segment xmi:id="37"
                        sofa="1"
                        begin="0"
                        end="16975"
                        id="SIMPLE_SEGMENT"
                        preferredText="SIMPLE_SEGMENT"/>
    <Matrix:Customer xmi:id="44"
                        sofa="1"
                        begin="0"
                        end="15"
                        Matrix:CustomerNumber="0"/>
    <Matrix:Customer xmi:id="50"
                        sofa="1"
                        begin="17"
                        end="33"
                        Matrix:CustomerNumber="1"/>
    <Matrix:Customer xmi:id="56"
                        sofa="1"
                        begin="47"
                        end="62"
                        Matrix:CustomerNumber="2"/>
    <Matrix:Customer xmi:id="62"
                        sofa="1"
                        begin="65"
                        end="80"
                        Matrix:CustomerNumber="3"/>

Solution

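  • The input XML is not namespace-well-formed: it uses the Matrix prefix without ever declaring it. I had to fix the XML by adding a placeholder namespace declaration for that prefix to the root element (xmlns:Matrix="http://someURL"). It is also better to use the native XQuery methods of the XML data type, .nodes() and .value(), and to stay away from the legacy OPENXML() and its companion procedures (sp_xml_preparedocument / sp_xml_removedocument).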

    Check it out.

    XML

    <?xml version="1.0" encoding="UTF-8"?>
    <xmi:XMI xmlns:xmi="http://www.omg.org/XMI"
             xmlns:util="http:///org/apache/ctakes/typesystem/type/util.ecore"
             xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore"
             xmlns:type10="http:///org/cleartk/syntax/constituent/type.ecore"
             xmlns:ne="http:///org/cleartk/type/ne.ecore"
             xmlns:textsem="http:///org/apache/ctakes/typesystem/type/textsem.ecore"
             xmlns:types2="http:///org/apache/ctakes/assertion/zoner/types.ecore"
             xmlns:type6="http:///org/apache/ctakes/smokingstatus/i2b2/type.ecore"
             xmlns:refsem="http:///org/apache/ctakes/typesystem/type/refsem.ecore"
             xmlns:type11="http:///org/cleartk/syntax/dependency/type.ecore"
             xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore"
             xmlns:type14="http:///org/cleartk/util/type.ecore"
             xmlns:assertion="http:///org/apache/ctakes/typesystem/type/temporary/assertion.ecore"
             xmlns:type8="http:///org/cleartk/score/type.ecore"
             xmlns:syntax="http:///org/apache/ctakes/typesystem/type/syntax.ecore"
             xmlns:type9="http:///org/cleartk/srl/type.ecore"
             xmlns:type2="http:///org/apache/ctakes/constituency/parser/uima/type.ecore"
             xmlns:types="http:///org/apache/ctakes/assertion/medfacts/types.ecore"
             xmlns:type7="http:///org/apache/ctakes/smokingstatus/type.ecore"
             xmlns:relation="http:///org/apache/ctakes/typesystem/type/relation.ecore"
             xmlns:type12="http:///org/cleartk/timeml/type.ecore"
             xmlns:type5="http:///org/apache/ctakes/sideeffect/type.ecore"
             xmlns:type15="http:///org/cleartk/ne/type.ecore"
             xmlns:type13="http:///org/cleartk/token/type.ecore"
             xmlns:structured="http:///org/apache/ctakes/typesystem/type/structured.ecore"
             xmlns:textspan="http:///org/apache/ctakes/typesystem/type/textspan.ecore"
             xmlns:libsvm="http:///org/apache/ctakes/smokingstatus/type/libsvm.ecore"
             xmlns:type3="http:///org/apache/ctakes/coreference/type.ecore"
             xmlns:type4="http:///org/apache/ctakes/drugner/type.ecore"
             xmlns:Matrix="http://someURL"
             xmi:version="2.0">
        <cas:NULL xmi:id="0"/>
        <tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="16975"
                                 language="x-unspecified"/>
        <structured:DocumentID xmi:id="13" documentID="1"/>
        <structured:DocumentIdPrefix xmi:id="15" documentIdPrefix=""/>
        <structured:Metadata xmi:id="17" patientIdentifier="1" patientID="0"
                             sourceData="23"/>
        <structured:DocumentPath xmi:id="35" documentPath="D:\__Files\Input\1\1.txt"/>
        <Matrix:Segment xmi:id="37" sofa="1" begin="0" end="16975"
                        id="SIMPLE_SEGMENT" preferredText="SIMPLE_SEGMENT"/>
        <Matrix:Customer xmi:id="44" sofa="1" begin="0" end="15"
                         Matrix:CustomerNumber="0"/>
        <Matrix:Customer xmi:id="50" sofa="1" begin="17" end="33"
                         Matrix:CustomerNumber="1"/>
        <Matrix:Customer xmi:id="56" sofa="1" begin="47" end="62"
                         Matrix:CustomerNumber="2"/>
        <Matrix:Customer xmi:id="62" sofa="1" begin="65" end="80"
                         Matrix:CustomerNumber="3"/>
    </xmi:XMI>
    

    SQL

    -- Shred every Matrix:Customer element directly under the xmi:XMI root into
    -- one row per element. Both prefixes used in the XPath expressions are bound
    -- here; 'http://someURL' has to match the xmlns:Matrix URI in the file.
    ;WITH XMLNAMESPACES (
        'http://www.omg.org/XMI' AS xmi,
        'http://someURL' AS Matrix
    ),
    xmi_file (doc) AS (
        -- Read the whole file as a single binary value and parse it as XML.
        SELECT CAST(BulkColumn AS XML)
        FROM OPENROWSET (BULK N'e:\Temp\2027.xml', SINGLE_BLOB) AS src
    )
    SELECT
        customer.value('@xmi:id', 'INT') AS xml_id,
        customer.value('@sofa', 'INT') AS sofa,
        customer.value('@begin', 'INT') AS [begin],
        customer.value('@end', 'INT') AS [end],
        customer.value('@Matrix:CustomerNumber', 'INT') AS CustomerNumber
    FROM xmi_file
    CROSS APPLY doc.nodes('/xmi:XMI/Matrix:Customer') AS customers(customer);
    

    Output

    +--------+------+-------+-----+----------------+
    | xml_id | sofa | begin | end | CustomerNumber |
    +--------+------+-------+-----+----------------+
    |     44 |    1 |     0 |  15 |              0 |
    |     50 |    1 |    17 |  33 |              1 |
    |     56 |    1 |    47 |  62 |              2 |
    |     62 |    1 |    65 |  80 |              3 |
    +--------+------+-------+-----+----------------+