Search code examples
xmlgo

Unmarshal xml into custom struct - how to handle different structures for the same tag


I have a .owl like this simplified example:

<?xml version="1.0"?>
<rdf:RDF xmlns="http://www.w3.org/2002/07/owl#"
     xml:base="http://www.w3.org/2002/07/owl"
     xmlns:owl="http://www.w3.org/2002/07/owl#"
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:xml="http://www.w3.org/XML/1998/namespace"
     xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
     xmlns:mc="http://my-company/ontologies#">
    <Ontology rdf:about="http://my-company/ontologies"/>

    <Class rdf:about="http://my-company/ontologies#Thing">
        <rdfs:subClassOf rdf:nodeID="genid494"/>
        <rdfs:subClassOf>
            <Restriction>
                <onProperty rdf:resource="http://my-company/ontologies#myProp1"/>
            </Restriction>
        </rdfs:subClassOf>
    </Class>
    <Restriction rdf:nodeID="genid494">
        <onProperty rdf:resource="http://my-company/ontologies#myProp2"/>
    </Restriction>
</rdf:RDF>

The problem is that the subClassOf child nodes of a Class can either reference a Restriction (via rdf:nodeID) or embed a Restriction directly.

I am trying to unmarshal the given XML into these target structs:

// RDF is the decoding target for the document's root RDF element. The
// string fields map to attributes of the root element (namespace
// declarations and the base), Ontology to the <Ontology> child, and
// Classes / Restrictions to the root's <Class> and <Restriction> children.
type RDF struct {
    XMLName  xml.Name `xml:"RDF"`
    XMLNS    string   `xml:"xmlns,attr"`
    Base     string   `xml:"base,attr"`
    OWLNS    string   `xml:"owl,attr"`
    RDFNS    string   `xml:"rdf,attr"`
    XML      string   `xml:"xml,attr"`
    XSD      string   `xml:"xsd,attr"`
    RDFS     string   `xml:"rdfs,attr"`
    MCNS     string   `xml:"mc,attr"`
    Ontology struct {
        About *string `xml:"about,attr"`
    } `xml:"Ontology"`

    Classes      []Class       `xml:"Class,omitempty"`
    Restrictions []Restriction `xml:"Restriction,omitempty"`
}

// Restriction maps a <Restriction> element; OnProperty is its <onProperty>
// child.
type Restriction struct {
    OnProperty ResourceData `xml:"onProperty"`
}

// ResourceData holds the value of an element's "resource" attribute.
type ResourceData struct {
    Resource string `xml:"resource,attr"`
}

// Class maps a <Class> element; every <subClassOf> child becomes one entry
// in ClassDefinition.
type Class struct {
    ClassDefinition []ClassDescriptor `xml:"subClassOf,omitempty"`
}

// ClassDescriptor maps a single <subClassOf> element.
type ClassDescriptor struct {
    Restriction *RestrictionReference `xml:"restriction"`
}

// RestrictionReference is meant to carry either an embedded Restriction
// (inner) or the identifier of one declared elsewhere (reference). Both
// fields are unexported, so encoding/xml does not fill them on its own.
type RestrictionReference struct {
    inner     *Restriction
    reference *string
}

My goal is to create a RestrictionReference which holds either the embedded Restriction child (if present) or a reference to a Restriction declared elsewhere. Later on I can add methods such as func (r *RestrictionReference) GetRestriction(rdf RDF) *Restriction to hide this complexity.

My problem is that I fail to implement xml.Unmarshaler for *ClassDescriptor accordingly. So far I have come up with something like the following, but I fail to parse the child node when there is one:

// UnmarshalXML is the asker's attempt at implementing xml.Unmarshaler for
// *ClassDescriptor. It first scans the attributes of the start element for a
// "resource" or "nodeID" reference, then either skips the element or tries to
// decode it into a Restriction.
//
// NOTE(review): this is the non-working version the question is about:
//   - errors.New takes a single string argument, so the two-argument call in
//     the default case does not compile; fmt.Errorf would be needed.
//   - description.Restriction is a *RestrictionReference that this function
//     never allocates, so the assignments through it dereference a nil
//     pointer unless the caller set the field beforehand.
//   - the final condition appears inverted relative to the sample document:
//     there, the attribute-less <subClassOf> is the one that embeds a
//     <Restriction>, yet the attribute-less case is the one skipped here.
//   - DecodeElement is handed the <subClassOf> start element while the target
//     is a Restriction, whose only mapped child is <onProperty>; the nested
//     <Restriction> level is therefore not matched.
func (description *ClassDescriptor) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
    // easier case: there is just a reference
    for _, attr := range start.Attr {
        switch attr.Name.Local {
        case "resource":
            // copy the value so the stored pointer does not alias the loop variable
            cp := attr.Value
            description.Restriction.reference = &cp
        case "nodeID":
            nodeID := attr.Value
            description.Restriction.reference = &nodeID
        default:
            return errors.New("unknown xml attribute:", attr.Name.Local)
        }
    }
    // temporary? workaround to avoid the "did not consume the entire element" err for the easier case
    if len(start.Attr) == 0 {
        // consumes everything up to the matching end element without decoding it
        return d.Skip()
    } else {
        // complicated case: there is a child node with the Restriction
        // TODO unmarshal Restriction and set it in the RestrictionReference prop
        var redData Restriction
        // will be empty, but I don't know what to pass here instead
        err := d.DecodeElement(&redData, &start)
        description.Restriction.inner = &redData
        return err
    }
}

I have no idea how I can unmarshal the child node. Is my approach even valid?

Edit: I guess I could sidestep the question by declaring both possibilities in the ClassDescriptor struct, like this:

// ClassDescriptor is the alternative sketch that needs no custom
// unmarshaler: it declares both shapes of <subClassOf> side by side, the
// "nodeID" attribute and the embedded <Restriction> child, each as a pointer.
// NOTE(review): the NodeID type is not shown in the question — presumably a
// string-based type; confirm before use.
type ClassDescriptor struct {
    NodeID      *NodeID      `xml:"nodeID,attr"`
    Restriction *Restriction `xml:"Restriction"` 
}

But this makes the handling from the outside a bit messier than I wanted, and I still don't know how to actually implement an xml.Unmarshaler which reads a child node.


Solution

  • Okay, let's simplify that a bit.

    Assuming you want to decode

    <RDF>
        <Class>
            <subClassOf nodeID="genid494"/>
            <subClassOf>
                <Restriction>
                    <onProperty resource="http://my-company/ontologies#myProp1"/>
                </Restriction>
            </subClassOf>
        </Class>
    </RDF>
    

    and having an attribute on a subClassOf is a reliable criterion, you could try (Go Playground):

    package main
    
    import (
        "encoding/xml"
        "fmt"
    )
    
    // RDF is the decoding target for the root element of the simplified
    // document (<RDF> in the example); Classes collects its <Class> children.
    type RDF struct {
        Classes []Class `xml:"Class"`
    }
    
    // Class maps a <Class> element; each <subClassOf> child is decoded into one
    // ClassDescriptor.
    type Class struct {
        ClassDefinition []ClassDescriptor `xml:"subClassOf"`
    }
    
    // ClassDescriptor is the decoded form of a single <subClassOf> element.
    // UnmarshalXML fills at most one of the two fields: NodeID when the element
    // carries attributes (a reference), OnProperty when it has none and is
    // expected to embed a <Restriction> child.
    type ClassDescriptor struct {
        NodeID     string
        OnProperty string
    }

    // UnmarshalXML implements xml.Unmarshaler. The presence of any attribute on
    // the start element selects the shape to decode:
    //
    //   - with attributes, the element is decoded as a reference and the value
    //     of its "nodeID" attribute is stored in NodeID;
    //   - without attributes, the element is decoded as a wrapper around
    //     <Restriction><onProperty resource="..."/></Restriction> and the
    //     resource value is stored in OnProperty.
    //
    // In both cases DecodeElement consumes the whole element, so the decoder is
    // left positioned after the matching end tag. Any decoding error is
    // returned wrapped, with a message naming the shape that failed.
    func (cd *ClassDescriptor) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
        if len(start.Attr) > 0 {
            var ref struct {
                NodeID string `xml:"nodeID,attr"`
            }
            if err := d.DecodeElement(&ref, &start); err != nil {
                return fmt.Errorf("can't decode node id on %+v: %w", start, err)
            }
            cd.NodeID = ref.NodeID
            return nil
        }

        var embedded struct {
            Restriction struct {
                OnProperty struct {
                    Resource string `xml:"resource,attr"`
                } `xml:"onProperty"`
            } `xml:"Restriction"`
        }
        if err := d.DecodeElement(&embedded, &start); err != nil {
            // Distinct from the message above so a failure points at the right branch.
            return fmt.Errorf("can't decode restriction on %+v: %w", start, err)
        }
        cd.OnProperty = embedded.Restriction.OnProperty.Resource
        return nil
    }
    

    This is of course very simplified, but shows how to decode XML in two separate structures when you know which one you need.

    Obviously, since you can do whatever you want in

    type ClassDescriptor struct {
        NodeID     string
        OnProperty string
    }
    

    you could replace both alternatives with a single interface of your choice.

    Does this answer your question or did I misinterpret something?