Search code examples
php xml laravel xml-parsing nsxmlparser

How to parse USPTO xml response in laravel 5


I've been trying to parse the response from the USPTO assignments API using Laravel, and everything I try seems to go wrong. I've never really done this much with XML before.

Here is a sample of the XML:

   <?xml version="1.0" encoding="UTF-8"?>
<response>
    <lst name="responseHeader">
        <int name="status">0</int>
        <int name="QTime">78</int>
        <lst name="params">
            <str name="fl">id,reelNo,frameNo,conveyanceText,patAssigneeName,patAssignorName,inventionTitleFirst,applNumFirst,publNumFirst,patNumFirst,intlRegNumFirst,corrName,corrAddress1,corrAddress2,corrAddress3,patAssignorEarliestExDate,filingDateFirst,publDateFirst,issueDateFirst,intlPublDateFirst,patNumSize,applNum,applNumSize,intlRegNum</str>
            <str name="sort">patAssignorEarliestExDate desc</str>
            <str name="start">0</str>
            <str name="q">Ojo</str>
            <str name="wt">javabin</str>
            <str name="qt">/apisearch</str>
            <str name="fq">applNum:(13096729)</str>
            <str name="version">2</str>
            <str name="rows">8</str>
        </lst>
    </lst>
    <result name="response" numFound="1" start="0">
        <doc>
            <str name="id">48719-909</str>
            <str name="reelNo">48719</str>
            <str name="frameNo">909</str>
            <str name="conveyanceText">ASSIGNMENT OF ASSIGNORS INTEREST (SEE DOCUMENT FOR DETAILS).</str>
            <str name="corrName">LOCKE LORD LLP</str>
            <str name="corrAddress1">P.O. BOX 55874</str>
            <str name="corrAddress2">BOSTON, MA 02205</str>
            <date name="patAssignorEarliestExDate">2019-03-20T04:00:00Z</date>
            <arr name="patAssignorName">
                <str>CLAMEN, LIANE</str>
                <str>GLAZER, PAUL</str>
            </arr>
            <arr name="patAssigneeName">
                <str>OJO, LLC</str>
            </arr>
            <arr name="applNum">
                <str>13096729</str>
            </arr>
            <arr name="intlRegNum">
                <str>NULL</str>
            </arr>
            <int name="applNumSize">1</int>
            <int name="patNumSize">1</int>
            <str name="inventionTitleFirst">INJECTABLE PHYSIOLOGICALLY ADAPTIVE INTRAOCULAR LENSES (IOL'S)</str>
            <str name="applNumFirst">13096729</str>
            <date name="filingDateFirst">2011-04-28T04:00:00Z</date>
            <date name="intlPublDateFirst">0001-01-01T00:00:00Z</date>
            <str name="intlRegNumFirst">NULL</str>
            <date name="issueDateFirst">0001-01-01T00:00:00Z</date>
            <str name="patNumFirst">NULL</str>
            <date name="publDateFirst">2011-11-03T04:00:00Z</date>
            <str name="publNumFirst">20110270389</str>
        </doc>
    </result>
    <lst name="facet_counts">
        <lst name="facet_queries">
            <int name="conveyanceText:&quot;ASSIGNMENT OF ASSIGNORS INTEREST&quot;">1</int>
            <int name="conveyanceText:&quot;SECURITY INTEREST&quot;">0</int>
            <int name="conveyanceText:&quot;NUNC PRO TUNC ASSIGNMENT&quot;">0</int>
            <int name="conveyanceText:&quot;RELEASE BY SECURED PARTY&quot;">0</int>
            <int name="conveyanceText:&quot;MERGER&quot;">0</int>
            <int name="conveyanceText:&quot;CHANGE OF NAME&quot;">0</int>
            <int name="conveyanceText:&quot;MERGER AND CHANGE OF NAME&quot;">0</int>
            <int name="conveyanceText:&quot;LICENSE&quot;">0</int>
            <int name="conveyanceText:&quot;LIEN&quot;">0</int>
            <int name="conveyanceText:&quot;MORTGAGE&quot;">0</int>
            <int name="conveyanceText:&quot;OPTION&quot;">0</int>
            <int name="conveyanceText:&quot;DECREE OF DISTRIBUTION&quot;">0</int>
            <int name="conveyanceText:&quot;LETTERS OF TESTAMENTARY&quot;">0</int>
            <int name="conveyanceText:&quot;LETTERS OF ADMINISTRATION&quot;">0</int>
            <int name="conveyanceText:&quot;COURT APPOINTMENT&quot;">0</int>
            <int name="conveyanceText:&quot;CONDITIONAL ASSIGNMENT&quot;">0</int>
            <int name="conveyanceText:&quot;COURT ORDER&quot;">0</int>
        </lst>
        <lst name="facet_fields">
            <lst name="patAssigneeCityFacet">
                <int name="CHESTNUT HILL">1</int>
            </lst>
            <lst name="patAssigneeStateFacet">
                <int name="MASSACHUSETTS">1</int>
            </lst>
            <lst name="patAssigneePostcodeFacet">
                <int name="02467">1</int>
            </lst>
            <lst name="patAssigneeCountryNameFacet">
                <int name="NULL">1</int>
            </lst>
            <lst name="patAssigneeNameFacet">
                <int name="OJO, LLC">1</int>
            </lst>
            <lst name="corrNameFacet">
                <int name="LOCKE LORD LLP">1</int>
            </lst>
            <lst name="patAssignorNameFacet">
                <int name="CLAMEN, LIANE">1</int>
                <int name="GLAZER, PAUL">1</int>
            </lst>
            <lst name="applNum">
                <int name="13096729">1</int>
            </lst>
            <lst name="patNum">
                <int name="NULL">1</int>
            </lst>
            <lst name="publNum">
                <int name="20110270389">1</int>
            </lst>
            <lst name="intlRegNum">
                <int name="NULL">1</int>
            </lst>
        </lst>
    </lst>
    <lst name="highlighting">
        <lst name="48719-909">
            <arr name="patAssigneeName">
                <str>&lt;em&gt;OJO&lt;/em&gt;, LLC</str>
            </arr>
        </lst>
    </lst>
</response>

Here is what I've tried:

    // Build the HTTP client.
    // NOTE(review): both curl options below switch off TLS peer/host verification;
    // presumably a local-development workaround - confirm before using it elsewhere.
    $client = new Client(array( 'curl' => array( CURLOPT_SSL_VERIFYPEER => false, CURLOPT_SSL_VERIFYHOST =>0), ));
    // Call the basicSearch endpoint; the request's 'query' input is concatenated
    // onto the URL as-is (no URL-encoding is applied here).
    $res = $client->request('GET', 'https://assignment-api.uspto.gov/patent/basicSearch?query='.$request->input('query'));
    // Read the whole response body into a string (presumably XML like the sample above).
    $respons = $res->getBody()->getContents();
    // dd($respons);
    // Hand the raw response string to the XML parser wrapper.
    $xml = XmlParser::extract($respons);
    // Map each output key to a dotted path below <result>.
    // NOTE(review): the sample response contains no elements named publNum, corrName, etc.;
    // its fields are <str>/<arr>/<date> elements inside <result><doc>, distinguished only by
    // their name="..." attribute, so these paths presumably match nothing - confirm against
    // the parser's selector syntax.
    $doc = $xml->parse([
        'publNum' => ['uses' => 'result.publNum'],
        'inventionTitle' => ['uses' => 'result.inventionTitle'],
        'inventors' => ['uses' => 'result.inventors'],
        'corrName' => ['uses' => 'result.corrName'],
        'corrAddress1' => ['uses' => 'result.corrAddress1'],
        'corrAddress2' => ['uses' => 'result.corrAddress2'],
        'corrAddress3' => ['uses' => 'result.corrAddress3'],
        'patAssigneeName' => ['uses' => 'result.patAssigneeName'],
        'patAssigneeAddress1' => ['uses' => 'result.patAssigneeAddress1'],
        'patAssigneeAddress2' => ['uses' => 'result.patAssigneeAddress2'],
        'patAssigneeCity' => ['uses' => 'result.patAssigneeCity'],
        'patAssigneeState' => ['uses' => 'result.patAssigneeState'],
        'patAssigneeCountryName' => ['uses' => 'result.patAssigneeCountryName'],
    ]);

    // Dump the parsed result for inspection.
    dd($doc);

I've tried using SimpleXML too, but nothing seems to work. Any help will be highly appreciated. Thanks.


Solution

  • It may be because the selector you're using refers to the value of a name attribute rather than an actual element name. The same problem would occur with SimpleXML etc., so what I have done here is process it in two steps. The first step converts the document into an associative array, using each element's name attribute as the key and the element's text as the value. If an element has sub-elements, their values are added as an array instead...

    // Flatten the first <doc> of the response into an associative array keyed by
    // each child's name="..." attribute. Leaf elements become strings; elements
    // with children (the <arr> lists) become arrays of strings.
    $xml = simplexml_load_string($respons);
    $details = [];
    // simplexml_load_string() returns false on malformed XML, and a search with
    // no hits has no <doc>; in both cases leave $details empty instead of
    // dereferencing a missing node.
    if ( $xml !== false && isset($xml->result->doc) )   {
        foreach ( $xml->result->doc->children() as $element )   {
            // The element name (str/arr/date/int) only carries the type; the
            // field name lives in the name attribute.
            $name = (string)$element['name'];
            if ( count($element) > 0 )  {
                // Multi-valued field: collect every child value in order.
                foreach ( $element->children() as $sub )   {
                    $details[$name][] = (string)$sub;
                }
            }
            else    {
                $details[$name] = (string)$element;
            }
        }
    }
    
    print_r($details);
    

    will give an output of

    Array
    (
        [id] => 48719-909
        [reelNo] => 48719
        [frameNo] => 909
        [conveyanceText] => ASSIGNMENT OF ASSIGNORS INTEREST (SEE DOCUMENT FOR DETAILS).
        [corrName] => LOCKE LORD LLP
        [corrAddress1] => P.O. BOX 55874
        [corrAddress2] => BOSTON, MA 02205
        [patAssignorEarliestExDate] => 2019-03-20T04:00:00Z
        [patAssignorName] => Array
            (
                [0] => CLAMEN, LIANE
                [1] => GLAZER, PAUL
            )
    
        [patAssigneeName] => Array
            (
                [0] => OJO, LLC
            )
    
        [applNum] => Array
            (
                [0] => 13096729
            )
    
        [intlRegNum] => Array
            (
                [0] => NULL
            )
    
        [applNumSize] => 1
        [patNumSize] => 1
        [inventionTitleFirst] => INJECTABLE PHYSIOLOGICALLY ADAPTIVE INTRAOCULAR LENSES (IOL'S)
        [applNumFirst] => 13096729
        [filingDateFirst] => 2011-04-28T04:00:00Z
        [intlPublDateFirst] => 0001-01-01T00:00:00Z
        [intlRegNumFirst] => NULL
        [issueDateFirst] => 0001-01-01T00:00:00Z
        [patNumFirst] => NULL
        [publDateFirst] => 2011-11-03T04:00:00Z
        [publNumFirst] => 20110270389
    )
    

    (Note that this only picks out the values from the <result> element).

    Then you can access it using something like

    'publNum' => $details['publNumFirst'],
    'inventionTitle' => $details['inventionTitleFirst'],
    

    This assumes there is only one item to read. If there are more, it would be a case of adjusting the foreach() to loop over each <doc> and keeping the results as an array of arrays (ask if you get stuck).