Search code examples
r xml2

R & xml2: parsing values from an XML document into a vector or data.frame


I'm trying to parse the variable names, indexes and values from the XML below. Selecting the <variables> node works, but extracting the actual attribute values from each <variable> element has been a bit of a struggle. Could someone point me in the right direction?

# Load xml2 with library() so that a missing package fails immediately;
# require() would only return FALSE and let the script fail later on.
library(xml2)

# CPLEX solution file, held inline as a string so the example is reproducible.
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
 <header
   problemName="Oil-blending.lp"
   objectiveValue="287750"
   solutionTypeValue="1"
   solutionTypeString="basic"
   solutionStatusValue="1"
   solutionStatusString="optimal"
   solutionMethodString="dual"
   primalFeasible="1"
   dualFeasible="1"
   simplexIterations="14"
   writeLevel="1"/>
 <quality
   epRHS="1e-06"
   epOpt="1e-06"
   maxPrimalInfeas="0"
   maxDualInfeas="0"
   maxPrimalResidual="9.66338120633736e-13"
   maxDualResidual="7.105427357601e-15"
   maxX="7500"
   maxPi="57.25"
   maxSlack="4000"
   maxRedCost="40.9"
   kappa="83.7880434782609"/>
 <linearConstraints>
  <constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
  <constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
  <constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
  <constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
  <constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
  <constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
  <constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
  <constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
  <constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
  <constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
  <constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
  <constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
  <constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
 </linearConstraints>
 <variables>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
  <variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
  <variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
  <variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
  <variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
 </variables>
</CPLEXSolution>'

# Parse the string into an XML document, then select the <variables> node.
x <- read_xml(xml_file)
vars <- xml_find_all(x, "//variables")

Solution

  • The xml2 package is a good choice for this type of problem. Your starting code above was close; you just needed to select the child "variable" nodes and extract the text of each attribute of interest.

    library(xml2)
    x <- read_xml(xml_file)

    # Select the parent <variables> node.
    vars <- xml_find_all(x, "//variables")

    # Select the <variable> nodes beneath it. The leading "." makes the XPath
    # relative to `vars`; a bare "//variable" would search the whole document
    # and ignore the node set passed in.
    variable <- xml_find_all(vars, ".//variable")

    # Read the "name", "index" and "value" attributes as text, converting the
    # latter two to integer and numeric respectively.
    vnames <- xml_attr(variable, "name")
    index <- as.integer(xml_attr(variable, "index"))
    values <- as.numeric(xml_attr(variable, "value"))

    # `vnames` holds the variable names, should they be wanted as a column too.
    data.frame(index, values)
    

    Sample output:

    data.frame(index, values)
       index    values
    1      0 2222.2222
    2      1  444.4444
    3      2  333.3333
    4      3 2111.1111
    5      4 4222.2222
    6      5 3166.6667
    7      6  666.6667
    8      7  333.3333
    9      8    0.0000
    10     9    0.0000
    11    10 7500.0000
    12    11    0.0000
    13    12    0.0000