【问题标题】:R & xml2: parsing an xml document values to vector or data.frame(R & xml2:将 xml 文档中的值解析为 vector 或 data.frame)
【发布时间】:2019-02-09 14:32:35
【问题描述】:

我正在尝试从下面的 xml 中解析出变量的名称(name)、索引(index)和值(value)。我已经能够选取出 variables 节点,但要从每个 variable 节点中取出实际的值时遇到了困难。有人能给我指出正确的方向吗?

# Attach xml2 with library() so a missing package fails immediately here,
# instead of require() returning FALSE and the script failing later at read_xml().
library(xml2)
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
 <header
   problemName="Oil-blending.lp"
   objectiveValue="287750"
   solutionTypeValue="1"
   solutionTypeString="basic"
   solutionStatusValue="1"
   solutionStatusString="optimal"
   solutionMethodString="dual"
   primalFeasible="1"
   dualFeasible="1"
   simplexIterations="14"
   writeLevel="1"/>
 <quality
   epRHS="1e-06"
   epOpt="1e-06"
   maxPrimalInfeas="0"
   maxDualInfeas="0"
   maxPrimalResidual="9.66338120633736e-13"
   maxDualResidual="7.105427357601e-15"
   maxX="7500"
   maxPi="57.25"
   maxSlack="4000"
   maxRedCost="40.9"
   kappa="83.7880434782609"/>
 <linearConstraints>
  <constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
  <constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
  <constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
  <constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
  <constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
  <constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
  <constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
  <constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
  <constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
  <constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
  <constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
  <constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
  <constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
 </linearConstraints>
 <variables>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
  <variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
  <variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
  <variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
  <variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
 </variables>
</CPLEXSolution>'

# Parse the XML text held in `xml_file` into an xml2 document.
x <- xml2::read_xml(xml_file)
# Select every <variables> element anywhere in the document.
vars <- xml2::xml_find_all(x, xpath = "//variables")

【问题讨论】:

    标签: r xml2


    【解决方案1】:

    xml2 包是解决此类问题的不错选择。您上面的起始代码已经很接近了,只需要再选取出 variables 下的 “variable” 子节点,并从感兴趣的属性(name、index、value)中提取文本即可。

    library(xml2)
    x <- read_xml(xml_file)
    # Locate the parent <variables> node.
    vars <- xml_find_all(x, "//variables")

    # Select the <variable> children of that node. The path is relative
    # ("./"): an XPath starting with "//" is evaluated from the document root
    # and would ignore `vars` entirely, matching <variable> nodes anywhere.
    variable <- xml_find_all(vars, "./variable")
    # Read the attributes of interest; xml_attr() returns character vectors,
    # so "index" and "value" are converted to integer / numeric.
    vnames <- xml_attr(variable, "name")
    index <- as.integer(xml_attr(variable, "index"))
    values <- as.numeric(xml_attr(variable, "value"))

    # `vnames` is aligned with `index` and `values` and can be added as a
    # further column if the variable names are needed.
    data.frame(index, values)
    

    示例输出:

    data.frame(index, values)
       index    values
    1      0 2222.2222
    2      1  444.4444
    3      2  333.3333
    4      3 2111.1111
    5      4 4222.2222
    6      5 3166.6667
    7      6  666.6667
    8      7  333.3333
    9      8    0.0000
    10     9    0.0000
    11    10 7500.0000
    12    11    0.0000
    13    12    0.0000
    

    【讨论】:

      【解决方案2】:

      使用 stringr,通过正则表达式直接从 XML 文本中提取各 variable 的 value 属性:

      xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
      <CPLEXSolution version="1.2">
      <header
      problemName="Oil-blending.lp"
      objectiveValue="287750"
      solutionTypeValue="1"
      solutionTypeString="basic"
      solutionStatusValue="1"
      solutionStatusString="optimal"
      solutionMethodString="dual"
      primalFeasible="1"
      dualFeasible="1"
      simplexIterations="14"
      writeLevel="1"/>
      <quality
      epRHS="1e-06"
      epOpt="1e-06"
      maxPrimalInfeas="0"
      maxDualInfeas="0"
      maxPrimalResidual="9.66338120633736e-13"
      maxDualResidual="7.105427357601e-15"
      maxX="7500"
      maxPi="57.25"
      maxSlack="4000"
      maxRedCost="40.9"
      kappa="83.7880434782609"/>
      <linearConstraints>
      <constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
      <constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
      <constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
      <constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
      <constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
      <constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
      <constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
      <constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
      <constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
      <constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
      <constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
      <constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
      <constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
      </linearConstraints>
      <variables>
      <variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
      <variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
      <variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
      <variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
      <variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
      <variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
      <variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
      <variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
      <variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
      <variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
      <variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
      <variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
      <variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
      </variables>
      </CPLEXSolution>'
      
      library(stringr)
      # Capture the text of each value="..." attribute that is immediately
      # followed by the reducedCost attribute. "[^\"]+" stops at the closing
      # quote, so the match cannot run past the attribute, and it also accepts
      # negative or exponent-form numbers, which a leading "[0-9]+" would skip.
      value_pattern <- "(?<=value=\")[^\"]+(?=\" reducedCost)"
      as.numeric(str_extract_all(xml_file, value_pattern)[[1]])
      [1] 2222.2222  444.4444  333.3333 2111.1111 4222.2222 3166.6667  666.6667  333.3333    0.0000    0.0000
      [11] 7500.0000    0.0000    0.0000
      

      【讨论】:

        猜你喜欢
        • 2022-01-06
        • 1970-01-01
        • 2017-10-30
        • 1970-01-01
        • 1970-01-01
        • 2021-12-19
        • 1970-01-01
        • 2020-07-10
        相关资源
        最近更新 更多