【发布时间】:2022-01-23 19:38:22
【问题描述】:
我有一个类似的 XML:
<Trait ID="4711" Type="Disease">
<!-- each phenotype -->
<Name>
<ElementValue Type="Preferred">Breast-ovarian cancer, familial 1</ElementValue>
<XRef ID="Breast-ovarian+cancer%2C+familial+1/7865" DB="Genetic Alliance"/>
</Name>
<Name>
<ElementValue Type="Alternate">BREAST-OVARIAN CANCER, FAMILIAL, SUSCEPTIBILITY TO, 1</ElementValue>
<XRef Type="MIM" ID="604370" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0001" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0002" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0003" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0004" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0005" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0006" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0007" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0008" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0009" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0010" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0011" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0012" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0013" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0014" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0015" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0016" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0017" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0018" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0019" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0020" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0021" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0022" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0023" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0024" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0025" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0026" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0027" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0028" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0029" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0030" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0031" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0032" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0033" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0034" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0035" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0036" DB="OMIM"/>
<XRef Type="Allelic variant" ID="113705.0037" DB="OMIM"/>
</Name>
<Name>
<ElementValue Type="Alternate">OVARIAN CANCER, SUSCEPTIBILITY TO</ElementValue>
<XRef Type="Allelic variant" ID="602667.0001" DB="OMIM"/>
</Name>
<Name>
<ElementValue Type="Alternate">BREAST CANCER, FAMILIAL, SUSCEPTIBILITY TO, 1</ElementValue>
<XRef ID="604370" DB="OMIM"/>
</Name>
<Name>
<ElementValue Type="Alternate">Breast cancer, familial 1</ElementValue>
</Name>
<Name>
<ElementValue Type="Alternate">Breast-ovarian cancer, familial 1 and 2</ElementValue>
<XRef ID="GTR000310494" DB="Laboratory of Genetics,HUSLAB"/>
</Name>
<Name>
<ElementValue Type="Alternate">BRCA1 Gene Mutation</ElementValue>
<XRef ID="GTR000501743" DB="Myriad Genetic Laboratories,Myriad Genetic Laboratories, Inc."/>
</Name>
</Trait>
我想将 XML 解析为如下数据框:
我想使用 R 的 XML 包,但问题是各个 Name 节点下 XRef 的数量与性状名称(ElementValue)的数量并不一致:有的 Name 下有多个 XRef,有的一个也没有。我可以用 for 循环解决这个问题,但这通常不是“R 的方式”。我想知道是否有更简单的解决方案(例如使用 XPath 查询)?
我正在尝试这样的事情:
# Attempt: build one data frame per <Name> node of Trait 4711, then row-bind
# the per-node data frames into `x`.
x <- do.call(rbind, xpathApply(xml_1, "//TraitSet/Trait[@ID='4711']/Name", function(node) {
# Text of this <Name> node's <ElementValue> child.
trait <- xmlValue(node[["ElementValue"]])
# NOTE(review): `xp` is assigned here but never used below.
xp <- "//TraitSet/Trait[@ID='4711']/Name/XRef"
# NOTE(review): this query runs on `xmltop` (not on `node`) with an absolute
# path and no [@ID='4711'] filter, so each call collects the ID/DB attributes
# of every matching XRef in the document rather than only this node's XRefs.
# This is the likely cause of the duplicated records; confirm by querying
# relative to `node` instead.
DB <- sapply(c("ID","DB"), function(x) xpathSApply(xmltop, '//TraitSet/Trait/Name/XRef', xmlGetAttr, x))
# NOTE(review): sapply() over a two-element vector presumably never yields
# NULL, so this fallback for <Name> nodes without XRef likely never fires.
if (is.null(DB)) DB <- NA
# `trait` (length 1) is recycled across all rows of `DB`.
data.frame(trait, DB, stringsAsFactors = FALSE)
}))
但是结果中的记录被错误地成倍重复了。
如能得到帮助,我将不胜感激!谢谢。
【问题讨论】: