【问题标题】:Parsing Complex XML in JAVA(在 JAVA 中解析复杂的 XML)
【发布时间】:2020-01-23 20:49:40
【问题描述】:

我是 Stackoverflow 的新手,所以我正在掌握它的工作方式! :)

我正在编写一个程序,它需要我解析一些 XML 文件,但是我遇到了一些困难,因为该文件非常复杂。

尝试使用 DOM,非常感谢任何建议。

<?xml version="1.0" encoding="UTF-8"?>

<REPORT xmlns:xs="http://www.w3.org/2001/XMLSchema">
    <REPORT_HEADER statement_id="4" registered_currency="EUR" run_type="INDIC" publication_timestamp="2019-01-21T22:44:36+00:00" publication_date="2019-01-21" settlement_date="2019-01-20" market_name="B" participant_name="PT" report_type="SS"/>
    <REPORT_SUMMARY>
        <CHARGE_SUMMARY amount="0.0000" date="2019-01-20" name="CPREMIUM"/>
        <CHARGE_SUMMARY amount="-3.8895" date="2019-01-20" name="CUNIMB"/>
    </REPORT_SUMMARY>
    <REPORT_DETAIL>
        <RESOURCE name="GU">
            <CHARGE name="CAB">
                <VALUE amount="3.0000" datetime="2019-01-20T00:30:00+00:00"/>
                <VALUE amount="0.0000" datetime="2019-01-20T01:30:00+00:00"/>
            </CHARGE>
            <CHARGE name="CPO">
                <VALUE amount="0.0000" datetime="2019-01-20T00:30:00+00:00"/>
                <VALUE amount="0.0000" datetime="2019-01-20T01:30:00+00:00"/>
            </CHARGE>
        </RESOURCE>
        <RESOURCE name="PU">
            <CHARGE name="COD">
                <VALUE amount="0.0000" datetime="2019-01-20T00:30:00+00:00"/>
                <VALUE amount="0.0000" datetime="2019-01-20T01:30:00+00:00"/>
            </CHARGE>
            <CHARGE name="MOD">
                <VALUE amount="1.0000" datetime="2019-01-20T00:30:00+00:00"/>
                <VALUE amount="2.0000" datetime="2019-01-20T01:30:00+00:00"/>
            </CHARGE>
        </RESOURCE>
    </REPORT_DETAIL>
</REPORT>

所需的输出,我的目标。

CHARGE_SUMMARY amount="0.0000" date="2019-01-20" name="CPREMIUM"
CHARGE_SUMMARY amount="-3.8895" date="2019-01-20" name="CUNIMB"

RESOURCE name="GU" CHARGE name="CAB" amount="3.0000" datetime="2019-01-20T00:30:00+00:00"
RESOURCE name="GU" CHARGE name="CAB" amount="0.0000" datetime="2019-01-20T01:30:00+00:00"
RESOURCE name="GU" CHARGE name="CPO" amount="0.0000" datetime="2019-01-20T00:30:00+00:00"
RESOURCE name="GU" CHARGE name="CPO" amount="0.0000" datetime="2019-01-20T01:30:00+00:00"

RESOURCE name="PU" CHARGE name="COD" VALUE amount="0.0000" datetime="2019-01-20T00:30:00+00:00"
RESOURCE name="PU" CHARGE name="COD" VALUE amount="0.0000" datetime="2019-01-20T01:30:00+00:00"
RESOURCE name="PU" CHARGE name="MOD" VALUE amount="1.0000" datetime="2019-01-20T00:30:00+00:00"
RESOURCE name="PU" CHARGE name="MOD" VALUE amount="2.0000" datetime="2019-01-20T01:30:00+00:00"

我能够使用 DOM 解析 CHARGE_SUMMARY,见下文

 package Project;

 import java.io.File;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.DocumentBuilder;
 import org.w3c.dom.Document;
 import org.w3c.dom.NodeList;
 import org.w3c.dom.Node;
 import org.w3c.dom.Element;

 import java.util.*; // Required for Hashmap


 public class Parse_File_SS{

 public HashMap<String,ArrayList<String>> parsing_SS(String path , String report_type){

    String path_location = path;
 System.out.println("Parsing SS: " + "String: "+ path_location);

 String report = report_type;
 System.out.println("Report type: "+ report);

 HashMap<String,ArrayList<String>> hm=new HashMap<String,ArrayList<String>>();  

 try {
 File inputFile = new File(path_location);
 DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
 DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
 Document doc = dBuilder.parse(inputFile);
 doc.getDocumentElement().normalize();
 //[GETTING ROOT ELEMENT]
 System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
  NodeList nList1 = doc.getElementsByTagName("CHARGE_SUMMARY");
   //Iterating through CHARGE_SUMMARY List, within REPORT_SUMMARY******************************************************************
      for (int temp = 0; temp < nList1.getLength(); temp++) {
                        Node nNode = nList1.item(temp);
                        System.out.println("\nCurrent Element :" + nNode.getNodeName());

                        //Get Values Associated with Charge Summary

                        //1. amount
                        if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                            Element eElement = (Element) nNode;
                            System.out.println("amount : " 
                               + eElement.getAttribute("amount"));
                        }
                        //2. date
                        if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                            Element eElement = (Element) nNode;
                            System.out.println("date : " 
                               + eElement.getAttribute("date"));
                        }
                        //3. name
                        if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                            Element eElement = (Element) nNode;
                            System.out.println("name : " 
                               + eElement.getAttribute("name"));
                        }
      } //     

但下面是我遇到问题的地方:使用 NodeList nList3 = doc.getElementsByTagName("CHARGE"); 时,我遍历到的是文档中所有的 CHARGE 元素,而不是我需要的那个特定 CHARGE。

  //**********[GETTING TAGNAME: REPORT_DETAIL]******************************************************************           
   NodeList nList3 = doc.getElementsByTagName("CHARGE");
   System.out.println("-----------CHARGE-----------------");         


        //We need to iterate through the CHARGE within RESOURCE within REPORT_DETAIL******************************************************
        for (int temp3 = 0; temp3 < nList3.getLength(); temp3++) {  

                 Node nNode3 = nList3.item(temp3);
                 System.out.println("\nCurrent Element :" + nNode3.getNodeName());
            if (nNode3.getNodeType() == Node.ELEMENT_NODE) {
                    Element eElement3 = (Element) nNode3;
                    System.out.println("name : "  + eElement3.getAttribute("name"));


              //**** If  [1]  CHARGE_SUMMARY  name = CAB   ********************************************************************************                      
                          // now if name = CAB iterate through the values

                            if(eElement3.getAttribute("name").contains("CAB")) { 
                                System.out.println("CAB is Present");

                          //**********[GETTING TAGNAME: VALUE]**************************************************************    
                               NodeList nList_CAB = doc.getElementsByTagName("CAB");   //Get the list of values                                
                               System.out.println("The Length of the list is CAB: "+nList_CAB.getLength());

列表的长度为零,这就是问题所在。

                           //Now iterate through them
                                         for (int temp_CAB = 0; temp_CAB < nList_CAB.getLength(); temp_CAB++) {
                                                Node nNode_CAB = nList_CAB.item(temp_CAB);
                                                System.out.println("\nCurrent Element of values:" + nNode_CAB.getNodeName()); 

                                          //1. date time
                                            if (nNode_CAB.getNodeType() == Node.ELEMENT_NODE) {
                                                    Element eElement_CAB = (Element) nNode_CAB;
                                                    System.out.println("datetime : "  + eElement_CAB.getAttribute("datetime"));
                                            }
                                          //2. amount
                                            if (nNode_CAB.getNodeType() == Node.ELEMENT_NODE) {
                                                    Element eElement_CAB = (Element) nNode_CAB;
                                                    System.out.println("amount : " + eElement_CAB.getAttribute("amount"));

                                             }
                                           }   
                                       }               
                }    
  }

    } catch (Exception e) {
       e.printStackTrace();

    }       
       return hm;
    }

【问题讨论】:

  • 您的预期输出到底是什么?您已经尝试过什么?
  • 请更具体地说明您期望获得的内容,并提供用于解析 XML 的代码片段,以便用户了解问题的上下文。
  • 我建议您查看jaxb
  • <RESOURCE> 标签未关闭。
  • 我已经更新了上面的细节,xml文件,目标输出和代码

标签: java xml dom xpath


【解决方案1】:

我会为此使用 XPath,因为我认为它使意图更加清晰:

/**
 * Reads {@code input.xml} from the working directory and prints one line per
 * {@code CHARGE_SUMMARY} element, followed by one line per {@code VALUE}
 * element prefixed with the names of its enclosing RESOURCE and CHARGE.
 * A blank line is printed whenever the RESOURCE name differs from the one seen
 * on the previous VALUE, which includes the very first VALUE.
 *
 * @param args not used
 * @throws Exception if the document cannot be parsed or an XPath query fails
 */
public static void main(String... args)
        throws Exception
{
    // Load the report as a DOM tree and create the XPath evaluator used below.
    Document report = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(new File("input.xml"));
    XPath xp = XPathFactory.newInstance().newXPath();

    // Summary section: one output line per CHARGE_SUMMARY element.
    NodeList summaryNodes = (NodeList) xp.evaluate(
            "/REPORT/REPORT_SUMMARY/CHARGE_SUMMARY", report, XPathConstants.NODESET);
    for (int idx = 0; idx < summaryNodes.getLength(); idx++) {
        Element summary = (Element) summaryNodes.item(idx);
        String amount = summary.getAttribute("amount");
        String date = summary.getAttribute("date");
        String name = summary.getAttribute("name");

        System.out.printf("CHARGE_SUMMARY amount=\"%s\" date=\"%s\" name=\"%s\"%n",
                amount, date, name);
    }

    // Detail section: one output line per VALUE, labelled with its ancestors.
    NodeList valueNodes = (NodeList) xp.evaluate(
            "/REPORT/REPORT_DETAIL/RESOURCE/CHARGE/VALUE", report, XPathConstants.NODESET);
    String previousResource = null;
    for (int idx = 0; idx < valueNodes.getLength(); idx++) {
        Element valueEl = (Element) valueNodes.item(idx);
        // The query path fixes the ancestry: VALUE -> CHARGE -> RESOURCE.
        Element chargeEl = (Element) valueEl.getParentNode();
        Element resourceEl = (Element) chargeEl.getParentNode();
        String resourceName = resourceEl.getAttribute("name");

        // A change of RESOURCE name starts a new group, separated by a blank line.
        boolean sameResource = resourceName.equals(previousResource);
        if (!sameResource) {
            previousResource = resourceName;
            System.out.println();
        }

        System.out.printf("RESOURCE name=\"%s\" CHARGE name=\"%s\" VALUE amount=\"%s\" datetime=\"%s\"%n",
                resourceName,
                chargeEl.getAttribute("name"),
                valueEl.getAttribute("amount"),
                valueEl.getAttribute("datetime"));
    }
}

这是输出:

CHARGE_SUMMARY amount="0.0000" date="2019-01-20" name="CPREMIUM"
CHARGE_SUMMARY amount="-3.8895" date="2019-01-20" name="CUNIMB"

RESOURCE name="GU" CHARGE name="CAB" VALUE amount="3.0000" datetime="2019-01-20T00:30:00+00:00"
RESOURCE name="GU" CHARGE name="CAB" VALUE amount="0.0000" datetime="2019-01-20T01:30:00+00:00"
RESOURCE name="GU" CHARGE name="CPO" VALUE amount="0.0000" datetime="2019-01-20T00:30:00+00:00"
RESOURCE name="GU" CHARGE name="CPO" VALUE amount="0.0000" datetime="2019-01-20T01:30:00+00:00"

RESOURCE name="PU" CHARGE name="COD" VALUE amount="0.0000" datetime="2019-01-20T00:30:00+00:00"
RESOURCE name="PU" CHARGE name="COD" VALUE amount="0.0000" datetime="2019-01-20T01:30:00+00:00"
RESOURCE name="PU" CHARGE name="MOD" VALUE amount="1.0000" datetime="2019-01-20T00:30:00+00:00"
RESOURCE name="PU" CHARGE name="MOD" VALUE amount="2.0000" datetime="2019-01-20T01:30:00+00:00"

【讨论】:

  • 嗨,Seán,非常感谢您抽出宝贵时间!很棒的东西,希望这对以后的人也有帮助!
【解决方案2】:

Java/DOM 对于这项工作来说是一个非常低级的工具。 XSLT 有一点学习曲线,但它最终的代码要少得多。这是 XSLT 3.0 中的解决方案(最困难的部分是将空行放在正确的位置):

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0" expand-text="yes">
    <!-- Drop whitespace-only text nodes so the source indentation never reaches the output. -->
    <xsl:strip-space elements="*"/>
    <xsl:output method="text"/>
    <!-- One line per CHARGE_SUMMARY, using the element name and quoted attribute values. -->
    <xsl:template match="REPORT_SUMMARY/CHARGE_SUMMARY">
        <xsl:text>CHARGE_SUMMARY amount="{@amount}" date="{@date}" name="{@name}"&#xa;</xsl:text>
    </xsl:template>
    <!-- Emit a blank line before each RESOURCE group, then process its children. -->
    <xsl:template match="RESOURCE">
        <xsl:text>&#xa;</xsl:text>
        <xsl:apply-templates/>
    </xsl:template>
    <!-- One line per VALUE, prefixed with the names of its RESOURCE and CHARGE ancestors. -->
    <xsl:template match="RESOURCE/CHARGE/VALUE">
        <xsl:text>RESOURCE name="{../../@name}" CHARGE name="{../@name}" VALUE amount="{@amount}" datetime="{@datetime}"&#xa;</xsl:text>
    </xsl:template>
</xsl:stylesheet>

您可以通过从 SourceForge 安装 Saxon-HE 并直接从命令行运行它来运行它:

java -jar saxon9he.jar -s:input.xml -xsl:stylesheet.xsl

或者您可以使用 API 从您的 Java 应用程序中运行它。

工作原理:处理器从根节点开始寻找匹配的模板规则;如果没有匹配的规则,它会下降一级继续查找,以此类推。这里有三条模板规则:一条在遇到 CHARGE_SUMMARY 时触发,一条在遇到 VALUE 时触发,这两条只是以相当直观的方式输出内容。match="RESOURCE" 模板仅用于在相邻资源之间输出空行:它先输出一个换行符(在 XML 中写作 &#xa;),然后调用 xsl:apply-templates,意思是“现在用合适的模板规则处理下一级节点”。

【讨论】:

  • 嗨迈克尔,感谢您的回复。我一直在努力使用 DOM,很高兴知道它不是这项工作的理想工具。 XSLT 很有趣,我将不得不做一些额外的阅读!
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2019-10-29
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
相关资源
最近更新 更多