【问题标题】:how to split an XML file into multiple XML files using java如何使用java将XML文件拆分为多个XML文件
【发布时间】:2015-03-20 12:08:52
【问题描述】:

我第一次在 Java 中使用 XML 文件,我需要一些帮助。我正在尝试使用 Java 将 XML 文件拆分为多个 XML 文件

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<products>
    <product>
        <description>Sony 54.6" (Diag) Xbr Hx929 Internet Tv</description>
        <gtin>00027242816657</gtin>
        <price>2999.99</price>
        <orderId>2343</orderId>
        <supplier>Sony</supplier>
    </product>
    <product>
        <description>Apple iPad 2 with Wi-Fi 16GB - iOS 5 - Black
        </description>
        <gtin>00885909464517</gtin>
        <price>399.0</price>
        <orderId>2343</orderId>
        <supplier>Apple</supplier>
    </product>
    <product>
        <description>Sony NWZ-E464 8GB E Series Walkman Video MP3 Player Blue
        </description>
        <gtin>00027242831438</gtin>
        <price>91.99</price>
        <orderId>2343</orderId>
        <supplier>Sony</supplier>
    </product>
    <product>
        <description>Apple MacBook Air A 11.6" Mac OS X v10.7 Lion MacBook
        </description>
        <gtin>00885909464043</gtin>
        <price>1149.0</price>
        <orderId>2344</orderId>
        <supplier>Apple</supplier>
    </product>
    <product>
        <description>Panasonic TC-L47E50 47" Smart TV Viera E50 Series LED
            HDTV</description>
        <gtin>00885170076471</gtin>
        <price>999.99</price>
        <orderId>2344</orderId>
        <supplier>Panasonic</supplier>
    </product>
</products>

我正在尝试获取三个 XML 文档,例如:

 <?xml version="1.0" encoding="UTF-8"?>
<products>
        <product>
            <description>Sony 54.6" (Diag) Xbr Hx929 Internet Tv</description>
            <gtin>00027242816657</gtin>
            <price currency="USD">2999.99</price>
            <orderid>2343</orderid>
        </product>
        <product>
            <description>Sony NWZ-E464 8GB E Series Walkman Video MP3 Player Blue</description>
            <gtin>00027242831438</gtin>
            <price currency="USD">91.99</price>
            <orderid>2343</orderid>
        </product>
</products>

每个供应商一个。我怎样才能收到它?对此的任何帮助都会很棒。

【问题讨论】:

  • 到目前为止您已经尝试过什么? Java 提供了多种处理 XML 的方式,包括编组/解组 DOM 模型、流式 XML 读/写、运行 XSLT 转换等。
  • 我从不使用 XSLT。我能怎么做?我需要在 2 小时内解决这个问题。你能帮我吗?请? :(

标签: java xml


【解决方案1】:

确保将“inputFile”中的路径更改为文件以及输出部分:

StreamResult result = new StreamResult(new File("C:\xmls\" + supplier.trim() + ".xml"));

这里是你的代码。

import java.io.File;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Splits {@code resources/products.xml} into one XML file per supplier.
 *
 * <p>Every {@code /products/product} element is grouped by the trimmed text of its
 * {@code supplier} child; each group is written to {@code resources/<supplier>.xml}
 * under a fresh {@code <products>} root.
 */
public class ExtractXml
{
    /** XPath selecting every product element directly under the products root. */
    private static final String PRODUCTS_XPATH = "/products/product";

    /**
     * Parses the input file, groups its products by supplier and writes one file per supplier.
     *
     * @param args unused
     * @throws Exception if the input cannot be parsed or an output file cannot be written
     */
    public static void main(String[] args) throws Exception
    {
        String inputFile = "resources/products.xml";

        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(new File(inputFile));

        // One entry per distinct supplier, so each output file is built and written exactly once.
        Map<String, List<Node>> productsBySupplier = groupProductsBySupplier(doc);

        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        for (Map.Entry<String, List<Node>> entry : productsBySupplier.entrySet())
        {
            String supplier = entry.getKey();
            List<Node> products = entry.getValue();

            System.out.println("Found " + products.size() +
                               " product(s) for supplier " + supplier);

            Document suppXml = buildSupplierDocument(dBuilder, products);

            // We store the new XML file in supplierName.xml e.g. Sony.xml
            Transformer transformer = transformerFactory.newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            StreamResult result = new StreamResult(new File("resources/" + supplier + ".xml"));
            transformer.transform(new DOMSource(suppXml), result);

            System.out.println("Done for " + supplier);
        }
    }

    /**
     * Groups the product elements of {@code doc} by supplier name.
     *
     * <p>The supplier name is the trimmed string value of the product's first
     * {@code supplier} child. Products without a supplier (or with a blank one) are
     * skipped. Grouping in a single pass avoids building an XPath from the supplier
     * name, which would break on names containing an apostrophe.
     *
     * @param doc parsed input document
     * @return suppliers in order of first appearance, each mapped to its product nodes
     * @throws XPathExpressionException if an XPath expression cannot be evaluated
     */
    static Map<String, List<Node>> groupProductsBySupplier(Document doc) throws XPathExpressionException
    {
        XPath xpath = XPathFactory.newInstance().newXPath();
        XPathExpression productsExpression = xpath.compile(PRODUCTS_XPATH);
        XPathExpression supplierExpression = xpath.compile("supplier");

        NodeList productNodes = (NodeList) productsExpression.evaluate(doc, XPathConstants.NODESET);

        // LinkedHashMap keeps the suppliers in document order.
        Map<String, List<Node>> grouped = new LinkedHashMap<String, List<Node>>();
        for (int i = 0; i < productNodes.getLength(); ++i)
        {
            Node product = productNodes.item(i);
            String supplier = supplierExpression.evaluate(product).trim();
            if (supplier.isEmpty())
            {
                continue;
            }

            List<Node> products = grouped.get(supplier);
            if (products == null)
            {
                products = new ArrayList<Node>();
                grouped.put(supplier, products);
            }
            products.add(product);
        }
        return grouped;
    }

    /**
     * Builds a new document with a {@code <products>} root holding deep copies of the given nodes.
     *
     * @param builder  factory for the new, empty document
     * @param products product nodes to copy; the source document is left untouched
     * @return the newly built document
     */
    static Document buildSupplierDocument(DocumentBuilder builder, List<Node> products)
    {
        Document suppXml = builder.newDocument();

        // we have to recreate the root node <products>
        Element root = suppXml.createElement("products");
        suppXml.appendChild(root);

        for (Node product : products)
        {
            // importNode deep-copies the node into the target document in one step.
            root.appendChild(suppXml.importNode(product, true));
        }
        return suppXml;
    }

}

【讨论】:

  • 非常感谢!这很棒!完美运行!
  • 如何生成类似 Apple23.xml 的文件名?其中 23 是订单 ID 的开头部分
  • 我把这个练习留给你 :)
【解决方案2】:

DOM 解析器会消耗更多内存。我更喜欢使用 SAX 解析器来读取和写入。

【讨论】:

    【解决方案3】:

    我喜欢 Xmappr (https://code.google.com/p/xmappr/) 的方法,您可以在其中使用简单的注释:

    首先是根元素 Products,它只包含一个 Product-instances 列表

    /**
     * Mapping class for the {@code <products>} root element.
     *
     * <p>NOTE(review): {@code @RootElement} and {@code @Element} are presumably the
     * Xmappr annotations; the imports are not shown in this snippet.
     */
    @RootElement
    public class Products {
    
        /** Product entries; read directly as {@code products.product} by the caller. */
        @Element
        public List<Product> product;
    }
    

    然后是产品类

    /**
     * Mapping class for a single product entry; every value is kept as a plain string.
     *
     * <p>NOTE(review): each field is presumably bound to the child element with the
     * same name. Confirm against the Xmappr documentation.
     */
    @RootElement
    public class Product {
    
       /** Product description text. */
       @Element
       public String description;
    
       /** Supplier name. */
       @Element
       public String supplier;
    
       /** GTIN, kept as a string. */
       @Element
       public String gtin;
    
       /** Price, kept as the raw string (not parsed into a number). */
       @Element
       public String price;
    
       /** Order identifier, kept as a string. */
       @Element
       public String orderId;
    }
    

    然后您只需从产品中获取产品实例:

    /**
     * Reads {@code test.xml}, maps it onto {@code Products} with Xmappr and prints the
     * description of every product.
     *
     * @param args unused
     * @throws FileNotFoundException if {@code test.xml} cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        Reader reader = new FileReader("test.xml");
        try {
            Xmappr xm = new Xmappr(Products.class);
            Products products = (Products) xm.fromXML(reader);

            // fetch list of products
            List<Product> listOfProducts = products.product;

            // do sth with the products in the list
            for (Product product : listOfProducts) {
                System.out.println(product.description);
            }
        } finally {
            // Always release the file handle, even when the mapping fails.
            try {
                reader.close();
            } catch (java.io.IOException ignored) {
                // The reader was only read from, so a failed close loses no data;
                // swallowing it keeps the declared throws clause unchanged.
            }
        }
    }
    

    然后你可以对产品做任何你想做的事情(例如,根据供应商对它们进行分类并将它们放到一个 xml 文件中)

    【讨论】:

    • Exception in thread "main" java.lang.NullPointerException at org.xmappr.ConfigurationProcessor.readRootElementAnnotations(ConfigurationProcessor.java:106) at org.xmappr.Xmappr.initialize(Xmappr.java:300) at org.xmappr.Xmappr.fromXML(Xmappr.java:127) at XMLReader.Test(XMLReader.java:269) at XMLReader.main(XMLReader.java:127)
    • 这是完整的堆栈跟踪吗?您是否对代码进行了任何修改?
    【解决方案4】:

    您可以在这里查看如何在 Java 中使用 DOM 解析 XML 文档: DOM XML Parser Example

    这里,如何编写新的 XML 文件: Create XML file using java

    此外,您可以学习 XPath 以轻松选择节点:Java Xpath expression

    如果性能不是您的首要目标,那么在加载 DOM 和 XPath 之后,您可以先使用以下 XPath 查询检索 xml 文档中的所有供应商:

    //supplier/text()
    

    你会得到类似的东西:

    Text='Sony'
    Text='Apple'
    Text='Sony'
    Text='Apple'
    Text='Panasonic'
    

    然后我会将这些结果放入 ArrayList 或其他任何集合中。第二步是遍历该集合,并针对每个项目查询 XML 输入文档,以提取属于该供应商的所有节点:

    /products/product[supplier='Sony'] 
    

    当然,在 java 中,您必须以动态方式构建最后一个 xpath 查询:

    String xpathQuery = "/products/product/[supplier='" + currentValue + "']
    

    之后,您将获得与您指定的供应商匹配的节点列表。下一步是构建新的 XML DOM 并将其保存在文件中。

    【讨论】:

    • 我想按供应商分类产品。我不知道该怎么做。你能给我一些代码吗?或类似的东西......
    • 我想按供应商分类产品。我不知道该怎么做。你能给我一些代码吗?或类似的东西......
    • 如何填充该 ArrayList?现在我有这个代码: String expSup = "//supplier/text()"; String path = "myFile.xml"; ArrayList suppliers = new ArrayList(); Document xmlDocument = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(path); XPath xPathSup = XPathFactory.newInstance().newXPath(); XPathExpression xPathExpression = xPathSup.compile(expSup)
    • 我该如何做一个数组列表...最后一个问题...请...是紧急情况...:-s
    【解决方案5】:

    考虑这个 xml

    <?xml version="1.0"?>
    <SSNExportDocument xmlns="urn:com:ssn:schema:export:SSNExportFormat.xsd" Version="0.1" DocumentID="b482350d-62bb-41be-b792-8a9fe3884601-1" ExportID="b482350d-62bb-41be-b792-8a9fe3884601" JobID="464" RunID="3532468" CreationTime="2019-04-16T02:20:01.332-04:00" StartTime="2019-04-15T20:20:00.000-04:00" EndTime="2019-04-16T02:20:00.000-04:00">
        <MeterData MeterName="MUNI1-11459398" UtilDeviceID="11459398" MacID="00:12:01:fae:fe:00:d5:fc">
            <RegisterData StartTime="2019-04-15T20:00:00.000-04:00" EndTime="2019-04-15T20:00:00.000-04:00" NumberReads="1">
                <RegisterRead ReadTime="2019-04-15T20:00:00.000-04:00" GatewayCollectedTime="2019-04-16T01:40:06.214-04:00" RegisterReadSource="REG_SRC_TYPE_EO_CURR_READ" Season="-1">
                    <Tier Number="0">
                        <Register Number="1" Summation="5949.1000" SummationUOM="GAL"/>
                    </Tier>
                </RegisterRead>
            </RegisterData>
        </MeterData>
        <MeterData MeterName="MUNI4-11460365" UtilDeviceID="11460365" MacID="00:11:01:bc:fe:00:d3:f9">
            <RegisterData StartTime="2019-04-15T20:00:00.000-04:00" EndTime="2019-04-15T20:00:00.000-04:00" NumberReads="1">
                <RegisterRead ReadTime="2019-04-15T20:00:00.000-04:00" GatewayCollectedTime="2019-04-16T01:40:11.082-04:00" RegisterReadSource="REG_SRC_TYPE_EO_CURR_READ" Season="-1">
                    <Tier Number="0">
                        <Register Number="1" Summation="136349.9000" SummationUOM="GAL"/>
                    </Tier>
                </RegisterRead>
            </RegisterData>
        </MeterData>
    

    我们可以使用 JAXB 将您的 xml 标签转换为对象。然后我们可以和他们一起玩。

    // Unmarshal the input file into the JAXB object tree.
    File xmlFile = new File("input.xml");
    JAXBContext unmarshalContext = JAXBContext.newInstance(SSNExportDocument.class);
    Unmarshaller jaxbUnmarshaller = unmarshalContext.createUnmarshaller();
    SSNExportDocument ssnExpDoc = (SSNExportDocument) jaxbUnmarshaller.unmarshal(xmlFile);

    // Group the meters by the part of MeterName before the first '-'
    // (e.g. "MUNI1" for "MUNI1-11459398").
    Map<String, List<MeterData>> meterMapper = new HashMap<String, List<MeterData>>();

    for (MeterData mData : ssnExpDoc.getMeterData()) {
        // A limit of 2 guarantees at least one array element, even for a name that is just "-".
        String meterPrefix = mData.getMeterName().split("-", 2)[0];
        List<MeterData> meterDataList = meterMapper.get(meterPrefix);
        if (meterDataList == null) {
            meterDataList = new ArrayList<>();
            meterMapper.put(meterPrefix, meterDataList);
        }
        meterDataList.add(mData);
    }
    

    meterMapper 将名称前缀(如 MUNI1)映射到对应的 MeterData 对象列表

    然后使用以下代码来编组(marshal)内容

           // Marshals a JAXB object to an XML string and prints it to the console.
           // NOTE(review): `employee` below is not defined anywhere in this snippet;
           // presumably an SSNExportDocument built from one meterMapper entry was
           // intended. Confirm before use.
           JAXBContext jaxbContext = JAXBContext.newInstance(SSNExportDocument.class);
    
            // Create Marshaller
            Marshaller jaxbMarshaller = jaxbContext.createMarshaller();
    
            // Pretty-print the output and mark it as a fragment
            jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
            jaxbMarshaller.setProperty(Marshaller.JAXB_FRAGMENT, Boolean.TRUE);
            //jaxbMarshaller.setProperty("com.sun.xml.bind.xmlDeclaration", Boolean.FALSE);
    
            // Collect the XML in memory so it can be printed below
    
            StringWriter sw = new StringWriter();
    
            // Write XML to StringWriter
            jaxbMarshaller.marshal(employee, sw);
    
            // Print the generated XML for inspection
            String xmlContent = sw.toString();
            System.out.println(xmlContent);
    

    【讨论】:

      【解决方案6】:

      不是一个完美的解决方案,但在大多数情况下都有效。必须玩一些字符串操作才能使其工作。基本上,这个解决方案将给定的 XML 拆分为给定的元素并形成子 XML 并将它们写入一个列表。

      /**
       * Streams {@code input.xml} as StAX events and, for every element whose local name
       * equals {@code elementSplitString} ("product"), builds a string made of the header
       * text captured before the first such element plus that element's own XML.
       *
       * <p>The resulting strings are collected in the local {@code list}; within this
       * method the list is neither returned nor printed. Any failure is caught, its stack
       * trace is printed, and the method returns normally.
       *
       * @param args unused
       */
      public static void main(String[] args) {
          java.io.File inputFile = new java.io.File("input.xml");
          // Local name of the element at which the document is split.
          String elementSplitString = "product";
          java.io.InputStream inputStream = null;
      
          try {
              
      
              inputStream = new java.io.BufferedInputStream(new java.io.FileInputStream(inputFile));
      
              javax.xml.stream.XMLInputFactory inputFactory = javax.xml.stream.XMLInputFactory.newInstance();
              javax.xml.stream.XMLOutputFactory outputFactory = javax.xml.stream.XMLOutputFactory.newInstance();
              javax.xml.stream.XMLEventReader reader = inputFactory.createXMLEventReader(inputStream);
              // Writer for the split element currently being copied; null while outside one.
              javax.xml.stream.XMLEventWriter writer = null;
              // Receives every event read before the first split element (the "header").
              StringWriter parentXMLStringWriter = new StringWriter();
              javax.xml.stream.XMLEventWriter headerWriter = outputFactory.createXMLEventWriter(parentXMLStringWriter); 
              StringWriter stringWriter = null;
              // Local name of the last start/end element seen before the first split element.
              String lastReadEvent = "";
              // Set on the first split element and never reset afterwards.
              boolean splitElementFound = false;
              List<StringBuilder> list = new ArrayList<StringBuilder>();
              while (reader.hasNext()) {
                  javax.xml.stream.events.XMLEvent event = reader.nextEvent();
                  
                  
                  switch(event.getEventType()) {
                      case javax.xml.stream.XMLStreamConstants.START_ELEMENT:
                          javax.xml.stream.events.StartElement startElement = (javax.xml.stream.events.StartElement)event;
                          if (startElement.getName().getLocalPart().equals(elementSplitString)) {
                              // Start of a split element: begin a fresh in-memory document for it.
                              splitElementFound = true;
                              stringWriter = new StringWriter();
                              writer = outputFactory.createXMLEventWriter(stringWriter);
                              if (writer != null) writer.add(event);
                          } else if(writer != null)
                               writer.add(event);
                          
                          break;
      
                      case javax.xml.stream.XMLStreamConstants.END_ELEMENT:
                          javax.xml.stream.events.EndElement endElement = (javax.xml.stream.events.EndElement)event;
                          if (endElement.getName().getLocalPart().equals(elementSplitString)) {
                              if (writer != null) writer.add(event);
                              
                              // NOTE(review): unlike the calls around it, this close() is not
                              // null-guarded; it presumably also flushes into stringWriter.
                              writer.close();
                              StringBuilder builder = new StringBuilder();
                              String parentXML = parentXMLStringWriter.toString();
                              // Header text up to and including the first '>' that follows the
                              // first occurrence of lastReadEvent.
                              builder.append(parentXML.subSequence(0, parentXML.indexOf(">", parentXML.indexOf(lastReadEvent)) + 1));
                              // The split element's own XML.
                              builder.append(stringWriter.toString());
                              // Rest of the header text, starting two characters past that '>'.
                              // NOTE(review): what this tail contains depends on what
                              // headerWriter.close() emitted; verify on real input.
                              builder.append(parentXML.substring(parentXML.indexOf(">", parentXML.indexOf(lastReadEvent)) + 2));
                              list.add(builder);
                              writer = null;
                          }else if(writer != null)
                              writer.add(event);
                          break;
      
                      default:
                          // Any other event is copied only while inside a split element.
                          if (writer != null) 
                              writer.add(event);
                          break;
                  }
                  if(!splitElementFound) {
                      // Still in the header: remember the element name and copy the event.
                      if(event instanceof javax.xml.stream.events.StartElement)
                          lastReadEvent = ((javax.xml.stream.events.StartElement)event).getName().getLocalPart();
                      else if(event instanceof javax.xml.stream.events.EndElement)
                          lastReadEvent = ((javax.xml.stream.events.EndElement)event).getName().getLocalPart();
                      headerWriter.add(event);
                  }else {
                      // Reached on every iteration once the first split element has been seen,
                      // so close() is invoked repeatedly on the same writer.
                      headerWriter.close();
                  }
      
              }
              
              headerWriter = null;
              reader.close();
              if (writer != null) writer.close();
          } catch(Throwable ex) {
              // Every failure, including Errors, is only printed; main still returns normally.
              ex.printStackTrace();
          } finally {
              if (inputStream != null) {
                  try {
                      inputStream.close();
                  } catch (java.io.IOException ex) {
                      // A failure while closing the input stream is deliberately ignored.
                  }
              }
          }
      } 
      

      【讨论】:

        【解决方案7】:

        如果您有该 XML 方言的架构 (XSD),那么 JAXB 可以作为 DOM 的替代方案。

        【讨论】:

          猜你喜欢
          • 1970-01-01
          • 1970-01-01
          • 2011-09-20
          • 1970-01-01
          • 2019-08-06
          • 2017-03-14
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          相关资源
          最近更新 更多