【发布时间】:2023-10-18 18:42:01
【问题描述】:
* 用户帮助我将 XML 文件解析为 csv 文件,这对我帮助很大。这很有帮助,我决定尝试遍历整个目录并将所有 xml 文件解析为单个 csv 文件。这是有效的代码:
import xml.etree.ElementTree as ET
import csv
extra_columns = 2
fields = [
('Id_Customer', 'Id_Customer', 1),
('Segment', 'Segment', 1),
('Nature', 'Event/Nature', 1),
('Extrainfo', 'Event/Extrainfo', 1),
('zipcode', 'Adress/zipcode', extra_columns),
('street', 'Adress/street', extra_columns),
('number', 'Adress/number', extra_columns)]
tree = ET.parse('cat.xml')
root = tree.getroot()
# Auto create the header from fields
fieldnames = []
for field, match, cols in fields:
fieldnames.append(field)
if cols > 1:
fieldnames.extend(["{}{}".format(field, x+2) for x in range(extra_columns)])
with open(r'customerdata.csv', 'wb') as f_customerdata:
csv_customerdata = csv.DictWriter(f_customerdata, fieldnames=fieldnames)
csv_customerdata.writeheader()
for node in tree.iter('Customer'):
row = {}
for field_name, match, cols in fields:
if cols > 1:
for index, el in enumerate(node.findall(match)):
try:
if index:
row["{}{}".format(field_name, index+1)] = el.text
else:
row[field_name] = el.text
except AttributeError as e:
row[field_name] = ''
else:
try:
row[field_name] = node.find(match).text
except AttributeError as e:
row[field_name] = ''
csv_customerdata.writerow(row)
然后我尝试引入listdir来查找所有文件名(这一步有效):
for filename in os.path.join(r'C:\docs', filename):
if not filename.endswith('.xml'): continue
fullname = os.path.join(r'C:\docs', filename)
但是,当我尝试集成这些步骤时,我发现我只能从目录中的第一个 xml 文件中获取数据。请参阅下面的合并代码。我试图弄清楚为什么我的 for 循环没有迭代并将每个解析的 xml 文件写入 csv 文件。
import xml.etree.ElementTree as ET
import csv
import os
extra_columns = 2
fields = [
('Id_Customer', 'Id_Customer', 1),
('Segment', 'Segment', 1),
('Nature', 'Event/Nature', 1),
('Extrainfo', 'Event/Extrainfo', 1),
('zipcode', 'Adress/zipcode', extra_columns),
('street', 'Adress/street', extra_columns),
('number', 'Adress/number', extra_columns)]
#tree = ET.parse('cat.xml')
#root = tree.getroot()
# Auto create the header from fields
fieldnames = []
with open(r'customerdata.csv', 'wb') as f_customerdata:
csv_customerdata = csv.DictWriter(f_customerdata, fieldnames=fieldnames)
csv_customerdata.writeheader()
for filename in os.listdir(r'C:\docs'):
if not filename.endswith('.xml'): continue
fullname = os.path.join(r'C:\docs',filename)
tree = ET.parse(fullname)
root = tree.getroot()
for node in tree.iter('Customer'):
row = {}
for field_name, match, cols in fields:
if cols > 1:
for index, el in enumerate(node.findall(match)):
try:
if index:
row["{}{}".format(field_name, index+1)] = el.text
else:
row[field_name] = el.text
except AttributeError as e:
row[field_name] = ''
else:
try:
row[field_name] = node.find(match).text
except AttributeError as e:
row[field_name] = ''
csv_customerdata.writerow(row)
更新:
with open(r'customerdata.csv', 'wb') as f_customerdata:
csv_customerdata = csv.DictWriter(f_customerdata, fieldnames=fieldnames)
csv_customerdata.writeheader()
for filename in os.listdir(r'C:\docs'):
if not filename.endswith('.xml'): continue
fullname = os.path.join(r'C:\docs',filename)
tree = ET.parse(fullname)
root = tree.getroot()
for node in tree.iter('Customer'):
row = {}
for field_name, match, cols in fields:
if cols > 1:
for index, el in enumerate(node.findall(match)):
try:
if index:
row["{}{}".format(field_name, index+1)] = el.text
else:
row[field_name] = el.text
except AttributeError as e:
row[field_name] = ''
else:
try:
row[field_name] = node.find(match).text
except AttributeError as e:
row[field_name] = ''
csv_customerdata.writerow(row)
【问题讨论】: