其他其他转换xml至表格
小柯前言
最近在做一个项目,需要将 sitemap 格式的 XML 文件中的数据转换为 Excel 表格,方便后续的操作。
以下是我写的代码,希望对大家有所帮助。
代码
单个xml转换
import xml.etree.ElementTree as ET from openpyxl import Workbook
def parse_xml_to_excel(xml_file, excel_file):
    """Convert one sitemap XML file into an Excel workbook.

    Every ``<url>`` element directly under the document root becomes one row
    of the active worksheet: column 1 holds the ``<loc>`` text and column 2
    the ``<lastmod>`` text. No header row is written.

    Args:
        xml_file: Path of the sitemap XML file to read.
        excel_file: Path of the ``.xlsx`` file to write.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_file`` is not well-formed
            XML.
    """
    ns = {'ns': 'http://www.sitemaps.org/schemas/sitemap/0.9'}

    wb = Workbook()
    sheet = wb.active
    root = ET.parse(xml_file).getroot()

    for row_index, url in enumerate(root.findall('ns:url', ns), start=1):
        # findtext() falls back to the default when the child element is
        # absent, so an entry lacking <loc> or <lastmod> yields an empty cell
        # instead of raising AttributeError on ``None.text``.
        loc = url.findtext('ns:loc', default='', namespaces=ns)
        lastmod = url.findtext('ns:lastmod', default='', namespaces=ns)

        sheet.cell(row=row_index, column=1, value=loc)
        sheet.cell(row=row_index, column=2, value=lastmod)

    wb.save(excel_file)
    print(f"XML 文件已成功转换为 Excel 文件:{excel_file}")
# Example run: convert a single sitemap file into ``output.xlsx``
# (a relative path, so it is resolved against the current working directory).
input_xml_file = 'S:/Users/26370/Desktop/Down/mineleak.pro/mineleak.pro.xml'
output_excel_file = 'output.xlsx'

parse_xml_to_excel(xml_file=input_xml_file, excel_file=output_excel_file)
|
文件夹下所有xml转换
import os import xml.etree.ElementTree as ET from openpyxl import Workbook
def _new_workbook_with_header():
    """Return ``(workbook, sheet)`` for a fresh workbook with the header in row 1."""
    wb = Workbook()
    sheet = wb.active
    sheet.cell(row=1, column=1, value='URL')
    sheet.cell(row=1, column=2, value='Last Modified')
    return wb, sheet


def parse_xml_to_excel(xml_folder, max_rows_per_sheet, output_excel_prefix):
    """Convert every sitemap XML file in a folder into numbered Excel files.

    Each ``<url>`` element directly under a file's root becomes one row
    (column 1: ``<loc>`` text, column 2: ``<lastmod>`` text) below a
    ``URL`` / ``Last Modified`` header. Rows from all files are written
    consecutively; once a workbook is full it is saved as
    ``{output_excel_prefix}_{n}.xlsx`` and a new one is started.

    Args:
        xml_folder: Directory whose ``.xml`` files are processed
            (non-recursive; the extension match ignores case).
        max_rows_per_sheet: Maximum number of rows per workbook, counting
            the header row.
        output_excel_prefix: Prefix of the output file names.

    Files that are not well-formed XML are reported on stdout and skipped.
    """
    ns = {'ns': 'http://www.sitemaps.org/schemas/sitemap/0.9'}

    file_count = 1
    row_index = 2  # row 1 is the header
    wb, sheet = _new_workbook_with_header()

    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order and the split across output files reproducible.
    for filename in sorted(os.listdir(xml_folder)):
        if not filename.lower().endswith('.xml'):
            continue
        xml_file = os.path.join(xml_folder, filename)

        # Only the parse itself can raise ParseError, so keep the try minimal.
        try:
            root = ET.parse(xml_file).getroot()
        except ET.ParseError as e:
            print(f"解析 XML 文件 '{xml_file}' 出错: {e}")
            continue

        for url in root.findall('ns:url', ns):
            # findtext() returns the default for a missing child, so a <url>
            # without <loc>/<lastmod> no longer aborts the whole batch with
            # AttributeError.
            loc = url.findtext('ns:loc', default='', namespaces=ns)
            lastmod = url.findtext('ns:lastmod', default='', namespaces=ns)

            sheet.cell(row=row_index, column=1, value=loc)
            sheet.cell(row=row_index, column=2, value=lastmod)
            row_index += 1

            if row_index > max_rows_per_sheet:
                output_excel_path = f'{output_excel_prefix}_{file_count}.xlsx'
                wb.save(output_excel_path)
                print(f"已保存至 Excel 文件:{output_excel_path}")

                file_count += 1
                wb, sheet = _new_workbook_with_header()
                row_index = 2

    # Save the remainder. Skip it when the last rollover left a header-only
    # workbook, but still write one file if nothing has been saved at all.
    if row_index > 2 or file_count == 1:
        output_excel_path = f'{output_excel_prefix}_{file_count}.xlsx'
        wb.save(output_excel_path)
        print(f"最后一部分数据已保存至 Excel 文件:{output_excel_path}")
# Example run: merge all sitemap files of one folder into workbooks named
# ``blackspigot_1.xlsx``, ``blackspigot_2.xlsx``, ... with a row cap per file.
xml_folder_path = 'S:/Users/26370/Desktop/Down/blackspigot.com'
max_rows_per_sheet = 30000
output_excel_prefix = 'blackspigot'

parse_xml_to_excel(
    xml_folder=xml_folder_path,
    max_rows_per_sheet=max_rows_per_sheet,
    output_excel_prefix=output_excel_prefix,
)
|