1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
| import pandas as pd from bs4 import BeautifulSoup import os import tkinter as tk from tkinter import filedialog, messagebox
def _pad_to_width(cells, width):
    """Return *cells* extended with ``None`` until it holds *width* items."""
    return cells + [None] * (width - len(cells))


def process_html_file(html_file_path, excel_file_path):
    """Export the second ``<table>`` of an HTML file to an Excel workbook.

    The first ``<tr>`` of that table supplies the column headers and every
    following ``<tr>`` supplies one data row; cell text is taken from both
    ``<th>`` and ``<td>`` elements with surrounding whitespace stripped.

    Every outcome (success or failure) is reported to the user through a
    ``messagebox`` dialog; the function itself always returns ``None``.

    Args:
        html_file_path: Path of the HTML file to read (decoded as UTF-8).
        excel_file_path: Destination path handed to ``DataFrame.to_excel``.
    """
    if not os.path.isfile(html_file_path):
        messagebox.showerror("错误", f"文件 {html_file_path} 不存在。")
        return

    # Reading can still fail after the existence check (permissions, or a
    # page that is not valid UTF-8), so report it instead of letting the
    # exception escape from the Tk callback.
    try:
        with open(html_file_path, "r", encoding="utf-8") as file:
            html_content = file.read()
    except (OSError, UnicodeDecodeError) as e:
        messagebox.showerror("错误", f"读取 HTML 文件时出错: {e}")
        return

    soup = BeautifulSoup(html_content, "html.parser")
    tables = soup.find_all("table")
    if len(tables) < 2:
        messagebox.showerror("错误", "找不到足够的表格。")
        return

    grades_table = tables[1]
    rows = grades_table.find_all("tr")
    if not rows:
        # Without this guard the header lookup below would raise IndexError.
        messagebox.showerror("错误", "表格中没有任何行。")
        return

    cell_rows = [
        [cell.get_text(strip=True) for cell in row.find_all(["th", "td"])]
        for row in rows
    ]
    headers, data = cell_rows[0], cell_rows[1:]

    # pandas rejects data whose width differs from the number of column
    # labels, so bring the header and every row to one common width: extra
    # columns get a placeholder header, short rows are padded with None.
    width = max(len(cells) for cells in cell_rows)
    headers = headers + [f"列{i}" for i in range(len(headers) + 1, width + 1)]
    data = [_pad_to_width(cells, width) for cells in data]

    df = pd.DataFrame(data, columns=headers)

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # while writing the workbook should surface as a dialog.
    try:
        df.to_excel(excel_file_path, index=False)
    except Exception as e:
        messagebox.showerror("错误", f"保存 Excel 文件时出错: {e}")
    else:
        messagebox.showinfo("成功", f"Excel 文件已保存为 '{excel_file_path}'")
def select_html_file():
    """Prompt for an HTML file and show the chosen path in ``html_file_entry``.

    The entry is left untouched when the dialog returns a falsy value.
    """
    chosen = filedialog.askopenfilename(
        title="选择 HTML 文件",
        filetypes=[("HTML Files", "*.html")],
    )
    if not chosen:
        return

    # Replace whatever the entry currently holds with the new selection.
    html_file_entry.delete(0, tk.END)
    html_file_entry.insert(0, chosen)
def select_save_location():
    """Prompt for an output path and show it in ``save_file_entry``.

    The dialog defaults to the ``.xlsx`` extension. The entry is left
    untouched when the dialog returns a falsy value.
    """
    destination = filedialog.asksaveasfilename(
        title="保存为",
        defaultextension=".xlsx",
        filetypes=[("Excel Files", "*.xlsx")],
    )
    if not destination:
        return

    # Replace whatever the entry currently holds with the new selection.
    save_file_entry.delete(0, tk.END)
    save_file_entry.insert(0, destination)
def run_processing():
    """Read both path entries and start the HTML-to-Excel conversion.

    Shows a warning and does nothing else when either entry is empty.
    """
    source_path = html_file_entry.get()
    target_path = save_file_entry.get()

    # Both paths are required before any processing is attempted.
    if not (source_path and target_path):
        messagebox.showwarning("警告", "请确保选择了 HTML 文件和保存路径。")
        return

    process_html_file(source_path, target_path)
# Build the main window: two "label / entry / browse button" rows followed by
# a button that starts the processing, then hand control to the Tk event loop.
root = tk.Tk()
root.title("成绩数据提取器")

# Row 0: path of the HTML file to read.
_html_label = tk.Label(root, text="选择 HTML 文件:")
_html_label.grid(row=0, column=0, padx=10, pady=10, sticky="e")
html_file_entry = tk.Entry(root, width=50)
html_file_entry.grid(row=0, column=1, padx=10, pady=10)
_html_browse_button = tk.Button(root, text="浏览", command=select_html_file)
_html_browse_button.grid(row=0, column=2, padx=10, pady=10)

# Row 1: path of the Excel file to write.
_save_label = tk.Label(root, text="保存为 Excel 文件:")
_save_label.grid(row=1, column=0, padx=10, pady=10, sticky="e")
save_file_entry = tk.Entry(root, width=50)
save_file_entry.grid(row=1, column=1, padx=10, pady=10)
_save_browse_button = tk.Button(root, text="浏览", command=select_save_location)
_save_browse_button.grid(row=1, column=2, padx=10, pady=10)

# Row 2: the action button, spanning all three columns.
_process_button = tk.Button(root, text="处理", command=run_processing)
_process_button.grid(row=2, column=0, columnspan=3, padx=10, pady=20)

root.mainloop()
|