""" 样品台账转换脚本 将 Excel 台账转换为标准 JSON 格式,便于导入系统 """ import pandas as pd import json import sys from datetime import datetime from pathlib import Path def convert_excel_to_json(excel_path, output_path=None): """ 将样品台账 Excel 转换为标准 JSON 格式 Args: excel_path: Excel 文件路径 output_path: 输出 JSON 文件路径(可选,默认与 Excel 同目录) """ # 读取 Excel,跳过第一行(标题行),第二行是列名 df = pd.read_excel(excel_path, header=1) # 删除第一行(列名解释行) df = df[df['收样情况'] != '序号'].reset_index(drop=True) # 转换日期格式 def parse_date(val): if pd.isna(val): return None if isinstance(val, datetime): return val.strftime('%Y-%m-%d') return str(val)[:10] if len(str(val)) >= 10 else str(val) # 构建标准格式数据 samples = [] for idx, row in df.iterrows(): # 跳过空行 if pd.isna(row.get('收样情况')): continue sample = { # 基本信息 'seq_no': int(row['收样情况']) if pd.notna(row['收样情况']) else None, 'receipt_date': parse_date(row.get('Unnamed: 1')), 'storage_flag': row.get('Unnamed: 2') if pd.notna(row.get('Unnamed: 2')) else None, 'commission_no': row.get('Unnamed: 3') if pd.notna(row.get('Unnamed: 3')) else None, # 样品信息 'sample_type': row.get('Unnamed: 4') if pd.notna(row.get('Unnamed: 4')) else None, 'sample_sn': str(row.get('Unnamed: 5')) if pd.notna(row.get('Unnamed: 5')) else None, 'hardware_version': row.get('Unnamed: 6') if pd.notna(row.get('Unnamed: 6')) else None, 'batch_no': row.get('Unnamed: 7') if pd.notna(row.get('Unnamed: 7')) else None, 'report_no': row.get('Unnamed: 8') if pd.notna(row.get('Unnamed: 8')) else None, # 测试信息 'external_status': row.get('Unnamed: 9') if pd.notna(row.get('Unnamed: 9')) else None, 'planned_test_items': row.get('Unnamed: 10') if pd.notna(row.get('Unnamed: 10')) else None, 'test_nature': row.get('Unnamed: 11') if pd.notna(row.get('Unnamed: 11')) else None, # 收样信息 'receipt_method': row.get('Unnamed: 12') if pd.notna(row.get('Unnamed: 12')) else None, 'delivery_person': row.get('Unnamed: 13') if pd.notna(row.get('Unnamed: 13')) else None, 'receiver': row.get('Unnamed: 14') if pd.notna(row.get('Unnamed: 14')) else None, # 交样信息 'handover_date': parse_date(row.get('交样情况')), 'handover_status': row.get('Unnamed: 16') if pd.notna(row.get('Unnamed: 16')) else None, 'actual_test_items': row.get('Unnamed: 17') if pd.notna(row.get('Unnamed: 17')) else None, 'handover_method': row.get('Unnamed: 18') if pd.notna(row.get('Unnamed: 18')) else None, 'registrar': row.get('Unnamed: 19') if pd.notna(row.get('Unnamed: 19')) else None, 'handover_person': row.get('Unnamed: 20') if pd.notna(row.get('Unnamed: 20')) else None, 'recipient': row.get('Unnamed: 21') if pd.notna(row.get('Unnamed: 21')) else None, # 其他 'transfer_info': row.get('流转信息登记') if pd.notna(row.get('流转信息登记')) else None, 'remark': row.get('备注') if pd.notna(row.get('备注')) else None, } # 只添加有数据的样品 if sample['sample_sn'] or sample['sample_type']: samples.append(sample) # 构建输出数据结构 output = { 'metadata': { 'source_file': Path(excel_path).name, 'export_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'total_count': len(samples), 'version': '1.0' }, 'samples': samples } # 确定输出路径 if output_path is None: output_path = Path(excel_path).with_suffix('.json') # 保存 JSON with open(output_path, 'w', encoding='utf-8') as f: json.dump(output, f, ensure_ascii=False, indent=2) print(f"转换完成!") print(f" 输入文件: {excel_path}") print(f" 输出文件: {output_path}") print(f" 样品数量: {len(samples)}") return output def convert_excel_to_csv(excel_path, output_path=None): """ 将样品台账 Excel 转换为标准 CSV 格式 """ # 读取 Excel df = pd.read_excel(excel_path, header=1) # 删除第一行(列名解释行) df = df[df['收样情况'] != '序号'].reset_index(drop=True) # 重命名列为标准名称 column_mapping = { '收样情况': 'seq_no', 'Unnamed: 1': 'receipt_date', 'Unnamed: 2': 'storage_flag', 'Unnamed: 3': 'commission_no', 'Unnamed: 4': 'sample_type', 'Unnamed: 5': 'sample_sn', 'Unnamed: 6': 'hardware_version', 'Unnamed: 7': 'batch_no', 'Unnamed: 8': 'report_no', 'Unnamed: 9': 'external_status', 'Unnamed: 10': 'planned_test_items', 'Unnamed: 11': 'test_nature', 'Unnamed: 12': 'receipt_method', 'Unnamed: 13': 'delivery_person', 'Unnamed: 14': 'receiver', '交样情况': 'handover_date', 'Unnamed: 16': 'handover_status', 'Unnamed: 17': 'actual_test_items', 'Unnamed: 18': 'handover_method', 'Unnamed: 19': 'registrar', 'Unnamed: 20': 'handover_person', 'Unnamed: 21': 'recipient', '流转信息登记': 'transfer_info', '备注': 'remark', } df = df.rename(columns=column_mapping) # 确定输出路径 if output_path is None: output_path = Path(excel_path).with_suffix('.csv') # 保存 CSV df.to_csv(output_path, index=False, encoding='utf-8-sig') print(f"CSV 转换完成!") print(f" 输出文件: {output_path}") print(f" 样品数量: {len(df)}") return df if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='样品台账转换工具') parser.add_argument('input', help='输入 Excel 文件路径') parser.add_argument('-o', '--output', help='输出文件路径(可选)') parser.add_argument('-f', '--format', choices=['json', 'csv', 'both'], default='both', help='输出格式(默认: both)') args = parser.parse_args() if args.format in ['json', 'both']: convert_excel_to_json(args.input, args.output if args.format == 'json' else None) if args.format in ['csv', 'both']: csv_output = args.output if args.format == 'both' and args.output: csv_output = Path(args.output).with_suffix('.csv') convert_excel_to_csv(args.input, csv_output)