ETest-Vue-FastAPI/scripts/convert_sample_ledger.py

184 lines
7.0 KiB
Python
Raw Normal View History

2026-04-15 19:06:01 +08:00
"""
样品台账转换脚本
Excel 台账转换为标准 JSON 格式便于导入系统
"""
import pandas as pd
import json
import sys
from datetime import datetime
from pathlib import Path
def _is_missing(val):
    """Return True when *val* is an empty cell (None, NaN or NaT)."""
    if val is None:
        return True
    try:
        return bool(pd.isna(val))
    except (TypeError, ValueError):
        # pd.isna() on a list-like returns an array; such a value is not an
        # empty cell.
        return False


def _collapse_integral_float(val):
    """Turn floats such as 1001.0 back into the integer 1001.

    pandas upcasts an integer column to float as soon as it contains a blank
    cell, which would otherwise leak a spurious ".0" into the output.
    """
    if isinstance(val, float) and val.is_integer():
        return int(val)
    return val


def _normalize_scalar(val):
    """Convert a cell value into something ``json.dump`` can serialize."""
    if _is_missing(val):
        return None
    unwrap = getattr(val, 'item', None)
    if callable(unwrap) and not hasattr(val, 'isoformat'):
        # numpy scalars (e.g. numpy.int64) are not JSON serializable.
        val = unwrap()
    if hasattr(val, 'isoformat'):
        # datetime / date / time / pandas.Timestamp in a non-date column.
        return val.isoformat()
    return _collapse_integral_float(val)


def _parse_date(val):
    """Render a date cell as 'YYYY-MM-DD'; other text is cut to 10 chars."""
    if _is_missing(val):
        return None
    if isinstance(val, datetime):
        return val.strftime('%Y-%m-%d')
    return str(val)[:10]


def _parse_seq_no(val):
    """Return the sequence number as int, or None when it is not numeric."""
    if _is_missing(val):
        return None
    try:
        return int(val)
    except (TypeError, ValueError, OverflowError):
        return None


def _to_text(val):
    """Return the cell as a string (None for empty cells)."""
    if _is_missing(val):
        return None
    return str(_collapse_integral_float(val))


# (JSON key, column label produced by pandas with header=1, converter).
# The order here is the key order of every exported sample.
_LEDGER_FIELDS = (
    # Basic information
    ('seq_no', '收样情况', _parse_seq_no),
    ('receipt_date', 'Unnamed: 1', _parse_date),
    ('storage_flag', 'Unnamed: 2', _normalize_scalar),
    ('commission_no', 'Unnamed: 3', _normalize_scalar),
    # Sample information
    ('sample_type', 'Unnamed: 4', _normalize_scalar),
    ('sample_sn', 'Unnamed: 5', _to_text),
    ('hardware_version', 'Unnamed: 6', _normalize_scalar),
    ('batch_no', 'Unnamed: 7', _normalize_scalar),
    ('report_no', 'Unnamed: 8', _normalize_scalar),
    # Test information
    ('external_status', 'Unnamed: 9', _normalize_scalar),
    ('planned_test_items', 'Unnamed: 10', _normalize_scalar),
    ('test_nature', 'Unnamed: 11', _normalize_scalar),
    # Receipt information
    ('receipt_method', 'Unnamed: 12', _normalize_scalar),
    ('delivery_person', 'Unnamed: 13', _normalize_scalar),
    ('receiver', 'Unnamed: 14', _normalize_scalar),
    # Handover information
    ('handover_date', '交样情况', _parse_date),
    ('handover_status', 'Unnamed: 16', _normalize_scalar),
    ('actual_test_items', 'Unnamed: 17', _normalize_scalar),
    ('handover_method', 'Unnamed: 18', _normalize_scalar),
    ('registrar', 'Unnamed: 19', _normalize_scalar),
    ('handover_person', 'Unnamed: 20', _normalize_scalar),
    ('recipient', 'Unnamed: 21', _normalize_scalar),
    # Miscellaneous
    ('transfer_info', '流转信息登记', _normalize_scalar),
    ('remark', '备注', _normalize_scalar),
)


def convert_excel_to_json(excel_path, output_path=None):
    """Convert a sample-ledger Excel file into the standard JSON format.

    The sheet is read with its second row as the header; the row repeating
    the column captions (first column equal to '序号') is discarded. Rows
    without a sequence number, and rows that have neither a sample serial
    number nor a sample type, are skipped.

    Args:
        excel_path: Path of the Excel file to read.
        output_path: Path of the JSON file to write. Defaults to the Excel
            path with its suffix replaced by ``.json``.

    Returns:
        The dict that was written: ``{'metadata': {...}, 'samples': [...]}``.

    Raises:
        KeyError: If the sheet has no '收样情况' column.
    """
    # Row 0 of the sheet is a title row; row 1 carries the group headers.
    df = pd.read_excel(excel_path, header=1)
    # Drop the caption row that sits directly below the group headers.
    df = df[df['收样情况'] != '序号'].reset_index(drop=True)

    samples = []
    for _, row in df.iterrows():
        if _is_missing(row.get('收样情况')):
            continue
        sample = {
            key: convert(row.get(column))
            for key, column, convert in _LEDGER_FIELDS
        }
        # Only keep rows that actually describe a sample.
        if sample['sample_sn'] or sample['sample_type']:
            samples.append(sample)

    output = {
        'metadata': {
            'source_file': Path(excel_path).name,
            'export_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'total_count': len(samples),
            'version': '1.0',
        },
        'samples': samples,
    }

    if output_path is None:
        output_path = Path(excel_path).with_suffix('.json')

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    print("转换完成!")
    print(f" 输入文件: {excel_path}")
    print(f" 输出文件: {output_path}")
    print(f" 样品数量: {len(samples)}")
    return output
def convert_excel_to_csv(excel_path, output_path=None):
    """Convert a sample-ledger Excel file into the standard CSV format.

    The sheet is read with its second row as the header, the row repeating
    the column captions (first column equal to '序号') is discarded, rows in
    which every cell is empty are dropped, and the columns are renamed to
    the standard field names. Columns not listed in the mapping keep their
    original labels.

    Args:
        excel_path: Path of the Excel file to read.
        output_path: Path of the CSV file to write. Defaults to the Excel
            path with its suffix replaced by ``.csv``.

    Returns:
        The renamed ``DataFrame`` that was written.

    Raises:
        KeyError: If the sheet has no '收样情况' column.
    """
    # Row 0 of the sheet is a title row; row 1 carries the group headers.
    df = pd.read_excel(excel_path, header=1)
    # Drop the caption row that sits directly below the group headers.
    df = df[df['收样情况'] != '序号']
    # Fully blank spreadsheet rows carry no sample; exporting them would
    # also inflate the sample count printed below.
    df = df.dropna(how='all').reset_index(drop=True)

    # Map the labels pandas generates for the sheet to standard field names.
    column_mapping = {
        '收样情况': 'seq_no',
        'Unnamed: 1': 'receipt_date',
        'Unnamed: 2': 'storage_flag',
        'Unnamed: 3': 'commission_no',
        'Unnamed: 4': 'sample_type',
        'Unnamed: 5': 'sample_sn',
        'Unnamed: 6': 'hardware_version',
        'Unnamed: 7': 'batch_no',
        'Unnamed: 8': 'report_no',
        'Unnamed: 9': 'external_status',
        'Unnamed: 10': 'planned_test_items',
        'Unnamed: 11': 'test_nature',
        'Unnamed: 12': 'receipt_method',
        'Unnamed: 13': 'delivery_person',
        'Unnamed: 14': 'receiver',
        '交样情况': 'handover_date',
        'Unnamed: 16': 'handover_status',
        'Unnamed: 17': 'actual_test_items',
        'Unnamed: 18': 'handover_method',
        'Unnamed: 19': 'registrar',
        'Unnamed: 20': 'handover_person',
        'Unnamed: 21': 'recipient',
        '流转信息登记': 'transfer_info',
        '备注': 'remark',
    }
    df = df.rename(columns=column_mapping)

    if output_path is None:
        output_path = Path(excel_path).with_suffix('.csv')

    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(output_path, index=False, encoding='utf-8-sig')

    print("CSV 转换完成!")
    print(f" 输出文件: {output_path}")
    print(f" 样品数量: {len(df)}")
    return df
def resolve_output_paths(fmt, output):
    """Work out where the JSON and CSV files should be written.

    Args:
        fmt: One of 'json', 'csv' or 'both'.
        output: The value of ``--output`` (may be None).

    Returns:
        A ``(json_path, csv_path)`` tuple. ``None`` in a slot means "let the
        converter derive the path from the input file". With 'both', a given
        output path is used as the base name for both files, differing only
        in suffix, so that neither format silently ignores ``--output``.
    """
    if fmt == 'json':
        return output, None
    if fmt == 'csv':
        return None, output
    if not output:
        return None, None
    base = Path(output)
    return base.with_suffix('.json'), base.with_suffix('.csv')


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    import argparse

    parser = argparse.ArgumentParser(description='样品台账转换工具')
    parser.add_argument('input', help='输入 Excel 文件路径')
    parser.add_argument('-o', '--output', help='输出文件路径(可选)')
    parser.add_argument('-f', '--format', choices=['json', 'csv', 'both'],
                        default='both', help='输出格式(默认: both)')
    args = parser.parse_args(argv)

    json_output, csv_output = resolve_output_paths(args.format, args.output)
    if args.format in ('json', 'both'):
        convert_excel_to_json(args.input, json_output)
    if args.format in ('csv', 'both'):
        convert_excel_to_csv(args.input, csv_output)


if __name__ == '__main__':
    main()