ETest-Vue-FastAPI/scripts/convert_sample_ledger.py

184 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
样品台账转换脚本
将 Excel 台账转换为标准 JSON 格式,便于导入系统
"""
import pandas as pd
import json
import sys
from datetime import datetime
from pathlib import Path
def _to_seq_no(val):
    """Convert a sequence-number cell to ``int``; return ``None`` if it is not numeric."""
    try:
        return int(val)
    except (TypeError, ValueError, OverflowError):
        # A footer/annotation row may carry text in the sequence column;
        # one such cell must not abort the whole conversion.
        return None


def _parse_date(val):
    """Render a date-like cell as ``YYYY-MM-DD`` text."""
    if isinstance(val, datetime):
        # pandas Timestamps subclass datetime, so they take this branch too.
        return val.strftime('%Y-%m-%d')
    # Slicing already copes with strings shorter than 10 characters.
    return str(val)[:10]


def _plain_value(val):
    """Return *val* in a form ``json.dump`` can serialise."""
    if isinstance(val, (str, bool, int, float)):
        return val
    if hasattr(val, 'item') and not isinstance(val, datetime):
        # numpy scalars expose .item() to obtain the native Python value.
        native = val.item()
        if isinstance(native, (str, bool, int, float)):
            return native
    # datetimes and any other object are stored as their text form instead
    # of making json.dump raise TypeError.
    return str(val)


# (output key, spreadsheet column, converter applied to non-empty cells).
# The order defines the key order of every exported sample.
_SAMPLE_FIELDS = (
    # Basic information
    ('seq_no', '收样情况', _to_seq_no),
    ('receipt_date', 'Unnamed: 1', _parse_date),
    ('storage_flag', 'Unnamed: 2', _plain_value),
    ('commission_no', 'Unnamed: 3', _plain_value),
    # Sample information
    ('sample_type', 'Unnamed: 4', _plain_value),
    ('sample_sn', 'Unnamed: 5', str),
    ('hardware_version', 'Unnamed: 6', _plain_value),
    ('batch_no', 'Unnamed: 7', _plain_value),
    ('report_no', 'Unnamed: 8', _plain_value),
    # Test information
    ('external_status', 'Unnamed: 9', _plain_value),
    ('planned_test_items', 'Unnamed: 10', _plain_value),
    ('test_nature', 'Unnamed: 11', _plain_value),
    # Receipt information
    ('receipt_method', 'Unnamed: 12', _plain_value),
    ('delivery_person', 'Unnamed: 13', _plain_value),
    ('receiver', 'Unnamed: 14', _plain_value),
    # Handover information
    ('handover_date', '交样情况', _parse_date),
    ('handover_status', 'Unnamed: 16', _plain_value),
    ('actual_test_items', 'Unnamed: 17', _plain_value),
    ('handover_method', 'Unnamed: 18', _plain_value),
    ('registrar', 'Unnamed: 19', _plain_value),
    ('handover_person', 'Unnamed: 20', _plain_value),
    ('recipient', 'Unnamed: 21', _plain_value),
    # Miscellaneous
    ('transfer_info', '流转信息登记', _plain_value),
    ('remark', '备注', _plain_value),
)


def _row_to_sample(row):
    """Build one sample dict from a ledger row; empty or missing cells become ``None``."""
    sample = {}
    for key, column, convert in _SAMPLE_FIELDS:
        raw = row.get(column)
        sample[key] = None if pd.isna(raw) else convert(raw)
    return sample


def convert_excel_to_json(excel_path, output_path=None):
    """
    Convert a sample-ledger Excel workbook into the standard JSON layout.

    The sheet is read with its second row as the header. The row whose first
    column equals '序号' (the per-column caption row) is discarded, rows with
    an empty sequence cell are skipped, and only rows that carry a sample
    serial number or a sample type are exported.

    Args:
        excel_path: Path of the Excel file to read.
        output_path: Path of the JSON file to write. Defaults to the Excel
            path with its suffix replaced by ``.json``.

    Returns:
        The dict written to disk: ``{'metadata': {...}, 'samples': [...]}``.

    Raises:
        KeyError: If the sheet has no '收样情况' column.
    """
    # header=1: the first sheet row is a title, the second holds column names.
    df = pd.read_excel(excel_path, header=1)
    # Drop the caption row that sits directly under the header.
    df = df[df['收样情况'] != '序号'].reset_index(drop=True)

    samples = []
    for _, row in df.iterrows():
        # Rows without a sequence number are treated as blank.
        if pd.isna(row.get('收样情况')):
            continue
        sample = _row_to_sample(row)
        # Keep only rows that actually describe a sample.
        if sample['sample_sn'] or sample['sample_type']:
            samples.append(sample)

    output = {
        'metadata': {
            'source_file': Path(excel_path).name,
            'export_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'total_count': len(samples),
            'version': '1.0',
        },
        'samples': samples,
    }

    if output_path is None:
        output_path = Path(excel_path).with_suffix('.json')

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    print("转换完成!")
    print(f" 输入文件: {excel_path}")
    print(f" 输出文件: {output_path}")
    print(f" 样品数量: {len(samples)}")
    return output
def convert_excel_to_csv(excel_path, output_path=None):
    """
    Convert a sample-ledger Excel workbook into a CSV with standard column names.

    The sheet is read with its second row as the header. The caption row
    (first column equal to '序号') and rows whose cells are all empty are
    removed, then the ledger columns are renamed to their standard English
    identifiers. Columns not listed in the mapping keep their original names.

    Args:
        excel_path: Path of the Excel file to read.
        output_path: Path of the CSV file to write. Defaults to the Excel
            path with its suffix replaced by ``.csv``.

    Returns:
        The renamed DataFrame that was written to disk.

    Raises:
        KeyError: If the sheet has no '收样情况' column.
    """
    # header=1: the first sheet row is a title, the second holds column names.
    df = pd.read_excel(excel_path, header=1)
    # Drop the caption row that sits directly under the header.
    df = df[df['收样情况'] != '序号']
    # Completely blank rows carry no sample; exporting them would also
    # inflate the reported sample count.
    df = df.dropna(how='all').reset_index(drop=True)

    # Ledger column -> standard field name.
    column_mapping = {
        '收样情况': 'seq_no',
        'Unnamed: 1': 'receipt_date',
        'Unnamed: 2': 'storage_flag',
        'Unnamed: 3': 'commission_no',
        'Unnamed: 4': 'sample_type',
        'Unnamed: 5': 'sample_sn',
        'Unnamed: 6': 'hardware_version',
        'Unnamed: 7': 'batch_no',
        'Unnamed: 8': 'report_no',
        'Unnamed: 9': 'external_status',
        'Unnamed: 10': 'planned_test_items',
        'Unnamed: 11': 'test_nature',
        'Unnamed: 12': 'receipt_method',
        'Unnamed: 13': 'delivery_person',
        'Unnamed: 14': 'receiver',
        '交样情况': 'handover_date',
        'Unnamed: 16': 'handover_status',
        'Unnamed: 17': 'actual_test_items',
        'Unnamed: 18': 'handover_method',
        'Unnamed: 19': 'registrar',
        'Unnamed: 20': 'handover_person',
        'Unnamed: 21': 'recipient',
        '流转信息登记': 'transfer_info',
        '备注': 'remark',
    }
    df = df.rename(columns=column_mapping)

    if output_path is None:
        output_path = Path(excel_path).with_suffix('.csv')

    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(output_path, index=False, encoding='utf-8-sig')

    print("CSV 转换完成!")
    print(f" 输出文件: {output_path}")
    print(f" 样品数量: {len(df)}")
    return df
def main(argv=None):
    """
    Command-line entry point: convert a ledger workbook to JSON, CSV, or both.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    With ``--format both`` and ``--output`` given, both files are derived from
    the given path by swapping its suffix to ``.json`` / ``.csv``, so the two
    results land side by side. Without ``--output`` each converter falls back
    to its own default next to the input file.
    """
    import argparse

    parser = argparse.ArgumentParser(description='样品台账转换工具')
    parser.add_argument('input', help='输入 Excel 文件路径')
    parser.add_argument('-o', '--output', help='输出文件路径(可选)')
    parser.add_argument('-f', '--format', choices=['json', 'csv', 'both'],
                        default='both', help='输出格式(默认: both)')
    args = parser.parse_args(argv)

    # For a single format the given path is used verbatim.
    json_output = csv_output = args.output
    if args.format == 'both' and args.output:
        # One path cannot name two files: derive each from it by suffix.
        base = Path(args.output)
        json_output = base.with_suffix('.json')
        csv_output = base.with_suffix('.csv')

    if args.format in ('json', 'both'):
        convert_excel_to_json(args.input, json_output)
    if args.format in ('csv', 'both'):
        convert_excel_to_csv(args.input, csv_output)


if __name__ == '__main__':
    main()