# ETest-Vue-FastAPI/scripts/convert_sample_ledger.py

"""
样品台账转换脚本
将 Excel 台账转换为标准 JSON 格式，便于导入系统
"""
import pandas as pd
import json
import sys
from datetime import datetime
from pathlib import Path


def _format_date(val):
    """Return a ``YYYY-MM-DD`` string for a date-like cell, or ``None`` when blank."""
    if pd.isna(val):
        return None
    if isinstance(val, datetime):
        return val.strftime('%Y-%m-%d')
    # Textual dates: keep only the leading date part (slicing is a no-op on
    # strings shorter than ten characters).
    return str(val)[:10]


def _json_safe(val):
    """Return a cell as a JSON-serialisable Python value, or ``None`` when blank."""
    if pd.isna(val):
        return None
    if isinstance(val, datetime):
        # pandas.Timestamp is a datetime subclass; json cannot encode either.
        return val.strftime('%Y-%m-%d %H:%M:%S')
    if hasattr(val, 'isoformat'):
        # Plain date / time cells.
        return val.isoformat()
    if hasattr(val, 'item'):
        # numpy scalar -> built-in Python scalar.
        return val.item()
    return val


def _serial_text(val):
    """Return a serial-number cell as text, or ``None`` when blank."""
    val = _json_safe(val)
    if val is None:
        return None
    if isinstance(val, float) and val.is_integer():
        # A numeric serial read as float must not become "12345.0".
        return str(int(val))
    return str(val)


def _seq_number(val):
    """Return the sequence number as ``int``; ``None`` when blank or non-numeric."""
    if pd.isna(val):
        return None
    try:
        return int(val)
    except (TypeError, ValueError, OverflowError):
        # Footer / note rows carry text in the sequence column; they must not
        # abort the whole conversion.
        return None


# (JSON key, Excel column, converter) in output order.
_SAMPLE_FIELDS = (
    # Basic information
    ('seq_no', '收样情况', _seq_number),
    ('receipt_date', 'Unnamed: 1', _format_date),
    ('storage_flag', 'Unnamed: 2', _json_safe),
    ('commission_no', 'Unnamed: 3', _json_safe),
    # Sample information
    ('sample_type', 'Unnamed: 4', _json_safe),
    ('sample_sn', 'Unnamed: 5', _serial_text),
    ('hardware_version', 'Unnamed: 6', _json_safe),
    ('batch_no', 'Unnamed: 7', _json_safe),
    ('report_no', 'Unnamed: 8', _json_safe),
    # Test information
    ('external_status', 'Unnamed: 9', _json_safe),
    ('planned_test_items', 'Unnamed: 10', _json_safe),
    ('test_nature', 'Unnamed: 11', _json_safe),
    # Receipt information
    ('receipt_method', 'Unnamed: 12', _json_safe),
    ('delivery_person', 'Unnamed: 13', _json_safe),
    ('receiver', 'Unnamed: 14', _json_safe),
    # Handover information
    ('handover_date', '交样情况', _format_date),
    ('handover_status', 'Unnamed: 16', _json_safe),
    ('actual_test_items', 'Unnamed: 17', _json_safe),
    ('handover_method', 'Unnamed: 18', _json_safe),
    ('registrar', 'Unnamed: 19', _json_safe),
    ('handover_person', 'Unnamed: 20', _json_safe),
    ('recipient', 'Unnamed: 21', _json_safe),
    # Miscellaneous
    ('transfer_info', '流转信息登记', _json_safe),
    ('remark', '备注', _json_safe),
)


def convert_excel_to_json(excel_path, output_path=None):
    """
    Convert the sample-ledger Excel workbook to the standard JSON format.

    The sheet is read with its second row as the header; the sub-header row
    (the one whose first cell is '序号') is discarded. Rows with a blank
    sequence cell, and rows with neither a serial number nor a sample type,
    are skipped. A sequence cell that is not numeric yields ``seq_no: None``
    instead of aborting the run.

    Args:
        excel_path: Path of the Excel file to read.
        output_path: Path of the JSON file to write (optional; defaults to
            the Excel path with a ``.json`` suffix).

    Returns:
        dict: ``{'metadata': {...}, 'samples': [...]}`` exactly as written to
        the output file.
    """
    # Row 0 is the sheet title, row 1 carries the column-group names.
    df = pd.read_excel(excel_path, header=1)

    # Drop the sub-header row that spells out the individual column names.
    df = df[df['收样情况'] != '序号'].reset_index(drop=True)

    samples = []
    for _, row in df.iterrows():
        # Skip padding rows.
        if pd.isna(row.get('收样情况')):
            continue

        sample = {
            key: convert(row.get(column))
            for key, column, convert in _SAMPLE_FIELDS
        }

        # Keep only rows that actually describe a sample.
        if sample['sample_sn'] or sample['sample_type']:
            samples.append(sample)

    output = {
        'metadata': {
            'source_file': Path(excel_path).name,
            'export_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'total_count': len(samples),
            'version': '1.0'
        },
        'samples': samples
    }

    if output_path is None:
        output_path = Path(excel_path).with_suffix('.json')

    # Serialise before opening the file so that an encoding failure cannot
    # leave a truncated JSON file behind.
    payload = json.dumps(output, ensure_ascii=False, indent=2)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(payload)

    print("转换完成！")
    print(f"  输入文件: {excel_path}")
    print(f"  输出文件: {output_path}")
    print(f"  样品数量: {len(samples)}")

    return output


def convert_excel_to_csv(excel_path, output_path=None):
    """
    Convert the sample-ledger Excel workbook to a CSV with standard column names.

    The sheet is read with its second row as the header and the sub-header
    row (first cell '序号') is discarded. Rows with a blank sequence cell, and
    rows with neither a serial number nor a sample type, are dropped so that
    the CSV and the reported sample count describe the same samples as the
    JSON export of the same workbook.

    Args:
        excel_path: Path of the Excel file to read.
        output_path: Path of the CSV file to write (optional; defaults to the
            Excel path with a ``.csv`` suffix).

    Returns:
        pandas.DataFrame: The renamed, filtered frame that was written out.
    """
    # Row 0 is the sheet title, row 1 carries the column-group names.
    df = pd.read_excel(excel_path, header=1)

    # Drop the sub-header row that spells out the individual column names.
    df = df[df['收样情况'] != '序号'].reset_index(drop=True)

    # Sheet column -> standard column name.
    column_mapping = {
        '收样情况': 'seq_no',
        'Unnamed: 1': 'receipt_date',
        'Unnamed: 2': 'storage_flag',
        'Unnamed: 3': 'commission_no',
        'Unnamed: 4': 'sample_type',
        'Unnamed: 5': 'sample_sn',
        'Unnamed: 6': 'hardware_version',
        'Unnamed: 7': 'batch_no',
        'Unnamed: 8': 'report_no',
        'Unnamed: 9': 'external_status',
        'Unnamed: 10': 'planned_test_items',
        'Unnamed: 11': 'test_nature',
        'Unnamed: 12': 'receipt_method',
        'Unnamed: 13': 'delivery_person',
        'Unnamed: 14': 'receiver',
        '交样情况': 'handover_date',
        'Unnamed: 16': 'handover_status',
        'Unnamed: 17': 'actual_test_items',
        'Unnamed: 18': 'handover_method',
        'Unnamed: 19': 'registrar',
        'Unnamed: 20': 'handover_person',
        'Unnamed: 21': 'recipient',
        '流转信息登记': 'transfer_info',
        '备注': 'remark',
    }

    df = df.rename(columns=column_mapping)

    # Padding rows and rows without any sample data are not samples; leaving
    # them in inflates the reported count and litters the CSV with blanks.
    has_seq = df['seq_no'].notna()
    identity_cols = [
        name for name in ('sample_sn', 'sample_type') if name in df.columns
    ]
    has_identity = df[identity_cols].notna().any(axis=1)
    df = df[has_seq & has_identity].reset_index(drop=True)

    if output_path is None:
        output_path = Path(excel_path).with_suffix('.csv')

    # utf-8-sig writes a BOM so spreadsheet tools detect the encoding.
    df.to_csv(output_path, index=False, encoding='utf-8-sig')

    print("CSV 转换完成！")
    print(f"  输出文件: {output_path}")
    print(f"  样品数量: {len(df)}")

    return df


def _resolve_output_paths(fmt, output):
    """Return ``(json_path, csv_path)`` for the chosen format and ``-o`` value.

    With a single format the ``-o`` value is used verbatim. With ``both`` the
    two files share the ``-o`` stem and differ only in suffix; previously the
    JSON file ignored ``-o`` in that mode and was written next to the input.
    ``None`` lets the converter fall back to its default location.
    """
    if fmt == 'both':
        if not output:
            return None, None
        base = Path(output)
        return base.with_suffix('.json'), base.with_suffix('.csv')
    return output, output


def main(argv=None):
    """Parse the command line and run the requested conversion(s).

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    import argparse

    parser = argparse.ArgumentParser(description='样品台账转换工具')
    parser.add_argument('input', help='输入 Excel 文件路径')
    parser.add_argument('-o', '--output', help='输出文件路径（可选）')
    parser.add_argument('-f', '--format', choices=['json', 'csv', 'both'],
                        default='both', help='输出格式（默认: both）')

    args = parser.parse_args(argv)
    json_output, csv_output = _resolve_output_paths(args.format, args.output)

    if args.format in ('json', 'both'):
        convert_excel_to_json(args.input, json_output)

    if args.format in ('csv', 'both'):
        convert_excel_to_csv(args.input, csv_output)


if __name__ == '__main__':
    main()