PCM_Report/report_generator.py

676 lines
28 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from __future__ import annotations
import os, json, subprocess, sys
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
import pandas as pd
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from config_model import AppConfig, PlaceholderConfig, DbConnectionConfig
from influx_service import InfluxConnectionParams, InfluxService
from logger import get_logger
# Module-wide logger shared by every helper in this file.
logger = get_logger()
# Optional UI progress callback: (message, current_step, total_steps).
_PROGRESS_CB: Optional[Callable[[str, int, int], None]] = None
def set_progress_callback(cb):
    """Register (or clear, with None) the global progress callback."""
    global _PROGRESS_CB
    _PROGRESS_CB = cb
def _progress(msg, cur, total):
    """Forward a progress update to the registered callback, if one is set."""
    cb = _PROGRESS_CB
    if cb:
        cb(msg, cur, total)
def _build_influx_service(cfg):
    """Build an InfluxService from the app config's Influx connection settings."""
    params = InfluxConnectionParams(
        url=cfg.influx.url,
        org=cfg.influx.org,
        token=cfg.influx.token,
    )
    return InfluxService(params)
def _execute_db_query(ph, db_cfg):
query = (ph.dbQuery or "").strip()
if not query: return ""
if not db_cfg: db_cfg = DbConnectionConfig()
engine = (db_cfg.engine or "mysql").lower()
if engine in ("sqlite", "sqlite3"):
import sqlite3
conn = sqlite3.connect(db_cfg.database or str(Path(__file__).parent / "experiments.db"))
result = conn.execute(query).fetchone()
conn.close()
return str(result[0]) if result and result[0] else ""
elif engine == "mysql":
import pymysql
conn = pymysql.connect(host=getattr(db_cfg, "host", "localhost"), port=int(getattr(db_cfg, "port", 3306)),
user=getattr(db_cfg, "username", ""), password=getattr(db_cfg, "password", ""),
database=getattr(db_cfg, "database", ""), charset="utf8mb4")
with conn.cursor() as cursor:
cursor.execute(query)
result = cursor.fetchone()
conn.close()
return str(result[0]) if result and result[0] else ""
return ""
def _load_script_data_from_db(experiment_id):
    """Load and JSON-decode the script_data column for one experiment.

    Returns the decoded object, or None when the row is missing, the column
    is empty, or any DB/JSON error occurs (errors are logged, not raised).
    """
    try:
        import sqlite3
        from contextlib import closing
        db_path = str(Path(__file__).parent / "experiments.db")
        # closing() guarantees the connection is released even on error;
        # the previous version leaked it when execute() raised.
        with closing(sqlite3.connect(db_path)) as conn:
            result = conn.execute(
                "SELECT script_data FROM experiments WHERE id=?", (experiment_id,)
            ).fetchone()
        if result and result[0]:
            logger.info("从数据库加载脚本数据实验ID: %d", experiment_id)
            return json.loads(result[0])
    except Exception as e:
        logger.error("加载脚本数据失败: %s", e)
    return None
def _load_experiment_info(experiment_id):
    """Load experiment info; an experiment is 'normal' iff it has script data.

    Returns {'is_normal': bool}, or None when the row is missing or a DB
    error occurs (errors are logged, not raised).
    """
    try:
        import sqlite3
        from contextlib import closing
        db_path = str(Path(__file__).parent / "experiments.db")
        # closing() guarantees the connection is released even on error;
        # the previous version leaked it when execute() raised.
        with closing(sqlite3.connect(db_path)) as conn:
            result = conn.execute(
                "SELECT script_data FROM experiments WHERE id=?", (experiment_id,)
            ).fetchone()
        if result:
            # Non-None, non-blank script_data marks the experiment as normal.
            script_data = result[0]
            is_normal = script_data is not None and str(script_data).strip() != ""
            return {'is_normal': is_normal}
    except Exception as e:
        logger.error("加载实验信息失败: %s", e)
    return None
def _parse_script_tables(script_data):
tables = {}
if isinstance(script_data, dict) and "tables" in script_data:
for item in script_data["tables"]:
key = item.get("token") or item.get("key")
if key: tables[str(key)] = item
return tables
def _replace_global_params(text, cfg):
"""替换文本中的 @参数名 为全局参数的值"""
if not text or '@' not in text: return text
result = text
if hasattr(cfg, 'globalParameters') and hasattr(cfg.globalParameters, 'parameters'):
import re
for param_name in re.findall(r'@(\w+)', text):
if param_name in cfg.globalParameters.parameters:
result = result.replace(f'@{param_name}', cfg.globalParameters.parameters[param_name])
return result
def _make_seconds_index(df):
if "_time" in df.columns:
t = pd.to_datetime(df["_time"])
return (t - t.iloc[0]).dt.total_seconds().round().astype(int)
return pd.Series(range(len(df)))
def _format_numeric_columns(df, exclude_cols):
if df is None or df.empty: return df
result = df.copy()
for col in result.columns:
if col not in exclude_cols:
try:
numeric = pd.to_numeric(result[col], errors="coerce")
if numeric.notna().any(): result[col] = numeric.round(2)
except: pass
return result
def _to_wide_table(df, fields, first_column, titles_map, first_title=None):
    """Pivot a long Influx result (_time/_field/_value) into a wide table.

    Args:
        df: long-format frame, expected to carry _time and _value columns
            (and usually _field); returned unchanged if those are missing.
        fields: optional whitelist of _field values to keep.
        first_column: "seconds" to index rows by elapsed seconds since the
            first sample; anything else indexes by the raw _time column.
        titles_map: mapping of _field name -> display column title.
        first_title: display title for the index column (defaults to ""
            for seconds mode, "时间" for time mode).

    Returns:
        A new DataFrame, sorted by index, numeric columns rounded to two
        decimals via _format_numeric_columns; empty input yields an empty
        DataFrame.
    """
    if df.empty:
        return pd.DataFrame()
    work = df.copy()
    # Not an Influx-shaped frame: hand it back untouched.
    if "_time" not in work.columns or "_value" not in work.columns:
        return work
    if fields and "_field" in work.columns:
        work = work[work["_field"].isin(fields)]
    if first_column == "seconds":
        idx = _make_seconds_index(work)
        work = work.assign(__index__=idx)
        index_col, index_title = "__index__", first_title or ""
    else:
        index_col, index_title = "_time", first_title or "时间"
    if "_field" in work.columns:
        # Duplicate (index, field) pairs collapse to the last observation.
        wide = work.pivot_table(index=index_col, columns="_field", values="_value", aggfunc="last")
    else:
        # Single unnamed series: keep _value under a generic "value" column.
        wide = work.set_index(index_col)[["_value"]]
        wide.columns = ["value"]
    wide = wide.sort_index()
    wide.reset_index(inplace=True)
    wide.rename(columns={index_col: index_title}, inplace=True)
    for f, title in titles_map.items():
        if f in wide.columns:
            wide.rename(columns={f: title}, inplace=True)
    return _format_numeric_columns(wide, exclude_cols=[index_title])
# ============================================================
# Core: cross-run placeholder replacement (handles Word splitting
# a {token} across multiple runs)
# ============================================================
def _replace_token_across_runs(paragraph, token, replacement):
    """Replace *token* in a paragraph, even when Word split it across runs.

    Word may fragment a placeholder, e.g. {text4} into
    Run('{') + Run('text4') + Run('}'), or {isNormal} into
    Run('{isNormal') + Run('}').

    Returns True when a replacement was made, False otherwise. When the
    token sits inside one run, every occurrence in that run is replaced;
    when it spans runs, only the first spanning occurrence is handled.
    """
    runs = paragraph.runs
    if not runs:
        return False
    # Fast path: the whole token sits inside a single run.
    for run in runs:
        if token in run.text:
            run.text = run.text.replace(token, replacement)
            return True
    # Slow path: the token spans multiple runs.
    texts = [r.text for r in runs]
    full = ''.join(texts)
    idx = full.find(token)
    if idx < 0:
        return False
    token_end = idx + len(token)
    # Character interval [start, end) covered by each run.
    boundaries = []
    pos = 0
    for t in texts:
        boundaries.append((pos, pos + len(t)))
        pos += len(t)
    # Locate the first and last runs touched by the token.
    first_ri = last_ri = -1
    for i, (s, e) in enumerate(boundaries):
        if s <= idx < e and first_ri < 0:
            first_ri = i
        if s < token_end <= e:
            last_ri = i
            break
    if first_ri < 0 or last_ri < 0:
        return False
    # Preserve any text before/after the token in the edge runs.
    before = texts[first_ri][:idx - boundaries[first_ri][0]]
    after = texts[last_ri][token_end - boundaries[last_ri][0]:]
    # Write the replacement into the first affected run...
    runs[first_ri].text = before + replacement + after
    # ...and blank out the middle and last affected runs.
    for i in range(first_ri + 1, last_ri + 1):
        runs[i].text = ''
    return True
def _replace_texts_docx(doc, mapping):
    """Replace every {key} placeholder in body paragraphs and table cells.

    Falsy mapping values (e.g. None) are written as empty strings.
    """
    for key, val in mapping.items():
        token = '{' + key + '}'
        replacement = val or ''
        # Body paragraphs.
        for para in doc.paragraphs:
            if token in para.text:
                _replace_token_across_runs(para, token, replacement)
        # Table cells (skip duplicate entries produced by merged cells).
        for table in doc.tables:
            for row in table.rows:
                seen_tc = set()
                for cell in row.cells:
                    tc_id = id(cell._tc)
                    if tc_id in seen_tc:
                        continue
                    seen_tc.add(tc_id)
                    for para in cell.paragraphs:
                        if token in para.text:
                            _replace_token_across_runs(para, token, replacement)
# ============================================================
# Core: table data filling (correct coordinate mapping across
# merged cells)
# ============================================================
def _get_unique_cells(row):
"""获取行中的唯一单元格列表(合并单元格只返回一次)"""
seen = set()
cells = []
for cell in row.cells:
tc_id = id(cell._tc)
if tc_id not in seen:
seen.add(tc_id)
cells.append(cell)
return cells
def _fill_script_table_docx(doc, token, table_spec):
    """Fill script-provided cell values into the Word table holding *token*.

    Coordinate system:
    - `row` in the script data is an absolute row index of the table;
    - `col` is an offset relative to the *unique-cell* position of the token;
    - a merged region counts as one cell, so col=1 skips past a merged
      area to the next unique cell.

    Out-of-range coordinates and per-cell failures are logged and skipped.
    """
    cells_data = table_spec.get("cells") or []
    if not cells_data:
        return
    token_with_braces = '{' + token + '}'
    table_found = None
    token_row = 0
    token_unique_col = 0
    # Locate the token inside the document's tables.
    for table in doc.tables:
        for ri, row in enumerate(table.rows):
            unique = _get_unique_cells(row)
            for uci, cell in enumerate(unique):
                if token_with_braces in cell.text:
                    table_found = table
                    token_row = ri
                    token_unique_col = uci
                    break
            if table_found:
                break
        if table_found:
            break
    if not table_found:
        logger.warning("未找到 token: %s", token_with_braces)
        return
    logger.info("找到 token %s 在表格 row=%d, unique_col=%d", token_with_braces, token_row, token_unique_col)
    # Strip the token text while keeping any other text in the same cell
    # (e.g. a label such as "环境温度").
    target_cell = _get_unique_cells(table_found.rows[token_row])[token_unique_col]
    for para in target_cell.paragraphs:
        _replace_token_across_runs(para, token_with_braces, '')
    # Write the data values.
    for cell_info in cells_data:
        if not isinstance(cell_info, dict):
            continue
        value = cell_info.get("value")
        if value is None:
            continue
        data_row = int(cell_info.get("row", 0))
        data_col = int(cell_info.get("col", 0))
        try:
            if data_row >= len(table_found.rows):
                logger.warning("%d 超出表格范围 (%d行)", data_row, len(table_found.rows))
                continue
            unique = _get_unique_cells(table_found.rows[data_row])
            target_idx = token_unique_col + data_col
            if target_idx >= len(unique):
                logger.warning("%d (target_idx=%d) 超出范围 (%d列)", data_col, target_idx, len(unique))
                continue
            cell = unique[target_idx]
            para = cell.paragraphs[0] if cell.paragraphs else None
            if para is None:
                cell.text = str(value)
            elif para.runs:
                # Existing runs: rewrite the first run's text...
                para.runs[0].text = str(value)
                # ...and blank out the rest.
                for r in para.runs[1:]:
                    r.text = ''
            else:
                # No runs yet: add one.
                para.add_run(str(value))
            # Center-align the written value.
            if para is not None:
                para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        except Exception as e:
            logger.warning("填充失败 row=%d col=%d: %s", data_row, data_col, e)
# ============================================================
# Report generation entry point
# ============================================================
def render_report(template_path, cfg, output_path, experiment_id=None):
    """Generate a report by filling the Word template and saving it.

    Steps: load script data for the experiment (if any), replace {key}
    text placeholders from the config, fill script-table data, save.

    Returns:
        output_path (unchanged), for caller convenience.
    """
    logger.info("=== 开始生成报告 ===")
    _progress("加载数据", 0, 5)
    # Load script data and experiment info.
    script_data = _load_script_data_from_db(experiment_id) if experiment_id else None
    script_tables = _parse_script_tables(script_data)
    logger.info("脚本表格: %s", list(script_tables.keys()))
    # Open the template document.
    doc = Document(str(template_path))
    _progress("替换文本", 1, 5)
    # Build the text replacement mapping from placeholder configs.
    text_map = {}
    if hasattr(cfg, 'placeholders'):
        placeholders = cfg.placeholders if isinstance(cfg.placeholders, dict) else {}
        for key, ph in placeholders.items():
            if hasattr(ph, 'type'):
                if ph.type == "text" and hasattr(ph, 'value'):
                    text_map[key] = _replace_global_params(ph.value or '', cfg)
                elif ph.type == "dbText" and hasattr(ph, 'dbQuery'):
                    text_map[key] = _execute_db_query(ph, getattr(cfg, 'db', None))
    # Experiment-info placeholder (isNormal renders a ballot-box check mark).
    # Always added so the placeholder never survives unreplaced.
    is_normal_checked = ''
    if experiment_id:
        exp_info = _load_experiment_info(experiment_id)
        if exp_info and exp_info.get('is_normal'):
            is_normal_checked = '\u2611'
    text_map['isNormal'] = is_normal_checked
    logger.info("文本映射: %d 个, keys=%s", len(text_map), list(text_map.keys()))
    _replace_texts_docx(doc, text_map)
    # Fill script-table data.
    _progress("填充表格", 2, 5)
    for token, spec in script_tables.items():
        _fill_script_table_docx(doc, token, spec)
    # Save the result.
    _progress("保存", 4, 5)
    doc.save(str(output_path))
    _progress("完成", 5, 5)
    logger.info("=== 报告生成完成: %s ===", output_path)
    return output_path
def _execute_experiment_script(cfg: AppConfig) -> Optional[Dict]:
    """Execute the Python script attached to the experiment process.

    The script (stored base64-encoded in the config) is written to a temp
    file and run either with an external interpreter (development) or
    in-process via runpy (frozen/packaged builds, or when no interpreter is
    found). Context is passed through environment variables and the
    experimentProcess JSON on stdin; the script's stdout must be JSON.

    Args:
        cfg: application configuration.
    Returns:
        The JSON object printed by the script, or None when no script is
        configured or execution/parsing fails.
    """
    logger.info("_execute_experiment_script invoked")
    if not cfg.experimentProcess.scriptFile:
        logger.info("No experiment script configured")
        return None
    try:
        import base64
        import json
        import tempfile
        import subprocess
        import sys
        from shutil import which
        import io
        import runpy
        import os
        # Decode the base64-encoded script body.
        logger.info("Decoded script length: %d", len(cfg.experimentProcess.scriptFile))
        script_content = base64.b64decode(cfg.experimentProcess.scriptFile)
        logger.info("Script bytes size: %d", len(script_content))
        # Materialize the script in the system temp directory (short path).
        temp_dir = tempfile.gettempdir()
        logger.debug("System temp directory: %s", temp_dir)
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', dir=temp_dir, delete=False) as tmp_file:
            tmp_file.write(script_content)
            tmp_script_path = tmp_file.name
        # Log the temp path length to help debug Windows MAX_PATH issues.
        logger.debug("Temp script path length: %d, path: %s", len(tmp_script_path), tmp_script_path)
        if len(tmp_script_path) > 250:
            logger.warning("Temp script path is quite long (%d chars), might cause issues on Windows", len(tmp_script_path))
        try:
            # Build the experimentProcess JSON payload passed to the script.
            cfg_dict = cfg.to_dict()
            exp_obj = cfg_dict.get("experimentProcess", {})
            exp_json = json.dumps(exp_obj, ensure_ascii=False)
            logger.info("Experiment script payload (first 300 chars): %s", exp_json[:300])
            exp_json_size = len(exp_json)
            if exp_json_size > 10000:
                logger.warning("Experiment JSON is quite large (%d chars), might cause issues on Windows", exp_json_size)
            # Resolve the experiment time range.
            # Highest priority: caller-provided environment variables
            # (e.g. from the experiment history list).
            experiment_start = os.environ.get('EXPERIMENT_START', '').strip()
            experiment_end = os.environ.get('EXPERIMENT_END', '').strip()
            if not experiment_start or not experiment_end:
                # Next: a timeRange from any placeholder's Influx config.
                for ph in cfg.placeholders.values():
                    if ph.influx and ph.influx.timeRange:
                        time_range = ph.influx.timeRange.strip()
                        if "start:" in time_range and "stop:" in time_range:
                            try:
                                parts = time_range.split(",")
                                local_start = experiment_start
                                local_end = experiment_end
                                for part in parts:
                                    part = part.strip()
                                    if part.startswith("start:"):
                                        local_start = part.replace("start:", "").strip()
                                    elif part.startswith("stop:"):
                                        local_end = part.replace("stop:", "").strip()
                                if local_start and local_end:
                                    experiment_start = experiment_start or local_start
                                    experiment_end = experiment_end or local_end
                                    break
                            except Exception as e:
                                logger.warning("Failed to parse timeRange: %s", e)
            # Last resort: a time range embedded in the process remark
            # (format "start=...,end=...").
            if (not experiment_start or not experiment_end) and cfg.experimentProcess.remark:
                remark = cfg.experimentProcess.remark
                try:
                    if "start=" in remark and "end=" in remark:
                        parts = remark.split(",")
                        local_start = experiment_start
                        local_end = experiment_end
                        for part in parts:
                            part = part.strip()
                            if part.startswith("start="):
                                local_start = part.replace("start=", "").strip()
                            elif part.startswith("end="):
                                local_end = part.replace("end=", "").strip()
                        if local_start and local_end:
                            experiment_start = experiment_start or local_start
                            experiment_end = experiment_end or local_end
                except Exception as e:
                    logger.warning("Failed to parse remark for time range: %s", e)
            # Prepare the child environment, avoiding oversized values.
            script_env = os.environ.copy()
            script_env.update({
                'PYTHONIOENCODING': 'utf-8',
            })
            # Only pass the payload via env when it is small enough.
            if exp_json_size < 8192:
                script_env['EXPERIMENT_JSON'] = exp_json
            else:
                logger.info("EXPERIMENT_JSON is too large for environment variable, will pass via stdin only")
            # Experiment time range.
            if experiment_start:
                script_env['EXPERIMENT_START'] = experiment_start
            if experiment_end:
                script_env['EXPERIMENT_END'] = experiment_end
            # InfluxDB connection settings.
            if cfg.influx.url:
                script_env['INFLUX_URL'] = cfg.influx.url
            if cfg.influx.org:
                script_env['INFLUX_ORG'] = cfg.influx.org
            if cfg.influx.token:
                script_env['INFLUX_TOKEN'] = cfg.influx.token
            # Bucket/measurement taken from the first table/chart placeholder
            # that declares one.
            for ph in cfg.placeholders.values():
                if ph.influx:
                    if ph.influx.bucket:
                        script_env['INFLUX_BUCKET'] = ph.influx.bucket
                    if ph.influx.measurement:
                        script_env['INFLUX_MEASUREMENT'] = ph.influx.measurement
                    if ph.influx.bucket or ph.influx.measurement:
                        break
            # Pick the execution strategy: sys.frozen marks a packaged build.
            is_frozen = getattr(sys, 'frozen', False)
            candidates: List[List[str]] = []
            # External interpreters are only tried in development (not frozen).
            if not is_frozen:
                if which('python'):
                    candidates.append(['python', tmp_script_path])
                if sys.platform.startswith('win') and which('py'):
                    candidates.append(['py', '-3', tmp_script_path])
            logger.info("Is frozen (packaged): %s", is_frozen)
            logger.info("Experiment script candidates: %s", candidates)
            stdout_text: str = ""
            stderr_text: str = ""
            logger.info("Executing experiment script: %s", cfg.experimentProcess.scriptName)
            if experiment_start and experiment_end:
                logger.info("Experiment time range: %s to %s", experiment_start, experiment_end)
            used_external = False
            if candidates:
                last_err = None
                result = None
                for cmd in candidates:
                    try:
                        # No extra argv is passed: the script reads its input
                        # from stdin/env, avoiding argument-parsing errors.
                        result = subprocess.run(
                            cmd,
                            capture_output=True,
                            text=True,
                            encoding='utf-8',
                            errors='replace',
                            timeout=30,
                            env=script_env,
                            input=exp_json,  # stdin avoids env-size limits
                        )
                        break
                    except Exception as e:
                        last_err = e
                        logger.warning("Failed to execute script with command %s: %s", cmd, e)
                        continue
                used_external = result is not None
                if result is None:
                    if last_err:
                        raise last_err
                    raise RuntimeError('Failed to execute script with external Python')
                stdout_text = (result.stdout or '')
                stderr_text = (result.stderr or '')
                if result.returncode != 0:
                    logger.error("Script execution failed (ext): return_code=%d, stdout=%s, stderr=%s",
                                 result.returncode, stdout_text, stderr_text)
                    return None
            else:
                # Frozen build / no external interpreter: run in-process with
                # stdio, argv and os.environ swapped temporarily.
                buf_out = io.StringIO()
                buf_err = io.StringIO()
                fake_in = io.StringIO(exp_json)
                old_env = dict(os.environ)
                os.environ.update(script_env)
                old_stdin, old_stdout, old_stderr, old_argv = sys.stdin, sys.stdout, sys.stderr, sys.argv
                script_executed = False
                try:
                    sys.stdin = fake_in
                    sys.stdout = buf_out
                    sys.stderr = buf_err
                    sys.argv = [tmp_script_path]
                    # Run the script file as __main__.
                    runpy.run_path(tmp_script_path, run_name='__main__')
                    script_executed = True
                    stdout_text = buf_out.getvalue()
                    stderr_text = buf_err.getvalue()
                except SystemExit as e:
                    # The script may call sys.exit(); non-zero means failure.
                    script_executed = True
                    stdout_text = buf_out.getvalue()
                    stderr_text = (buf_err.getvalue() or '') + f"\n(SystemExit: {e.code})"
                    if getattr(e, 'code', 0) not in (None, 0):
                        logger.error("Script execution failed (in-proc): %s", stderr_text)
                        return None
                except Exception as e:
                    script_executed = True
                    logger.error("Script execution error (in-proc): %s", e, exc_info=True)
                    return None
                finally:
                    sys.stdin, sys.stdout, sys.stderr, sys.argv = old_stdin, old_stdout, old_stderr, old_argv
                    os.environ.clear(); os.environ.update(old_env)
                if not script_executed:
                    logger.error("Script failed to execute (in-proc): unknown error occurred")
                    return None
            # Defensive re-check of the external result.
            if used_external and result is not None and result.returncode != 0:
                logger.error("Script execution failed: return_code=%d, stdout=%s, stderr=%s",
                             result.returncode, result.stdout, result.stderr)
                return None
            # Parse the script's JSON output; fall back to the input payload
            # when the script produced nothing.
            output = (stdout_text or '').strip()
            if not output:
                logger.warning("Script executed but returned no output; applying fallback to EXPERIMENT_JSON")
                output = exp_json
            try:
                data = json.loads(output)
            except Exception as e:
                logger.error("Failed to parse script output as JSON: error=%s, output=%s", e, output[:1000])
                return None
            logger.info("Experiment script stdout: %s", output[:500])
            logger.info("Script executed successfully, data length: headers=%d, rows=%d",
                        len(data.get('headers', []) if isinstance(data, dict) else []),
                        len(data.get('rows', []) if isinstance(data, dict) else []))
            return data
        finally:
            # Always remove the temp script file.
            try:
                os.unlink(tmp_script_path)
            except Exception as e:
                logger.warning("Failed to remove temporary script file: %s", e)
    except OSError as e:
        # getattr: OSError.winerror exists only on Windows builds of Python;
        # the previous direct `e.winerror` raised AttributeError elsewhere.
        if getattr(e, 'winerror', None) == 206:  # filename or extension too long
            logger.error("Failed to execute experiment script due to Windows path length limitation: %s", e)
            logger.error("Consider reducing the size of the script or using a shorter temp directory")
        else:
            logger.error("OS error while executing experiment script: %s", e, exc_info=True)
        return None
    except Exception as e:
        logger.error("Failed to execute experiment script: %s", e, exc_info=True)
        return None