from __future__ import annotations import os, json, subprocess, sys from pathlib import Path from typing import Any, Callable, Dict, List, Optional import pandas as pd from docx import Document from docx.enum.text import WD_ALIGN_PARAGRAPH from config_model import AppConfig, PlaceholderConfig, DbConnectionConfig from influx_service import InfluxConnectionParams, InfluxService from logger import get_logger logger = get_logger() _PROGRESS_CB: Optional[Callable[[str, int, int], None]] = None def set_progress_callback(cb): global _PROGRESS_CB; _PROGRESS_CB = cb def _progress(msg, cur, total): if _PROGRESS_CB: _PROGRESS_CB(msg, cur, total) def _build_influx_service(cfg): return InfluxService(InfluxConnectionParams(url=cfg.influx.url, org=cfg.influx.org, token=cfg.influx.token)) def _execute_db_query(ph, db_cfg): query = (ph.dbQuery or "").strip() if not query: return "" if not db_cfg: db_cfg = DbConnectionConfig() engine = (db_cfg.engine or "mysql").lower() if engine in ("sqlite", "sqlite3"): import sqlite3 conn = sqlite3.connect(db_cfg.database or str(Path(__file__).parent / "experiments.db")) result = conn.execute(query).fetchone() conn.close() return str(result[0]) if result and result[0] else "" elif engine == "mysql": import pymysql conn = pymysql.connect(host=getattr(db_cfg, "host", "localhost"), port=int(getattr(db_cfg, "port", 3306)), user=getattr(db_cfg, "username", ""), password=getattr(db_cfg, "password", ""), database=getattr(db_cfg, "database", ""), charset="utf8mb4") with conn.cursor() as cursor: cursor.execute(query) result = cursor.fetchone() conn.close() return str(result[0]) if result and result[0] else "" return "" def _load_script_data_from_db(experiment_id): try: import sqlite3 conn = sqlite3.connect(str(Path(__file__).parent / "experiments.db")) result = conn.execute("SELECT script_data FROM experiments WHERE id=?", (experiment_id,)).fetchone() conn.close() if result and result[0]: logger.info("从数据库加载脚本数据,实验ID: %d", experiment_id) return 
json.loads(result[0]) except Exception as e: logger.error("加载脚本数据失败: %s", e) return None def _load_experiment_info(experiment_id): """加载实验信息,判断是否正常(有脚本数据即为正常)""" try: import sqlite3 conn = sqlite3.connect(str(Path(__file__).parent / "experiments.db")) result = conn.execute("SELECT script_data FROM experiments WHERE id=?", (experiment_id,)).fetchone() conn.close() if result: # 如果有脚本数据(不为None且不为空),就认为是正常的 script_data = result[0] is_normal = script_data is not None and str(script_data).strip() != "" return {'is_normal': is_normal} except Exception as e: logger.error("加载实验信息失败: %s", e) return None def _parse_script_tables(script_data): tables = {} if isinstance(script_data, dict) and "tables" in script_data: for item in script_data["tables"]: key = item.get("token") or item.get("key") if key: tables[str(key)] = item return tables def _replace_global_params(text, cfg): """替换文本中的 @参数名 为全局参数的值""" if not text or '@' not in text: return text result = text if hasattr(cfg, 'globalParameters') and hasattr(cfg.globalParameters, 'parameters'): import re for param_name in re.findall(r'@(\w+)', text): if param_name in cfg.globalParameters.parameters: result = result.replace(f'@{param_name}', cfg.globalParameters.parameters[param_name]) return result def _make_seconds_index(df): if "_time" in df.columns: t = pd.to_datetime(df["_time"]) return (t - t.iloc[0]).dt.total_seconds().round().astype(int) return pd.Series(range(len(df))) def _format_numeric_columns(df, exclude_cols): if df is None or df.empty: return df result = df.copy() for col in result.columns: if col not in exclude_cols: try: numeric = pd.to_numeric(result[col], errors="coerce") if numeric.notna().any(): result[col] = numeric.round(2) except: pass return result def _to_wide_table(df, fields, first_column, titles_map, first_title=None): if df.empty: return pd.DataFrame() work = df.copy() if "_time" not in work.columns or "_value" not in work.columns: return work if fields and "_field" in work.columns: work = 
work[work["_field"].isin(fields)] if first_column == "seconds": idx = _make_seconds_index(work) work = work.assign(__index__=idx) index_col, index_title = "__index__", first_title or "秒" else: index_col, index_title = "_time", first_title or "时间" if "_field" in work.columns: wide = work.pivot_table(index=index_col, columns="_field", values="_value", aggfunc="last") else: wide = work.set_index(index_col)[["_value"]] wide.columns = ["value"] wide = wide.sort_index() wide.reset_index(inplace=True) wide.rename(columns={index_col: index_title}, inplace=True) for f, title in titles_map.items(): if f in wide.columns: wide.rename(columns={f: title}, inplace=True) return _format_numeric_columns(wide, exclude_cols=[index_title]) # ============================================================ # 核心:跨 run 占位符替换(处理 Word 将 {token} 拆分到多个 run 的情况) # ============================================================ def _replace_token_across_runs(paragraph, token, replacement): """在段落中替换占位符,处理 token 被拆分到多个 run 的情况。 例如 Word 可能将 {text4} 拆分成: Run('{')+Run('text4')+Run('}') 或将 {isNormal} 拆分成: Run('{isNormal')+Run('}') """ runs = paragraph.runs if not runs: return False # 快速路径:token 在单个 run 中 for run in runs: if token in run.text: run.text = run.text.replace(token, replacement) return True # 慢速路径:token 跨越多个 run texts = [r.text for r in runs] full = ''.join(texts) idx = full.find(token) if idx < 0: return False token_end = idx + len(token) # 计算每个 run 的字符区间 [start, end) boundaries = [] pos = 0 for t in texts: boundaries.append((pos, pos + len(t))) pos += len(t) # 找到 token 覆盖的第一个和最后一个 run first_ri = last_ri = -1 for i, (s, e) in enumerate(boundaries): if s <= idx < e and first_ri < 0: first_ri = i if s < token_end <= e: last_ri = i break if first_ri < 0 or last_ri < 0: return False # 保留 token 前后的文本 before = texts[first_ri][:idx - boundaries[first_ri][0]] after = texts[last_ri][token_end - boundaries[last_ri][0]:] # 将替换内容写入第一个受影响的 run runs[first_ri].text = before + replacement + after # 清空中间和最后受影响的 
run for i in range(first_ri + 1, last_ri + 1): runs[i].text = '' return True def _replace_texts_docx(doc, mapping): """替换文档中所有的 {key} 占位符,包括段落和表格单元格""" for key, val in mapping.items(): token = '{' + key + '}' replacement = val or '' # 替换正文段落 for para in doc.paragraphs: if token in para.text: _replace_token_across_runs(para, token, replacement) # 替换表格单元格(跳过合并单元格的重复项) for table in doc.tables: for row in table.rows: seen_tc = set() for cell in row.cells: tc_id = id(cell._tc) if tc_id in seen_tc: continue seen_tc.add(tc_id) for para in cell.paragraphs: if token in para.text: _replace_token_across_runs(para, token, replacement) # ============================================================ # 核心:表格数据填充(正确处理合并单元格的坐标映射) # ============================================================ def _get_unique_cells(row): """获取行中的唯一单元格列表(合并单元格只返回一次)""" seen = set() cells = [] for cell in row.cells: tc_id = id(cell._tc) if tc_id not in seen: seen.add(tc_id) cells.append(cell) return cells def _fill_script_table_docx(doc, token, table_spec): """填充脚本表格数据到 Word 文档中。 坐标系统说明: - 脚本数据中的 row 是表格的绝对行号 - 脚本数据中的 col 是相对于 token 所在"唯一单元格"位置的偏移 - 合并单元格被视为一个单元格,因此 col=1 跳过合并区域到达下一个唯一单元格 """ cells_data = table_spec.get("cells") or [] if not cells_data: return token_with_braces = '{' + token + '}' table_found = None token_row = 0 token_unique_col = 0 # 在表格中查找 token for table in doc.tables: for ri, row in enumerate(table.rows): unique = _get_unique_cells(row) for uci, cell in enumerate(unique): if token_with_braces in cell.text: table_found = table token_row = ri token_unique_col = uci break if table_found: break if table_found: break if not table_found: logger.warning("未找到 token: %s", token_with_braces) return logger.info("找到 token %s 在表格 row=%d, unique_col=%d", token_with_braces, token_row, token_unique_col) # 清除 token 文本(保留同一单元格中的其他文字如"环境温度") target_cell = _get_unique_cells(table_found.rows[token_row])[token_unique_col] for para in target_cell.paragraphs: _replace_token_across_runs(para, 
# ============================================================
# Report generation entry point
# ============================================================

def render_report(template_path, cfg, output_path, experiment_id=None):
    """Generate a report by filling the Word template at *template_path*.

    Text placeholders and the isNormal check-mark come from *cfg* and the
    experiment record; script-driven tables come from the experiment's
    stored script data.  Saves the document to *output_path* and returns it.
    """
    logger.info("=== 开始生成报告 ===")
    _progress("加载数据", 0, 5)

    # Script data / table specs for this experiment (if an id was given).
    script_data = _load_script_data_from_db(experiment_id) if experiment_id else None
    script_tables = _parse_script_tables(script_data)
    logger.info("脚本表格: %s", list(script_tables.keys()))

    # Open the template.
    doc = Document(str(template_path))
    _progress("替换文本", 1, 5)

    # Build the {placeholder-key: text} map from configured placeholders.
    text_map = {}
    placeholders = getattr(cfg, 'placeholders', None)
    if placeholders is not None:
        if not isinstance(placeholders, dict):
            placeholders = {}
        for key, ph in placeholders.items():
            if not hasattr(ph, 'type'):
                continue
            if ph.type == "text" and hasattr(ph, 'value'):
                text_map[key] = _replace_global_params(ph.value or '', cfg)
            elif ph.type == "dbText" and hasattr(ph, 'dbQuery'):
                text_map[key] = _execute_db_query(ph, getattr(cfg, 'db', None))

    # isNormal check-mark placeholder — always set it, so the token never
    # survives unreplaced in the output document.
    checked = ''
    if experiment_id:
        exp_info = _load_experiment_info(experiment_id)
        if exp_info and exp_info.get('is_normal'):
            checked = '\u2611'
    text_map['isNormal'] = checked
    logger.info("文本映射: %d 个, keys=%s", len(text_map), list(text_map.keys()))
    _replace_texts_docx(doc, text_map)

    # Fill the script-driven tables.
    _progress("填充表格", 2, 5)
    for token, spec in script_tables.items():
        _fill_script_table_docx(doc, token, spec)

    # Persist the result.
    _progress("保存", 4, 5)
    doc.save(str(output_path))
    _progress("完成", 5, 5)
    logger.info("=== 报告生成完成: %s ===", output_path)
    return output_path
def _execute_experiment_script(cfg: AppConfig) -> Optional[Dict]:
    """Execute the Python script embedded in the experiment process config.

    The script (base64 in cfg.experimentProcess.scriptFile) is written to a
    temp file and run either with an external interpreter (dev mode) or
    in-process via runpy (frozen/packaged build).  The experimentProcess
    JSON is passed via stdin and — when small enough — the EXPERIMENT_JSON
    env var; the time range and Influx settings travel via env vars.

    Returns:
        The script's stdout parsed as JSON, or None when no script is
        configured or execution/parsing fails.
    """
    logger.info("_execute_experiment_script invoked")
    if not cfg.experimentProcess.scriptFile:
        logger.info("No experiment script configured")
        return None
    try:
        import base64
        import io
        import json
        import os
        import runpy
        import subprocess
        import sys
        import tempfile
        from shutil import which

        # Decode the base64-embedded script.
        logger.info("Decoded script length: %d", len(cfg.experimentProcess.scriptFile))
        script_content = base64.b64decode(cfg.experimentProcess.scriptFile)
        logger.info("Script bytes size: %d", len(script_content))

        # Use the system temp dir to keep the script path short.
        temp_dir = tempfile.gettempdir()
        logger.debug("System temp directory: %s", temp_dir)
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', dir=temp_dir, delete=False) as tmp_file:
            tmp_file.write(script_content)
            tmp_script_path = tmp_file.name
        # Log the path length for debugging Windows MAX_PATH issues.
        logger.debug("Temp script path length: %d, path: %s", len(tmp_script_path), tmp_script_path)
        if len(tmp_script_path) > 250:
            logger.warning("Temp script path is quite long (%d chars), might cause issues on Windows", len(tmp_script_path))
        try:
            # Serialize the experimentProcess object as the script payload.
            cfg_dict = cfg.to_dict()
            exp_obj = cfg_dict.get("experimentProcess", {})
            exp_json = json.dumps(exp_obj, ensure_ascii=False)
            logger.info("Experiment script payload (first 300 chars): %s", exp_json[:300])
            exp_json_size = len(exp_json)
            if exp_json_size > 10000:
                logger.warning("Experiment JSON is quite large (%d chars), might cause issues on Windows", exp_json_size)

            # Time-range resolution, highest priority first:
            # 1) env vars set by the caller (e.g. the experiment history list)
            experiment_start = os.environ.get('EXPERIMENT_START', '').strip()
            experiment_end = os.environ.get('EXPERIMENT_END', '').strip()
            # 2) a placeholder's Influx timeRange ("start: ..., stop: ...")
            if not experiment_start or not experiment_end:
                for ph in cfg.placeholders.values():
                    if ph.influx and ph.influx.timeRange:
                        time_range = ph.influx.timeRange.strip()
                        if "start:" in time_range and "stop:" in time_range:
                            try:
                                local_start = experiment_start
                                local_end = experiment_end
                                for part in time_range.split(","):
                                    part = part.strip()
                                    if part.startswith("start:"):
                                        local_start = part.replace("start:", "").strip()
                                    elif part.startswith("stop:"):
                                        local_end = part.replace("stop:", "").strip()
                                if local_start and local_end:
                                    experiment_start = experiment_start or local_start
                                    experiment_end = experiment_end or local_end
                                    break
                            except Exception as e:
                                logger.warning("Failed to parse timeRange: %s", e)
            # 3) last resort: the experiment process remark ("start=...,end=...")
            if (not experiment_start or not experiment_end) and cfg.experimentProcess.remark:
                remark = cfg.experimentProcess.remark
                try:
                    if "start=" in remark and "end=" in remark:
                        local_start = experiment_start
                        local_end = experiment_end
                        for part in remark.split(","):
                            part = part.strip()
                            if part.startswith("start="):
                                local_start = part.replace("start=", "").strip()
                            elif part.startswith("end="):
                                local_end = part.replace("end=", "").strip()
                        if local_start and local_end:
                            experiment_start = experiment_start or local_start
                            experiment_end = experiment_end or local_end
                except Exception as e:
                    logger.warning("Failed to parse remark for time range: %s", e)

            # Environment for the child script; avoid oversized variables.
            script_env = os.environ.copy()
            script_env.update({
                'PYTHONIOENCODING': 'utf-8',
            })
            # Only pass the payload via env when it is small enough.
            if exp_json_size < 8192:
                script_env['EXPERIMENT_JSON'] = exp_json
            else:
                logger.info("EXPERIMENT_JSON is too large for environment variable, will pass via stdin only")
            # Experiment time range.
            if experiment_start:
                script_env['EXPERIMENT_START'] = experiment_start
            if experiment_end:
                script_env['EXPERIMENT_END'] = experiment_end
            # InfluxDB connection settings.
            if cfg.influx.url:
                script_env['INFLUX_URL'] = cfg.influx.url
            if cfg.influx.org:
                script_env['INFLUX_ORG'] = cfg.influx.org
            if cfg.influx.token:
                script_env['INFLUX_TOKEN'] = cfg.influx.token
            # bucket/measurement from the first placeholder that defines them.
            for ph in cfg.placeholders.values():
                if ph.influx:
                    if ph.influx.bucket:
                        script_env['INFLUX_BUCKET'] = ph.influx.bucket
                    if ph.influx.measurement:
                        script_env['INFLUX_MEASUREMENT'] = ph.influx.measurement
                    if ph.influx.bucket or ph.influx.measurement:
                        break  # use the first configuration found

            # External interpreters are only tried in dev (non-frozen) mode;
            # packaged builds are detected via sys.frozen.
            is_frozen = getattr(sys, 'frozen', False)
            candidates: List[List[str]] = []
            if not is_frozen:
                if which('python'):
                    candidates.append(['python', tmp_script_path])
                if sys.platform.startswith('win') and which('py'):
                    candidates.append(['py', '-3', tmp_script_path])
            logger.info("Is frozen (packaged): %s", is_frozen)
            logger.info("Experiment script candidates: %s", candidates)

            stdout_text: str = ""
            stderr_text: str = ""
            logger.info("Executing experiment script: %s", cfg.experimentProcess.scriptName)
            if experiment_start and experiment_end:
                logger.info("Experiment time range: %s to %s", experiment_start, experiment_end)

            used_external = False
            result = None
            if candidates:
                last_err = None
                for cmd in candidates:
                    try:
                        # No extra argv: the payload goes via stdin, which
                        # avoids both env-size limits and argv parsing issues.
                        result = subprocess.run(
                            cmd,
                            capture_output=True,
                            text=True,
                            encoding='utf-8',
                            errors='replace',
                            timeout=30,
                            env=script_env,
                            input=exp_json,
                        )
                        break
                    except Exception as e:
                        last_err = e
                        logger.warning("Failed to execute script with command %s: %s", cmd, e)
                        continue
                used_external = result is not None
                if result is None:
                    if last_err:
                        raise last_err
                    raise RuntimeError('Failed to execute script with external Python')
                stdout_text = result.stdout or ''
                stderr_text = result.stderr or ''
                if result.returncode != 0:
                    logger.error("Script execution failed (ext): return_code=%d, stdout=%s, stderr=%s", result.returncode, stdout_text, stderr_text)
                    return None
            else:
                # Frozen build / no external interpreter: run in-process with
                # redirected stdio and a temporarily patched environment.
                buf_out = io.StringIO()
                buf_err = io.StringIO()
                fake_in = io.StringIO(exp_json)
                old_env = dict(os.environ)
                os.environ.update(script_env)
                old_stdin, old_stdout, old_stderr, old_argv = sys.stdin, sys.stdout, sys.stderr, sys.argv
                script_executed = False
                try:
                    sys.stdin = fake_in
                    sys.stdout = buf_out
                    sys.stderr = buf_err
                    sys.argv = [tmp_script_path]
                    # Execute the script file as __main__.
                    runpy.run_path(tmp_script_path, run_name='__main__')
                    script_executed = True
                    stdout_text = buf_out.getvalue()
                    stderr_text = buf_err.getvalue()
                except SystemExit as e:
                    # The script may call sys.exit; non-zero means failure.
                    script_executed = True
                    stdout_text = buf_out.getvalue()
                    stderr_text = (buf_err.getvalue() or '') + f"\n(SystemExit: {e.code})"
                    if getattr(e, 'code', 0) not in (None, 0):
                        logger.error("Script execution failed (in-proc): %s", stderr_text)
                        return None
                except Exception as e:
                    script_executed = True
                    logger.error("Script execution error (in-proc): %s", e, exc_info=True)
                    return None
                finally:
                    sys.stdin, sys.stdout, sys.stderr, sys.argv = old_stdin, old_stdout, old_stderr, old_argv
                    os.environ.clear()
                    os.environ.update(old_env)
                if not script_executed:
                    logger.error("Script failed to execute (in-proc): unknown error occurred")
                    return None

            # Parse the script's JSON output; empty output falls back to the
            # input payload (pass-through behaviour).
            output = (stdout_text or '').strip()
            if not output:
                logger.warning("Script executed but returned no output; applying fallback to EXPERIMENT_JSON")
                output = exp_json
            try:
                data = json.loads(output)
            except Exception as e:
                logger.error("Failed to parse script output as JSON: error=%s, output=%s", e, output[:1000])
                return None
            logger.info("Experiment script stdout: %s", output[:500])
            logger.info("Script executed successfully, data length: headers=%d, rows=%d",
                        len(data.get('headers', []) if isinstance(data, dict) else []),
                        len(data.get('rows', []) if isinstance(data, dict) else []))
            return data
        finally:
            # Always remove the temporary script file.
            try:
                os.unlink(tmp_script_path)
            except Exception as e:
                logger.warning("Failed to remove temporary script file: %s", e)
    except OSError as e:
        # FIX: OSError.winerror exists only on Windows; getattr() keeps this
        # handler from raising AttributeError on other platforms.
        if getattr(e, 'winerror', None) == 206:  # filename or extension too long
            logger.error("Failed to execute experiment script due to Windows path length limitation: %s", e)
            logger.error("Consider reducing the size of the script or using a shorter temp directory")
        else:
            logger.error("OS error while executing experiment script: %s", e, exc_info=True)
        return None
    except Exception as e:
        logger.error("Failed to execute experiment script: %s", e, exc_info=True)
        return None