PCM_Report/report_generator.py

676 lines
28 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from __future__ import annotations
import os, json, subprocess, sys
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
import pandas as pd
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from config_model import AppConfig, PlaceholderConfig, DbConnectionConfig
from influx_service import InfluxConnectionParams, InfluxService
from logger import get_logger
# Module-wide logger shared by every helper in this file.
logger = get_logger()
# Optional UI progress callback: (message, current_step, total_steps).
_PROGRESS_CB: Optional[Callable[[str, int, int], None]] = None
def set_progress_callback(cb):
    """Register (or clear, with None) the global progress callback."""
    global _PROGRESS_CB
    _PROGRESS_CB = cb
def _progress(msg, cur, total):
    """Forward a progress update to the registered callback, if one is set."""
    cb = _PROGRESS_CB
    if cb:
        cb(msg, cur, total)
def _build_influx_service(cfg):
    """Build an InfluxService from the app config's Influx connection settings."""
    params = InfluxConnectionParams(
        url=cfg.influx.url,
        org=cfg.influx.org,
        token=cfg.influx.token,
    )
    return InfluxService(params)
def _execute_db_query(ph, db_cfg):
query = (ph.dbQuery or "").strip()
if not query: return ""
if not db_cfg: db_cfg = DbConnectionConfig()
engine = (db_cfg.engine or "mysql").lower()
if engine in ("sqlite", "sqlite3"):
import sqlite3
conn = sqlite3.connect(db_cfg.database or str(Path(__file__).parent / "experiments.db"))
result = conn.execute(query).fetchone()
conn.close()
return str(result[0]) if result and result[0] else ""
elif engine == "mysql":
import pymysql
conn = pymysql.connect(host=getattr(db_cfg, "host", "localhost"), port=int(getattr(db_cfg, "port", 3306)),
user=getattr(db_cfg, "username", ""), password=getattr(db_cfg, "password", ""),
database=getattr(db_cfg, "database", ""), charset="utf8mb4")
with conn.cursor() as cursor:
cursor.execute(query)
result = cursor.fetchone()
conn.close()
return str(result[0]) if result and result[0] else ""
return ""
def _load_script_data_from_db(experiment_id):
    """Load and JSON-decode the script_data column for one experiment.

    Returns the decoded object, or None when the row is missing, the column
    is empty, or any DB/JSON error occurs (errors are logged, not raised).
    """
    try:
        import sqlite3
        from contextlib import closing
        db_path = str(Path(__file__).parent / "experiments.db")
        # closing() guarantees the connection is released even on error;
        # the previous version leaked it when execute() raised.
        with closing(sqlite3.connect(db_path)) as conn:
            result = conn.execute(
                "SELECT script_data FROM experiments WHERE id=?", (experiment_id,)
            ).fetchone()
        if result and result[0]:
            logger.info("从数据库加载脚本数据实验ID: %d", experiment_id)
            return json.loads(result[0])
    except Exception as e:
        logger.error("加载脚本数据失败: %s", e)
    return None
def _load_experiment_info(experiment_id):
    """Load experiment info; an experiment is 'normal' iff it has script data.

    Returns {'is_normal': bool}, or None when the row is missing or a DB
    error occurs (errors are logged, not raised).
    """
    try:
        import sqlite3
        from contextlib import closing
        db_path = str(Path(__file__).parent / "experiments.db")
        # closing() guarantees the connection is released even on error;
        # the previous version leaked it when execute() raised.
        with closing(sqlite3.connect(db_path)) as conn:
            result = conn.execute(
                "SELECT script_data FROM experiments WHERE id=?", (experiment_id,)
            ).fetchone()
        if result:
            # Non-None, non-blank script_data marks the experiment as normal.
            script_data = result[0]
            is_normal = script_data is not None and str(script_data).strip() != ""
            return {'is_normal': is_normal}
    except Exception as e:
        logger.error("加载实验信息失败: %s", e)
    return None
def _parse_script_tables(script_data):
tables = {}
if isinstance(script_data, dict) and "tables" in script_data:
for item in script_data["tables"]:
key = item.get("token") or item.get("key")
if key: tables[str(key)] = item
return tables
def _replace_global_params(text, cfg):
"""替换文本中的 @参数名 为全局参数的值"""
if not text or '@' not in text: return text
result = text
if hasattr(cfg, 'globalParameters') and hasattr(cfg.globalParameters, 'parameters'):
import re
for param_name in re.findall(r'@(\w+)', text):
if param_name in cfg.globalParameters.parameters:
result = result.replace(f'@{param_name}', cfg.globalParameters.parameters[param_name])
return result
def _make_seconds_index(df):
if "_time" in df.columns:
t = pd.to_datetime(df["_time"])
return (t - t.iloc[0]).dt.total_seconds().round().astype(int)
return pd.Series(range(len(df)))
def _format_numeric_columns(df, exclude_cols):
if df is None or df.empty: return df
result = df.copy()
for col in result.columns:
if col not in exclude_cols:
try:
numeric = pd.to_numeric(result[col], errors="coerce")
if numeric.notna().any(): result[col] = numeric.round(2)
except: pass
return result
def _to_wide_table(df, fields, first_column, titles_map, first_title=None):
    """Pivot a long Influx result (_time/_field/_value) into a wide table.

    Args:
        df: long-format frame, expected to carry _time and _value columns
            (and usually _field); returned unchanged if those are missing.
        fields: optional whitelist of _field values to keep.
        first_column: "seconds" to index rows by elapsed seconds since the
            first sample; anything else indexes by the raw _time column.
        titles_map: mapping of _field name -> display column title.
        first_title: display title for the index column (defaults to ""
            for seconds mode, "时间" for time mode).

    Returns:
        A new DataFrame, sorted by index, numeric columns rounded to two
        decimals via _format_numeric_columns; empty input yields an empty
        DataFrame.
    """
    if df.empty:
        return pd.DataFrame()
    work = df.copy()
    # Not an Influx-shaped frame: hand it back untouched.
    if "_time" not in work.columns or "_value" not in work.columns:
        return work
    if fields and "_field" in work.columns:
        work = work[work["_field"].isin(fields)]
    if first_column == "seconds":
        idx = _make_seconds_index(work)
        work = work.assign(__index__=idx)
        index_col, index_title = "__index__", first_title or ""
    else:
        index_col, index_title = "_time", first_title or "时间"
    if "_field" in work.columns:
        # Duplicate (index, field) pairs collapse to the last observation.
        wide = work.pivot_table(index=index_col, columns="_field", values="_value", aggfunc="last")
    else:
        # Single unnamed series: keep _value under a generic "value" column.
        wide = work.set_index(index_col)[["_value"]]
        wide.columns = ["value"]
    wide = wide.sort_index()
    wide.reset_index(inplace=True)
    wide.rename(columns={index_col: index_title}, inplace=True)
    for f, title in titles_map.items():
        if f in wide.columns:
            wide.rename(columns={f: title}, inplace=True)
    return _format_numeric_columns(wide, exclude_cols=[index_title])
# ============================================================
# Core: cross-run placeholder replacement (handles Word splitting
# a {token} across multiple runs)
# ============================================================
def _replace_token_across_runs(paragraph, token, replacement):
    """Replace *token* in a paragraph, even when Word split it across runs.

    Word may fragment a placeholder, e.g. {text4} into
    Run('{') + Run('text4') + Run('}'), or {isNormal} into
    Run('{isNormal') + Run('}').

    Returns True when a replacement was made, False otherwise. When the
    token sits inside one run, every occurrence in that run is replaced;
    when it spans runs, only the first spanning occurrence is handled.
    """
    runs = paragraph.runs
    if not runs:
        return False
    # Fast path: the whole token sits inside a single run.
    for run in runs:
        if token in run.text:
            run.text = run.text.replace(token, replacement)
            return True
    # Slow path: the token spans multiple runs.
    texts = [r.text for r in runs]
    full = ''.join(texts)
    idx = full.find(token)
    if idx < 0:
        return False
    token_end = idx + len(token)
    # Character interval [start, end) covered by each run.
    boundaries = []
    pos = 0
    for t in texts:
        boundaries.append((pos, pos + len(t)))
        pos += len(t)
    # Locate the first and last runs touched by the token.
    first_ri = last_ri = -1
    for i, (s, e) in enumerate(boundaries):
        if s <= idx < e and first_ri < 0:
            first_ri = i
        if s < token_end <= e:
            last_ri = i
            break
    if first_ri < 0 or last_ri < 0:
        return False
    # Preserve any text before/after the token in the edge runs.
    before = texts[first_ri][:idx - boundaries[first_ri][0]]
    after = texts[last_ri][token_end - boundaries[last_ri][0]:]
    # Write the replacement into the first affected run...
    runs[first_ri].text = before + replacement + after
    # ...and blank out the middle and last affected runs.
    for i in range(first_ri + 1, last_ri + 1):
        runs[i].text = ''
    return True
def _replace_texts_docx(doc, mapping):
    """Replace every {key} placeholder in body paragraphs and table cells.

    Falsy mapping values (e.g. None) are written as empty strings.
    """
    for key, val in mapping.items():
        token = '{' + key + '}'
        replacement = val or ''
        # Body paragraphs.
        for para in doc.paragraphs:
            if token in para.text:
                _replace_token_across_runs(para, token, replacement)
        # Table cells (skip duplicate entries produced by merged cells).
        for table in doc.tables:
            for row in table.rows:
                seen_tc = set()
                for cell in row.cells:
                    tc_id = id(cell._tc)
                    if tc_id in seen_tc:
                        continue
                    seen_tc.add(tc_id)
                    for para in cell.paragraphs:
                        if token in para.text:
                            _replace_token_across_runs(para, token, replacement)
# ============================================================
# Core: table data filling (correct coordinate mapping across
# merged cells)
# ============================================================
def _get_unique_cells(row):
"""获取行中的唯一单元格列表(合并单元格只返回一次)"""
seen = set()
cells = []
for cell in row.cells:
tc_id = id(cell._tc)
if tc_id not in seen:
seen.add(tc_id)
cells.append(cell)
return cells
def _fill_script_table_docx(doc, token, table_spec):
    """Fill script-provided cell values into the Word table holding *token*.

    Coordinate system:
    - `row` in the script data is an absolute row index of the table;
    - `col` is an offset relative to the *unique-cell* position of the token;
    - a merged region counts as one cell, so col=1 skips past a merged
      area to the next unique cell.

    Out-of-range coordinates and per-cell failures are logged and skipped.
    """
    cells_data = table_spec.get("cells") or []
    if not cells_data:
        return
    token_with_braces = '{' + token + '}'
    table_found = None
    token_row = 0
    token_unique_col = 0
    # Locate the token inside the document's tables.
    for table in doc.tables:
        for ri, row in enumerate(table.rows):
            unique = _get_unique_cells(row)
            for uci, cell in enumerate(unique):
                if token_with_braces in cell.text:
                    table_found = table
                    token_row = ri
                    token_unique_col = uci
                    break
            if table_found:
                break
        if table_found:
            break
    if not table_found:
        logger.warning("未找到 token: %s", token_with_braces)
        return
    logger.info("找到 token %s 在表格 row=%d, unique_col=%d", token_with_braces, token_row, token_unique_col)
    # Strip the token text while keeping any other text in the same cell
    # (e.g. a label such as "环境温度").
    target_cell = _get_unique_cells(table_found.rows[token_row])[token_unique_col]
    for para in target_cell.paragraphs:
        _replace_token_across_runs(para, token_with_braces, '')
    # Write the data values.
    for cell_info in cells_data:
        if not isinstance(cell_info, dict):
            continue
        value = cell_info.get("value")
        if value is None:
            continue
        data_row = int(cell_info.get("row", 0))
        data_col = int(cell_info.get("col", 0))
        try:
            if data_row >= len(table_found.rows):
                logger.warning("%d 超出表格范围 (%d行)", data_row, len(table_found.rows))
                continue
            unique = _get_unique_cells(table_found.rows[data_row])
            target_idx = token_unique_col + data_col
            if target_idx >= len(unique):
                logger.warning("%d (target_idx=%d) 超出范围 (%d列)", data_col, target_idx, len(unique))
                continue
            cell = unique[target_idx]
            para = cell.paragraphs[0] if cell.paragraphs else None
            if para is None:
                cell.text = str(value)
            elif para.runs:
                # Existing runs: rewrite the first run's text...
                para.runs[0].text = str(value)
                # ...and blank out the rest.
                for r in para.runs[1:]:
                    r.text = ''
            else:
                # No runs yet: add one.
                para.add_run(str(value))
            # Center-align the written value.
            if para is not None:
                para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        except Exception as e:
            logger.warning("填充失败 row=%d col=%d: %s", data_row, data_col, e)
# ============================================================
# Report generation entry point
# ============================================================
def render_report(template_path, cfg, output_path, experiment_id=None):
    """Generate a report by filling the Word template and saving it.

    Steps: load script data for the experiment (if any), replace {key}
    text placeholders from the config, fill script-table data, save.

    Returns:
        output_path (unchanged), for caller convenience.
    """
    logger.info("=== 开始生成报告 ===")
    _progress("加载数据", 0, 5)
    # Load script data and experiment info.
    script_data = _load_script_data_from_db(experiment_id) if experiment_id else None
    script_tables = _parse_script_tables(script_data)
    logger.info("脚本表格: %s", list(script_tables.keys()))
    # Open the template document.
    doc = Document(str(template_path))
    _progress("替换文本", 1, 5)
    # Build the text replacement mapping from placeholder configs.
    text_map = {}
    if hasattr(cfg, 'placeholders'):
        placeholders = cfg.placeholders if isinstance(cfg.placeholders, dict) else {}
        for key, ph in placeholders.items():
            if hasattr(ph, 'type'):
                if ph.type == "text" and hasattr(ph, 'value'):
                    text_map[key] = _replace_global_params(ph.value or '', cfg)
                elif ph.type == "dbText" and hasattr(ph, 'dbQuery'):
                    text_map[key] = _execute_db_query(ph, getattr(cfg, 'db', None))
    # Experiment-info placeholder (isNormal renders a ballot-box check mark).
    # Always added so the placeholder never survives unreplaced.
    is_normal_checked = ''
    if experiment_id:
        exp_info = _load_experiment_info(experiment_id)
        if exp_info and exp_info.get('is_normal'):
            is_normal_checked = '\u2611'
    text_map['isNormal'] = is_normal_checked
    logger.info("文本映射: %d 个, keys=%s", len(text_map), list(text_map.keys()))
    _replace_texts_docx(doc, text_map)
    # Fill script-table data.
    _progress("填充表格", 2, 5)
    for token, spec in script_tables.items():
        _fill_script_table_docx(doc, token, spec)
    # Save the result.
    _progress("保存", 4, 5)
    doc.save(str(output_path))
    _progress("完成", 5, 5)
    logger.info("=== 报告生成完成: %s ===", output_path)
    return output_path
def _execute_experiment_script(cfg: AppConfig) -> Optional[Dict]:
    """Execute the Python script attached to the experiment process.

    The script (stored base64-encoded in the config) is written to a temp
    file and run either with an external interpreter (development) or
    in-process via runpy (frozen/packaged builds, or when no interpreter is
    found). Context is passed through environment variables and the
    experimentProcess JSON on stdin; the script's stdout must be JSON.

    Args:
        cfg: application configuration.
    Returns:
        The JSON object printed by the script, or None when no script is
        configured or execution/parsing fails.
    """
    logger.info("_execute_experiment_script invoked")
    if not cfg.experimentProcess.scriptFile:
        logger.info("No experiment script configured")
        return None
    try:
        import base64
        import json
        import tempfile
        import subprocess
        import sys
        from shutil import which
        import io
        import runpy
        import os
        # Decode the base64-encoded script body.
        logger.info("Decoded script length: %d", len(cfg.experimentProcess.scriptFile))
        script_content = base64.b64decode(cfg.experimentProcess.scriptFile)
        logger.info("Script bytes size: %d", len(script_content))
        # Materialize the script in the system temp directory (short path).
        temp_dir = tempfile.gettempdir()
        logger.debug("System temp directory: %s", temp_dir)
        with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', dir=temp_dir, delete=False) as tmp_file:
            tmp_file.write(script_content)
            tmp_script_path = tmp_file.name
        # Log the temp path length to help debug Windows MAX_PATH issues.
        logger.debug("Temp script path length: %d, path: %s", len(tmp_script_path), tmp_script_path)
        if len(tmp_script_path) > 250:
            logger.warning("Temp script path is quite long (%d chars), might cause issues on Windows", len(tmp_script_path))
        try:
            # Build the experimentProcess JSON payload passed to the script.
            cfg_dict = cfg.to_dict()
            exp_obj = cfg_dict.get("experimentProcess", {})
            exp_json = json.dumps(exp_obj, ensure_ascii=False)
            logger.info("Experiment script payload (first 300 chars): %s", exp_json[:300])
            exp_json_size = len(exp_json)
            if exp_json_size > 10000:
                logger.warning("Experiment JSON is quite large (%d chars), might cause issues on Windows", exp_json_size)
            # Resolve the experiment time range.
            # Highest priority: caller-provided environment variables
            # (e.g. from the experiment history list).
            experiment_start = os.environ.get('EXPERIMENT_START', '').strip()
            experiment_end = os.environ.get('EXPERIMENT_END', '').strip()
            if not experiment_start or not experiment_end:
                # Next: a timeRange from any placeholder's Influx config.
                for ph in cfg.placeholders.values():
                    if ph.influx and ph.influx.timeRange:
                        time_range = ph.influx.timeRange.strip()
                        if "start:" in time_range and "stop:" in time_range:
                            try:
                                parts = time_range.split(",")
                                local_start = experiment_start
                                local_end = experiment_end
                                for part in parts:
                                    part = part.strip()
                                    if part.startswith("start:"):
                                        local_start = part.replace("start:", "").strip()
                                    elif part.startswith("stop:"):
                                        local_end = part.replace("stop:", "").strip()
                                if local_start and local_end:
                                    experiment_start = experiment_start or local_start
                                    experiment_end = experiment_end or local_end
                                    break
                            except Exception as e:
                                logger.warning("Failed to parse timeRange: %s", e)
            # Last resort: a time range embedded in the process remark
            # (format "start=...,end=...").
            if (not experiment_start or not experiment_end) and cfg.experimentProcess.remark:
                remark = cfg.experimentProcess.remark
                try:
                    if "start=" in remark and "end=" in remark:
                        parts = remark.split(",")
                        local_start = experiment_start
                        local_end = experiment_end
                        for part in parts:
                            part = part.strip()
                            if part.startswith("start="):
                                local_start = part.replace("start=", "").strip()
                            elif part.startswith("end="):
                                local_end = part.replace("end=", "").strip()
                        if local_start and local_end:
                            experiment_start = experiment_start or local_start
                            experiment_end = experiment_end or local_end
                except Exception as e:
                    logger.warning("Failed to parse remark for time range: %s", e)
            # Prepare the child environment, avoiding oversized values.
            script_env = os.environ.copy()
            script_env.update({
                'PYTHONIOENCODING': 'utf-8',
            })
            # Only pass the payload via env when it is small enough.
            if exp_json_size < 8192:
                script_env['EXPERIMENT_JSON'] = exp_json
            else:
                logger.info("EXPERIMENT_JSON is too large for environment variable, will pass via stdin only")
            # Experiment time range.
            if experiment_start:
                script_env['EXPERIMENT_START'] = experiment_start
            if experiment_end:
                script_env['EXPERIMENT_END'] = experiment_end
            # InfluxDB connection settings.
            if cfg.influx.url:
                script_env['INFLUX_URL'] = cfg.influx.url
            if cfg.influx.org:
                script_env['INFLUX_ORG'] = cfg.influx.org
            if cfg.influx.token:
                script_env['INFLUX_TOKEN'] = cfg.influx.token
            # Bucket/measurement taken from the first table/chart placeholder
            # that declares one.
            for ph in cfg.placeholders.values():
                if ph.influx:
                    if ph.influx.bucket:
                        script_env['INFLUX_BUCKET'] = ph.influx.bucket
                    if ph.influx.measurement:
                        script_env['INFLUX_MEASUREMENT'] = ph.influx.measurement
                    if ph.influx.bucket or ph.influx.measurement:
                        break
            # Pick the execution strategy: sys.frozen marks a packaged build.
            is_frozen = getattr(sys, 'frozen', False)
            candidates: List[List[str]] = []
            # External interpreters are only tried in development (not frozen).
            if not is_frozen:
                if which('python'):
                    candidates.append(['python', tmp_script_path])
                if sys.platform.startswith('win') and which('py'):
                    candidates.append(['py', '-3', tmp_script_path])
            logger.info("Is frozen (packaged): %s", is_frozen)
            logger.info("Experiment script candidates: %s", candidates)
            stdout_text: str = ""
            stderr_text: str = ""
            logger.info("Executing experiment script: %s", cfg.experimentProcess.scriptName)
            if experiment_start and experiment_end:
                logger.info("Experiment time range: %s to %s", experiment_start, experiment_end)
            used_external = False
            if candidates:
                last_err = None
                result = None
                for cmd in candidates:
                    try:
                        # No extra argv is passed: the script reads its input
                        # from stdin/env, avoiding argument-parsing errors.
                        result = subprocess.run(
                            cmd,
                            capture_output=True,
                            text=True,
                            encoding='utf-8',
                            errors='replace',
                            timeout=30,
                            env=script_env,
                            input=exp_json,  # stdin avoids env-size limits
                        )
                        break
                    except Exception as e:
                        last_err = e
                        logger.warning("Failed to execute script with command %s: %s", cmd, e)
                        continue
                used_external = result is not None
                if result is None:
                    if last_err:
                        raise last_err
                    raise RuntimeError('Failed to execute script with external Python')
                stdout_text = (result.stdout or '')
                stderr_text = (result.stderr or '')
                if result.returncode != 0:
                    logger.error("Script execution failed (ext): return_code=%d, stdout=%s, stderr=%s",
                                 result.returncode, stdout_text, stderr_text)
                    return None
            else:
                # Frozen build / no external interpreter: run in-process with
                # stdio, argv and os.environ swapped temporarily.
                buf_out = io.StringIO()
                buf_err = io.StringIO()
                fake_in = io.StringIO(exp_json)
                old_env = dict(os.environ)
                os.environ.update(script_env)
                old_stdin, old_stdout, old_stderr, old_argv = sys.stdin, sys.stdout, sys.stderr, sys.argv
                script_executed = False
                try:
                    sys.stdin = fake_in
                    sys.stdout = buf_out
                    sys.stderr = buf_err
                    sys.argv = [tmp_script_path]
                    # Run the script file as __main__.
                    runpy.run_path(tmp_script_path, run_name='__main__')
                    script_executed = True
                    stdout_text = buf_out.getvalue()
                    stderr_text = buf_err.getvalue()
                except SystemExit as e:
                    # The script may call sys.exit(); non-zero means failure.
                    script_executed = True
                    stdout_text = buf_out.getvalue()
                    stderr_text = (buf_err.getvalue() or '') + f"\n(SystemExit: {e.code})"
                    if getattr(e, 'code', 0) not in (None, 0):
                        logger.error("Script execution failed (in-proc): %s", stderr_text)
                        return None
                except Exception as e:
                    script_executed = True
                    logger.error("Script execution error (in-proc): %s", e, exc_info=True)
                    return None
                finally:
                    sys.stdin, sys.stdout, sys.stderr, sys.argv = old_stdin, old_stdout, old_stderr, old_argv
                    os.environ.clear(); os.environ.update(old_env)
                if not script_executed:
                    logger.error("Script failed to execute (in-proc): unknown error occurred")
                    return None
            # Defensive re-check of the external result.
            if used_external and result is not None and result.returncode != 0:
                logger.error("Script execution failed: return_code=%d, stdout=%s, stderr=%s",
                             result.returncode, result.stdout, result.stderr)
                return None
            # Parse the script's JSON output; fall back to the input payload
            # when the script produced nothing.
            output = (stdout_text or '').strip()
            if not output:
                logger.warning("Script executed but returned no output; applying fallback to EXPERIMENT_JSON")
                output = exp_json
            try:
                data = json.loads(output)
            except Exception as e:
                logger.error("Failed to parse script output as JSON: error=%s, output=%s", e, output[:1000])
                return None
            logger.info("Experiment script stdout: %s", output[:500])
            logger.info("Script executed successfully, data length: headers=%d, rows=%d",
                        len(data.get('headers', []) if isinstance(data, dict) else []),
                        len(data.get('rows', []) if isinstance(data, dict) else []))
            return data
        finally:
            # Always remove the temp script file.
            try:
                os.unlink(tmp_script_path)
            except Exception as e:
                logger.warning("Failed to remove temporary script file: %s", e)
    except OSError as e:
        # getattr: OSError.winerror exists only on Windows builds of Python;
        # the previous direct `e.winerror` raised AttributeError elsewhere.
        if getattr(e, 'winerror', None) == 206:  # filename or extension too long
            logger.error("Failed to execute experiment script due to Windows path length limitation: %s", e)
            logger.error("Consider reducing the size of the script or using a shorter temp directory")
        else:
            logger.error("OS error while executing experiment script: %s", e, exc_info=True)
        return None
    except Exception as e:
        logger.error("Failed to execute experiment script: %s", e, exc_info=True)
        return None