# 7.4 幻觉检测与工具调用验证
# 最后更新于
from typing import List, Optional
from dataclasses import dataclass
@dataclass
class HallucinationDetectionResult:
    """Outcome of a single hallucination check performed on a tool call."""

    # True when the check flagged the tool call as hallucinated.
    is_hallucination: bool
    # Confidence in the verdict, in the range 0.0-1.0.
    confidence: float
    # Category of the finding: "tool_name", "parameter" or "fact".
    # The tool-name detector in this file also uses "none" for a clean result.
    hallucination_type: str
    # Human-readable description of what was detected (empty when clean).
    evidence: str
    # Optional hint telling the model how to fix the call.
    correction: Optional[str] = None
class ToolNameHallucinationDetector:
    """Detect tool names that do not exist in the tool registry."""

    # A candidate whose similarity ratio is strictly above this value is
    # treated as a probable misspelling of the requested tool name.
    SIMILARITY_THRESHOLD = 0.6

    def __init__(self, registry):
        # The registry must provide is_tool_available(name) and
        # get_available_tools(); both are called by detect().
        self.registry = registry

    def detect(self, tool_name: str) -> HallucinationDetectionResult:
        """Check whether ``tool_name`` is a registered tool.

        Args:
            tool_name: Name of the tool the model tried to call.

        Returns:
            A clean result (``is_hallucination=False``) on an exact match.
            Otherwise a "tool_name" finding whose ``correction`` either
            suggests the most similar registered tool (probable typo) or
            lists up to five available tools (no close match).
        """
        # Exact match: nothing to report.
        if self.registry.is_tool_available(tool_name):
            return HallucinationDetectionResult(
                is_hallucination=False,
                confidence=1.0,
                hallucination_type="none",
                evidence="",
            )

        # The tool does not exist: it is either a misspelling of a real tool
        # or a complete fabrication. Rank registered tools by similarity.
        from difflib import SequenceMatcher

        # Materialise as a list so the slice below also works when the
        # registry returns a set, dict view or other iterable.
        available_tools = list(self.registry.get_available_tools())
        scored = [
            (tool, SequenceMatcher(None, tool_name, tool).ratio())
            for tool in available_tools
        ]
        # max() returns the first best-scoring entry, which is the same
        # element a stable descending sort would place first.
        best = max(scored, key=lambda pair: pair[1], default=None)

        if best is not None and best[1] > self.SIMILARITY_THRESHOLD:
            # Probably a spelling mistake: suggest the closest tool.
            corrected = best[0]
            return HallucinationDetectionResult(
                is_hallucination=True,
                confidence=0.9,
                hallucination_type="tool_name",
                evidence=f"工具 '{tool_name}' 不存在",
                correction=f"您可能想调用 '{corrected}'",
            )

        # No close match: treat the name as fully hallucinated.
        return HallucinationDetectionResult(
            is_hallucination=True,
            confidence=0.95,
            hallucination_type="tool_name",
            evidence=f"工具 '{tool_name}' 完全不存在",
            correction=f"可用工具: {', '.join(available_tools[:5])}",
        )


class ParameterHallucinationDetector:
    """Detect parameters that contradict a tool's declared schema."""

    def __init__(self, registry):
        # The registry must provide get_tool_schema(name); detect() treats a
        # None return as "no schema available".
        self.registry = registry

    def detect(self, tool_name: str, parameters: dict) -> List[HallucinationDetectionResult]:
        """Validate ``parameters`` against the schema of ``tool_name``.

        Args:
            tool_name: Name of the tool being called.
            parameters: Mapping of parameter name to the supplied value.

        Returns:
            One "parameter" finding per violation: unknown parameter names,
            numeric values outside ``minimum``/``maximum``, and values not in
            ``enum``. Empty when the registry has no schema for the tool or
            nothing is wrong.
        """
        results = []
        schema = self.registry.get_tool_schema(tool_name)
        if schema is None:
            return results

        # Read "properties" once with a default. The original indexed
        # schema['properties'] in the message below and raised KeyError for
        # schemas that declare no properties.
        properties = schema.get("properties", {})

        for param_name, param_value in parameters.items():
            # Parameter not declared by the tool.
            if param_name not in properties:
                results.append(HallucinationDetectionResult(
                    is_hallucination=True,
                    confidence=0.8,
                    hallucination_type="parameter",
                    evidence=f"参数 '{param_name}' 未在工具定义中",
                    correction=f"允许的参数: {list(properties.keys())}",
                ))
                continue

            prop_def = properties[param_name]

            # Numeric range check; only applied to int/float values.
            if isinstance(param_value, (int, float)):
                min_val = prop_def.get("minimum")
                max_val = prop_def.get("maximum")
                if min_val is not None and param_value < min_val:
                    results.append(HallucinationDetectionResult(
                        is_hallucination=True,
                        confidence=0.9,
                        hallucination_type="parameter",
                        evidence=f"参数 '{param_name}' 值 {param_value} 小于最小值 {min_val}",
                        correction=f"请使用不小于 {min_val} 的值",
                    ))
                if max_val is not None and param_value > max_val:
                    results.append(HallucinationDetectionResult(
                        is_hallucination=True,
                        confidence=0.9,
                        hallucination_type="parameter",
                        evidence=f"参数 '{param_name}' 值 {param_value} 大于最大值 {max_val}",
                        correction=f"请使用不大于 {max_val} 的值",
                    ))

            # Enumerated-value check.
            if "enum" in prop_def and param_value not in prop_def["enum"]:
                results.append(HallucinationDetectionResult(
                    is_hallucination=True,
                    confidence=0.95,
                    hallucination_type="parameter",
                    evidence=f"参数 '{param_name}' 值 '{param_value}' 不在允许列表中",
                    correction=f"允许的值: {prop_def['enum']}",
                ))

        return results


from abc import ABC, abstractmethod
class FactChecker(ABC):
    """Base class for fact checkers."""

    @abstractmethod
    def check(self, statement: str) -> bool:
        """Return True when ``statement`` holds, False otherwise."""


class APIEndpointChecker(FactChecker):
    """Check that an API endpoint is one of the known endpoints."""

    def __init__(self, known_endpoints: dict):
        # check() tests membership in the *values* of this mapping.
        self.known_endpoints = known_endpoints

    def check(self, endpoint: str) -> bool:
        """Return True when ``endpoint`` is a value of ``known_endpoints``."""
        return endpoint in self.known_endpoints.values()


class PermissionChecker(FactChecker):
    """Check permission claims against a user's actual permissions."""

    # Matches a "delete ... database" claim in English or Chinese. The
    # original pattern was English-only, so Chinese statements such as
    # "我有权删除数据库" never matched and the permission lookup was skipped.
    _DELETE_DATABASE_CLAIM = r"delete.*database|删除.*数据库"

    def __init__(self, user_id: str, permission_store):
        self.user_id = user_id
        # Must provide get_user_permissions(user_id); the result is tested
        # with ``in`` for the "delete:database" permission.
        self.permission_store = permission_store

    def check(self, statement: str) -> bool:
        """Verify a permission claim contained in ``statement``.

        Args:
            statement: Free text that may claim the right to delete a
                database, e.g. "我有权删除数据库".

        Returns:
            True when the statement makes no delete-database claim, or when
            the user holds the "delete:database" permission. False when the
            claim is made without that permission.
        """
        import re

        if re.search(self._DELETE_DATABASE_CLAIM, statement, flags=re.IGNORECASE) is None:
            return True  # No permission claim in the statement; nothing to verify.

        actual_perms = self.permission_store.get_user_permissions(self.user_id)
        return "delete:database" in actual_perms
class FactHallucinationDetector:
    """Detect factual hallucinations in tool-call parameters."""

    def __init__(self, fact_checkers: dict):
        # Mapping of checker name to an object with check(...); detect()
        # looks up the "endpoint" and "permission" entries.
        self.checkers = fact_checkers

    def detect(self, tool_name: str, parameters: dict) -> List[HallucinationDetectionResult]:
        """Run the tool-specific fact checks for one tool call.

        Args:
            tool_name: Name of the tool being called.
            parameters: Parameters supplied to the tool.

        Returns:
            A list of "fact" findings; empty when every applicable check
            passes or no matching checker is configured.
        """
        results = []

        # api_call: the endpoint must be known to the "endpoint" checker.
        if tool_name == "api_call" and "endpoint" in parameters:
            endpoint_checker = self.checkers.get("endpoint")
            if endpoint_checker is not None:
                endpoint = parameters["endpoint"]
                if not endpoint_checker.check(endpoint):
                    results.append(HallucinationDetectionResult(
                        is_hallucination=True,
                        confidence=0.85,
                        hallucination_type="fact",
                        evidence=f"API 端点 '{endpoint}' 在知识库中不存在",
                        correction="请使用已知的 API 端点",
                    ))

        # delete_database: the "permission" checker must accept the claim.
        if tool_name == "delete_database":
            permission_checker = self.checkers.get("permission")
            if permission_checker is not None:
                stmt = f"删除数据库 {parameters.get('database', '')}"
                if not permission_checker.check(stmt):
                    results.append(HallucinationDetectionResult(
                        is_hallucination=True,
                        confidence=0.9,
                        hallucination_type="fact",
                        evidence="用户权限不足",
                        correction="无法执行删除操作",
                    ))

        return results


class HallucinationDetectionEngine:
    """Full detection pipeline: tool name, then parameters, then facts."""

    def __init__(self, registry, fact_checkers: Optional[dict] = None):
        self.tool_detector = ToolNameHallucinationDetector(registry)
        self.param_detector = ParameterHallucinationDetector(registry)
        # None (or any falsy value) means no fact checkers are configured.
        self.fact_detector = FactHallucinationDetector(fact_checkers or {})

    def detect_all(self, tool_call) -> List[HallucinationDetectionResult]:
        """Run every detection layer on ``tool_call``.

        Args:
            tool_call: Object exposing ``name`` and ``input`` attributes.

        Returns:
            All findings. When the tool name itself is hallucinated, only
            that finding is returned, because the later layers need a real
            tool to check against.
        """
        results = []

        # Layer 1: tool name.
        name_result = self.tool_detector.detect(tool_call.name)
        if name_result.is_hallucination:
            results.append(name_result)
            return results  # Unknown tool: later layers cannot run.

        # Layer 2: parameters.
        results.extend(self.param_detector.detect(tool_call.name, tool_call.input))

        # Layer 3: facts.
        results.extend(self.fact_detector.detect(tool_call.name, tool_call.input))

        return results


class SelfCorrectionHandler:
    """Ask the model to correct a hallucinated tool call."""

    def __init__(self, model_provider):
        # Must provide complete(conversation_history) returning an object
        # with a ``content`` attribute.
        self.model = model_provider

    def attempt_correction(
        self,
        original_tool_call,
        hallucination_results: List[HallucinationDetectionResult],
        conversation_history: list
    ) -> Optional[dict]:
        """Ask the model to regenerate a tool call that was flagged.

        Args:
            original_tool_call: The tool call that was flagged.
            hallucination_results: Findings to report back to the model.
            conversation_history: Message list; the correction prompt is
                appended to it in place as a "user" message.

        Returns:
            None when there are no findings to correct. Otherwise a dict
            with ``corrected=True`` plus ``original``, ``suggestion`` and
            ``attempts``, or ``corrected=False`` plus ``error`` when the
            model call raised.
        """
        # Nothing was flagged: do not touch the history or call the model.
        if not hallucination_results:
            return None

        correction_message = self._build_correction_prompt(
            original_tool_call,
            hallucination_results
        )

        # Append the correction prompt to the caller's conversation history.
        conversation_history.append({
            "role": "user",
            "content": correction_message
        })

        # Ask the model to try again. Any provider failure is reported to the
        # caller through the result dict rather than propagated.
        try:
            response = self.model.complete(conversation_history)
            return {
                "corrected": True,
                "original": original_tool_call,
                "suggestion": response.content,
                "attempts": 1
            }
        except Exception as e:
            return {
                "corrected": False,
                "error": str(e)
            }

    def _build_correction_prompt(
        self,
        tool_call,
        results: List[HallucinationDetectionResult]
    ) -> str:
        """Build the message that lists each finding and its suggested fix.

        ``tool_call`` is accepted for interface compatibility; the prompt is
        built from ``results`` only.
        """
        parts = ["我发现您上一条消息中存在以下问题,请重新尝试:\n\n"]
        for result in results:
            parts.append(f"- {result.evidence}\n")
            if result.correction:
                parts.append(f" 建议: {result.correction}\n")
        parts.append("\n请重新调用正确的工具。")
        return "".join(parts)


# Usage: run the full hallucination detection pipeline
# Example: validate a tool call before executing it. The names registry,
# fact_checkers, ToolUseBlock, model_provider, conversation_history and
# execute_tool are expected to be provided by the surrounding application.
engine = HallucinationDetectionEngine(registry, fact_checkers)
tool_call = ToolUseBlock(
    id="call_123",
    name="send_email_to_ceo",  # a hallucinated tool
    input={"message": "Hello"}
)
hallucinations = engine.detect_all(tool_call)
if hallucinations:
    # Something was flagged: ask the model to correct itself.
    handler = SelfCorrectionHandler(model_provider)
    correction = handler.attempt_correction(
        tool_call,
        hallucinations,
        conversation_history
    )
    if correction["corrected"]:
        print(f"模型建议: {correction['suggestion']}")
        # Wait for user confirmation or re-execute.
    else:
        print(f"无法自动纠正: {correction['error']}")
        # Return the error to the user.
else:
    print("工具调用通过验证,可以执行")
    execute_tool(tool_call)