class BudgetBasedThinkingProvider:
    """Completion provider that enables model thinking only when budgets allow.

    Each call to :meth:`complete` rates the request's complexity and then
    checks two per-session budgets read from ``config``: a cost budget
    (``budget_threshold``) and a thinking-token budget
    (``max_thinking_per_session``).  Thinking is requested only for
    "hard"/"very_hard" requests that fit within both budgets.

    Session counters (``session_thinking_tokens`` and ``session_cost``) are
    updated after every completed request so that the budget checks reflect
    what has actually been spent.
    """

    # Price per 1,000 thinking/output tokens used for both the up-front
    # estimate and the post-call accounting.
    # NOTE(review): a single flat rate is applied to every model used here and
    # input tokens are not priced -- confirm against current pricing.
    COST_PER_1K_TOKENS = 0.015

    def __init__(self, config: "ReasoningBudget"):
        """Store the budget configuration and start a fresh session.

        Args:
            config: Budget settings.  This class reads ``budget_threshold``,
                ``max_thinking_tokens`` and ``max_thinking_per_session``.
        """
        self.config = config
        self.client = anthropic.Anthropic()
        self.session_thinking_tokens = 0
        self.session_cost = 0.0

    def complete(self, messages, task_type: str = "generic"):
        """Answer ``messages``, thinking only if the task is hard and affordable.

        Args:
            messages: Chat messages, each a dict with a ``"content"`` entry.
            task_type: Task category used as the baseline for the complexity
                rating (see :meth:`_assess_complexity`).

        Returns:
            The ``ThinkingResult`` produced by the thinking or the
            non-thinking completion path.
        """
        complexity = self._assess_complexity(messages, task_type)

        # Cost budget: would the estimated thinking spend keep the session
        # strictly below the configured threshold?
        estimated_thinking_cost = self._estimate_cost(complexity)
        can_afford = (
            self.session_cost + estimated_thinking_cost
            < self.config.budget_threshold
        )

        # Token budget: reserve a full per-request thinking allowance and
        # require that it still fits in the per-session allowance.  Falsy
        # config values (None or 0) fall back to the defaults below.
        per_request_tokens = self.config.max_thinking_tokens or 10000
        per_session_tokens = self.config.max_thinking_per_session or 100000
        can_afford_tokens = (
            self.session_thinking_tokens + per_request_tokens
            < per_session_tokens
        )

        should_think = (
            complexity in ("hard", "very_hard")
            and can_afford
            and can_afford_tokens
        )
        if should_think:
            return self._complete_with_thinking(messages)
        return self._complete_without_thinking(messages)

    @staticmethod
    def _message_text(content) -> str:
        """Return the plain text of a message's ``content`` value.

        Accepts a string, ``None``, or an iterable of content blocks.  For
        block lists, plain strings and the ``"text"`` entry of dict blocks
        are collected; other blocks are ignored.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict):
                text = block.get("text")
                if isinstance(text, str):
                    parts.append(text)
        return " ".join(parts)

    def _assess_complexity(self, messages: list, task_type: str) -> str:
        """Rate the request as "easy", "medium", "hard" or "very_hard".

        The rating combines a per-task-type baseline with a rough size
        estimate of the conversation (whitespace-separated words * 1.3).

        Args:
            messages: Chat messages, each a dict with a ``"content"`` entry.
            task_type: Task category; unknown categories are rated "medium".

        Returns:
            The complexity label.  Conversations estimated at 2000 tokens or
            fewer are always "easy", whatever the task type.
        """
        total_tokens = sum(
            len(self._message_text(msg.get("content", "")).split()) * 1.3
            for msg in messages
        )
        type_complexity = {
            "reasoning": "hard",
            "coding": "hard",
            "analysis": "medium",
            "summarization": "easy",
            "generic": "medium",
        }
        base_complexity = type_complexity.get(task_type, "medium")

        if total_tokens > 5000:
            # Only already-hard task types are escalated by sheer size.
            return "very_hard" if base_complexity == "hard" else base_complexity
        if total_tokens > 2000:
            return base_complexity
        return "easy"

    def _estimate_cost(self, complexity: str) -> float:
        """Estimate the thinking cost for a request of the given complexity.

        Args:
            complexity: Label produced by :meth:`_assess_complexity`.

        Returns:
            Expected thinking-token count for that label (5000 for unknown
            labels) priced at ``COST_PER_1K_TOKENS``.
        """
        complexity_to_tokens = {
            "easy": 2000,
            "medium": 5000,
            "hard": 10000,
            "very_hard": 15000,
        }
        tokens = complexity_to_tokens.get(complexity, 5000)
        return tokens * self.COST_PER_1K_TOKENS / 1000

    @staticmethod
    def _split_content(blocks) -> tuple:
        """Split response content blocks into ``(thinking_text, output_text)``.

        All "thinking" blocks are joined with newlines and all "text" blocks
        are concatenated, so nothing is lost when a response carries more
        than one block of a kind.  Other block types are ignored.
        """
        thinking_parts = []
        text_parts = []
        for block in blocks:
            if block.type == "thinking":
                thinking_parts.append(block.thinking)
            elif block.type == "text":
                text_parts.append(block.text)
        return "\n".join(thinking_parts), "".join(text_parts)

    def _record_usage(self, thinking_tokens: int, output_tokens: int) -> float:
        """Add one request's usage to the session counters.

        Args:
            thinking_tokens: Thinking tokens reported for the request.
            output_tokens: Output tokens reported for the request.

        Returns:
            The cost charged for this request, i.e. both token counts priced
            at ``COST_PER_1K_TOKENS``.
        """
        cost = (thinking_tokens + output_tokens) * self.COST_PER_1K_TOKENS / 1000
        self.session_thinking_tokens += thinking_tokens
        self.session_cost += cost
        return cost

    def _complete_with_thinking(self, messages) -> "ThinkingResult":
        """Run the request with thinking enabled and record its usage.

        Args:
            messages: Chat messages forwarded unchanged to the API.

        Returns:
            A ``ThinkingResult`` with the thinking text, the answer text,
            token counts, the request cost and the thinking/total ratio.
        """
        # NOTE(review): "adaptive" thinking is assumed to be supported by the
        # model named below -- confirm against the provider documentation.
        response = self.client.messages.create(
            model="claude-opus-4-6",
            max_tokens=16000,
            thinking={
                "type": "adaptive",
            },
            messages=messages,
        )
        thinking_content, output_content = self._split_content(response.content)

        # The usage object may not expose a separate thinking-token count
        # (or may report None); treat both cases as zero.
        thinking_tokens = getattr(response.usage, "thinking_tokens", 0) or 0
        output_tokens = response.usage.output_tokens
        total_cost = self._record_usage(thinking_tokens, output_tokens)

        combined = thinking_tokens + output_tokens
        return ThinkingResult(
            thinking_tokens=thinking_tokens,
            thinking_content=thinking_content,
            output_tokens=output_tokens,
            output_content=output_content,
            total_cost=total_cost,
            thinking_ratio=thinking_tokens / combined if combined > 0 else 0.0,
        )

    def _complete_without_thinking(self, messages) -> "ThinkingResult":
        """Run the request without thinking and record its usage.

        Args:
            messages: Chat messages forwarded unchanged to the API.

        Returns:
            A ``ThinkingResult`` with empty thinking fields, the answer text,
            the output token count and the request cost.
        """
        response = self.client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=4096,
            messages=messages,
        )
        _, output_content = self._split_content(response.content)
        output_tokens = response.usage.output_tokens
        total_cost = self._record_usage(0, output_tokens)
        return ThinkingResult(
            thinking_tokens=0,
            thinking_content="",
            output_tokens=output_tokens,
            output_content=output_content,
            total_cost=total_cost,
            thinking_ratio=0.0,
        )