12.2.1 成本建模与 ROI 分析框架
12.2.1.1 引言:为什么需要成本建模
12.2.1.2 基础 Token价格表
from dataclasses import dataclass
from typing import Dict, List
from enum import Enum
class ModelProvider(Enum):
    """Closed set of LLM vendors whose pricing this module tracks."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    OPEN_SOURCE = "open_source"
@dataclass
class ModelPricing:
    """Pricing for a single model, in USD per 1K tokens."""

    provider: "ModelProvider"               # vendor this model belongs to
    model_name: str                         # vendor-side model identifier
    input_price_per_1k_tokens: float        # USD per 1K input tokens
    output_price_per_1k_tokens: float       # USD per 1K output tokens
    cache_write_price_per_1k_tokens: float  # cache-write price (if supported)
    cache_read_price_per_1k_tokens: float   # cache-read price (if supported)

    def __post_init__(self):
        """Validate the price data.

        Raises:
            ValueError: if a price is negative, or a non-zero cache-read
                price is not cheaper than the normal input price.
        """
        # BUG FIX: the original used `assert` for validation; asserts are
        # stripped under `python -O`, silently disabling these checks.
        if self.input_price_per_1k_tokens < 0:
            raise ValueError("input_price_per_1k_tokens must be >= 0")
        if self.output_price_per_1k_tokens < 0:
            raise ValueError("output_price_per_1k_tokens must be >= 0")
        # Cache reads are normally discounted relative to fresh input tokens.
        if self.cache_read_price_per_1k_tokens > 0:
            if self.cache_read_price_per_1k_tokens >= self.input_price_per_1k_tokens:
                raise ValueError(
                    "cache_read_price_per_1k_tokens must be cheaper than "
                    "input_price_per_1k_tokens"
                )
def _std_pricing(provider: "ModelProvider", name: str,
                 input_price: float, output_price: float) -> "ModelPricing":
    """Build a ModelPricing with the standard cache markup.

    Cache writes cost 25% more than normal input; cache reads are 90% cheaper.
    """
    return ModelPricing(
        provider=provider,
        model_name=name,
        input_price_per_1k_tokens=input_price,
        output_price_per_1k_tokens=output_price,
        cache_write_price_per_1k_tokens=input_price * 1.25,
        cache_read_price_per_1k_tokens=input_price * 0.1,
    )


# Reference prices as of March 2026 (historical/approximate data —
# recheck current vendor pricing periodically).
PRICE_TABLE = [
    _std_pricing(ModelProvider.OPENAI, "gpt-4o", 0.005, 0.015),
    _std_pricing(ModelProvider.OPENAI, "gpt-4o-mini", 0.00015, 0.0006),
    _std_pricing(ModelProvider.ANTHROPIC, "claude-opus-4.6", 0.005, 0.025),
    _std_pricing(ModelProvider.ANTHROPIC, "claude-sonnet-4.6", 0.003, 0.015),
]
# Index the price table by "provider:model_name" for O(1) lookup.
PRICING_MAP = {
    "{}:{}".format(entry.provider.value, entry.model_name): entry
    for entry in PRICE_TABLE
}
class TokenCostCalculator:
    """Token cost calculator.

    Looks up per-model pricing in a ``{"provider:model_name": ModelPricing}``
    map and computes per-request, batch, and projected monthly costs in USD,
    with optional prompt-cache accounting.
    """

    def __init__(self, pricing_map: Dict[str, "ModelPricing"] = None):
        # BUG FIX: the original used the shared mutable module dict
        # PRICING_MAP as the default argument; fall back to it lazily
        # instead so the default is not a mutable default argument.
        self.pricing_map = PRICING_MAP if pricing_map is None else pricing_map

    def calculate_single_request_cost(
        self,
        model_key: str,  # format: "provider:model_name", e.g. "openai:gpt-4o"
        input_tokens: int,
        output_tokens: int,
        use_cache: bool = False,
        cache_hit_ratio: float = 0.0  # fraction of input tokens served from cache (0-1)
    ) -> Dict[str, float]:
        """Compute the token cost of a single request.

        Returns a dict with at least:
        - input_cost: total cost attributed to input tokens
        - output_cost: cost of output tokens
        - cache_cost: cache-related cost (read hits + cache writes; 0.0 without cache)
        - total_cost: input_cost + output_cost
        When ``use_cache`` is true, the breakdown keys cache_hit_cost,
        cache_miss_cost and cache_write_cost are also present.

        Raises:
            ValueError: if ``model_key`` is unknown or ``cache_hit_ratio``
                is outside [0, 1].
        """
        if model_key not in self.pricing_map:
            raise ValueError(f"Unknown model: {model_key}")
        if not 0.0 <= cache_hit_ratio <= 1.0:
            raise ValueError(f"cache_hit_ratio must be in [0, 1], got {cache_hit_ratio}")
        pricing = self.pricing_map[model_key]
        # Output tokens are billed the same with or without caching.
        output_cost = (output_tokens / 1000) * pricing.output_price_per_1k_tokens
        if not use_cache:
            # No cache: everything billed at the normal rates.
            input_cost = (input_tokens / 1000) * pricing.input_price_per_1k_tokens
            return {
                'input_cost': input_cost,
                'output_cost': output_cost,
                'cache_cost': 0.0,
                'total_cost': input_cost + output_cost,
            }
        # Cached path: a fraction of the input hits the prompt cache.
        cache_hit_tokens = int(input_tokens * cache_hit_ratio)
        cache_miss_tokens = input_tokens - cache_hit_tokens
        # Hits are billed at the cheap cache-read rate.
        cache_hit_cost = (cache_hit_tokens / 1000) * pricing.cache_read_price_per_1k_tokens
        # Misses pay the normal input rate plus the cache-write surcharge.
        cache_miss_cost = (cache_miss_tokens / 1000) * pricing.input_price_per_1k_tokens
        cache_write_cost = (cache_miss_tokens / 1000) * pricing.cache_write_price_per_1k_tokens
        total_input_cost = cache_hit_cost + cache_miss_cost + cache_write_cost
        return {
            'input_cost': total_input_cost,
            'cache_hit_cost': cache_hit_cost,
            'cache_miss_cost': cache_miss_cost,
            'cache_write_cost': cache_write_cost,
            # BUG FIX: this key was missing in the cached branch, so
            # calculate_batch_cost always aggregated cache_cost as 0.
            'cache_cost': cache_hit_cost + cache_write_cost,
            'output_cost': output_cost,
            'total_cost': total_input_cost + output_cost,
        }

    def calculate_batch_cost(
        self,
        model_key: str,
        requests: List[Dict],  # each item: {'input_tokens': int, 'output_tokens': int}
        use_cache: bool = False,
        cache_hit_ratio: float = 0.0
    ) -> Dict:
        """Aggregate the total cost of a batch of requests.

        Returns summed input/output/cache/total costs plus ``num_requests``
        and ``avg_cost_per_request`` (0 for an empty batch).
        """
        totals = {
            'input_cost': 0.0,
            'output_cost': 0.0,
            'cache_cost': 0.0,
            'total_cost': 0.0,
            'num_requests': len(requests),
        }
        for req in requests:
            single_cost = self.calculate_single_request_cost(
                model_key,
                req['input_tokens'],
                req['output_tokens'],
                use_cache,
                cache_hit_ratio
            )
            totals['input_cost'] += single_cost.get('input_cost', 0)
            totals['output_cost'] += single_cost.get('output_cost', 0)
            totals['cache_cost'] += single_cost.get('cache_cost', 0)
            totals['total_cost'] += single_cost['total_cost']
        # Average cost; guard against division by zero on an empty batch.
        totals['avg_cost_per_request'] = (
            totals['total_cost'] / len(requests) if requests else 0
        )
        return totals

    def project_monthly_cost(
        self,
        model_key: str,
        daily_requests: int,
        avg_input_tokens_per_request: int,
        avg_output_tokens_per_request: int,
        use_cache: bool = False,
        cache_hit_ratio: float = 0.0,
        workdays_per_month: int = 20
    ) -> Dict:
        """Project monthly cost from daily volume and average token counts.

        Assumes ``workdays_per_month`` billable days per month (default 20,
        i.e. working days only — generalized from the former hard-coded 20).
        """
        monthly_requests = daily_requests * workdays_per_month
        single_request_cost = self.calculate_single_request_cost(
            model_key,
            avg_input_tokens_per_request,
            avg_output_tokens_per_request,
            use_cache,
            cache_hit_ratio
        )
        monthly_cost = single_request_cost['total_cost'] * monthly_requests
        return {
            'daily_requests': daily_requests,
            'monthly_requests': monthly_requests,
            'avg_input_tokens': avg_input_tokens_per_request,
            'avg_output_tokens': avg_output_tokens_per_request,
            'cost_per_request': single_request_cost['total_cost'],
            'monthly_cost': monthly_cost,
            'monthly_cost_usd': f"${monthly_cost:.2f}",
        }
12.2.1.3 长上下文成本分析
12.2.1.4 缓存策略的 ROI分析
12.2.1.5 综合 ROI评估框架
12.2.1.6 实际案例:电商客服系统的成本分析
最后更新于
