# 12.2.1 成本建模与 ROI分析框架

## 12.2.1.1 引言：为什么需要成本建模

在上下文工程的实践中，一个常见的陷阱是“无限制优化”——不断增加上下文长度、提升检索精度、缓存更多数据，却没有对应的成本收益分析。结果往往是：

* 基础设施成本飙升，但用户体验改善幅度有限
* 工程团队在边际收益递减的优化上耗费大量时间
* 无法与业务决策者有效沟通“这个优化值不值得做”

**上下文工程的成本维度多样化**：

1. **Token成本**：输入/输出 Token的直接成本（最易量化）
2. **计算成本**：向量检索、重排序、压缩等的计算开销
3. **存储成本**：知识库存储、向量库、缓存的存储开销
4. **延迟成本**：响应时间影响的用户体验和转化率
5. **维护成本**：知识库更新、模型迭代、监控告警的人力成本
6. **合规成本**：数据安全、审计追踪、合规检查的额外投入

本节提供从基础 Token 计算到复杂 ROI 评估的完整框架。

## 12.2.1.2 基础 Token价格表

```python
from dataclasses import dataclass
from typing import Dict, List
from enum import Enum
import json

class ModelProvider(Enum):
    """Vendors (or hosting modes) that a priced model can belong to."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    OPEN_SOURCE = "open_source"


@dataclass
class ModelPricing:
    """Per-1k-token prices for one model, in US dollars.

    The two cache prices default to 0.0, which stands for "prompt caching is
    not priced/supported for this model".

    Raises:
        ValueError: if any price is negative, or if a non-zero cache-read
            price is not strictly cheaper than the regular input price.
    """

    provider: ModelProvider
    model_name: str
    input_price_per_1k_tokens: float  # USD
    output_price_per_1k_tokens: float  # USD
    cache_write_price_per_1k_tokens: float = 0.0  # USD; 0.0 when caching is unsupported
    cache_read_price_per_1k_tokens: float = 0.0  # USD; 0.0 when caching is unsupported

    def __post_init__(self):
        """Validate the price data.

        Explicit exceptions are used instead of ``assert`` because assertions
        are removed when Python runs with ``-O``.
        """
        for field_name in (
            'input_price_per_1k_tokens',
            'output_price_per_1k_tokens',
            'cache_write_price_per_1k_tokens',
            'cache_read_price_per_1k_tokens',
        ):
            value = getattr(self, field_name)
            if value < 0:
                raise ValueError(f"{field_name} must be >= 0, got {value}")

        # A cache read is only worth modelling if it is cheaper than sending
        # the same tokens as regular input.
        if (
            self.cache_read_price_per_1k_tokens > 0
            and not self.cache_read_price_per_1k_tokens < self.input_price_per_1k_tokens
        ):
            raise ValueError(
                "cache_read_price_per_1k_tokens must be lower than "
                "input_price_per_1k_tokens"
            )

# Illustrative teaching prices; they do not represent any vendor's live pricing.
# In production, use concrete model IDs and sync the numbers from the vendor's
# current pricing page or from your contract.
# Every entry below derives its cache prices from the input price:
# write = input * 1.25, read = input * 0.1.
PRICE_TABLE = [
    ModelPricing(
        provider=ModelProvider.OPENAI,
        model_name="example-frontier",
        input_price_per_1k_tokens=0.0025,
        output_price_per_1k_tokens=0.015,
        cache_write_price_per_1k_tokens=0.0025 * 1.25,  # cache writes cost more than plain input
        cache_read_price_per_1k_tokens=0.0025 * 0.1,    # cache reads are 90% cheaper than input
    ),
    ModelPricing(
        provider=ModelProvider.OPENAI,
        model_name="example-mini",
        input_price_per_1k_tokens=0.00075,
        output_price_per_1k_tokens=0.0045,
        cache_write_price_per_1k_tokens=0.00075 * 1.25,
        cache_read_price_per_1k_tokens=0.00075 * 0.1,
    ),
    ModelPricing(
        provider=ModelProvider.ANTHROPIC,
        model_name="example-frontier",
        input_price_per_1k_tokens=0.005,
        output_price_per_1k_tokens=0.025,
        cache_write_price_per_1k_tokens=0.005 * 1.25,
        cache_read_price_per_1k_tokens=0.005 * 0.1,
    ),
    ModelPricing(
        provider=ModelProvider.ANTHROPIC,
        model_name="example-balanced",
        input_price_per_1k_tokens=0.003,
        output_price_per_1k_tokens=0.015,
        cache_write_price_per_1k_tokens=0.003 * 1.25,
        cache_read_price_per_1k_tokens=0.003 * 0.1,
    ),
]

# Lookup table from "<provider value>:<model_name>" (e.g. "openai:example-frontier")
# to the matching ModelPricing entry. The provider prefix keeps models that share
# a name across providers apart.
# The price table only demonstrates the calculation structure; in production,
# replace it with a version-tagged snapshot of current prices.
PRICING_MAP = {
    f"{p.provider.value}:{p.model_name}": p
    for p in PRICE_TABLE
}


class TokenCostCalculator:
    """Token cost calculator for single requests, batches, and monthly projections.

    Prices are looked up in a mapping keyed by ``"<provider>:<model_name>"``.
    """

    def __init__(self, pricing_map: Dict[str, "ModelPricing"] = None):
        """Create a calculator.

        Args:
            pricing_map: Mapping from model key to a pricing object exposing the
                four ``*_price_per_1k_tokens`` attributes. When omitted, the
                module-level ``PRICING_MAP`` is used.
        """
        # Resolve the default at call time rather than at definition time, so
        # instances created after the module-level table is replaced see the
        # new snapshot.
        self.pricing_map = PRICING_MAP if pricing_map is None else pricing_map

    def calculate_single_request_cost(
        self,
        model_key: str,  # format: "openai:example-frontier"
        input_tokens: int,
        output_tokens: int,
        use_cache: bool = False,
        cache_hit_ratio: float = 0.0  # share of input tokens served from cache (0-1)
    ) -> Dict[str, float]:
        """Compute the token cost of one request.

        Without caching, input and output tokens are billed at their regular
        prices. With caching, ``cache_hit_ratio`` of the input tokens is billed
        at the cache-read price and the remainder at the cache-write price
        (the regular input price is not added on top).

        Returns:
            A dict that always contains ``input_cost``, ``output_cost``,
            ``cache_cost`` and ``total_cost``. With ``use_cache=True`` it also
            contains ``cache_hit_cost``, ``cache_miss_cost`` and
            ``cache_write_cost``.

        Raises:
            ValueError: if the model key is unknown, a token count is negative,
                or ``use_cache`` is set with a ``cache_hit_ratio`` outside [0, 1].
        """
        if model_key not in self.pricing_map:
            raise ValueError(f"Unknown model: {model_key}")
        if input_tokens < 0 or output_tokens < 0:
            raise ValueError("input_tokens and output_tokens must be >= 0")

        pricing = self.pricing_map[model_key]

        # Output tokens are billed the same way with or without caching.
        output_cost = (output_tokens / 1000) * pricing.output_price_per_1k_tokens

        if not use_cache:
            input_cost = (input_tokens / 1000) * pricing.input_price_per_1k_tokens
            return {
                'input_cost': input_cost,
                'output_cost': output_cost,
                'cache_cost': 0.0,
                'total_cost': input_cost + output_cost,
            }

        # The ratio is only meaningful (and only validated) when caching is on;
        # a value above 1 would otherwise produce a negative miss count.
        if not 0.0 <= cache_hit_ratio <= 1.0:
            raise ValueError(
                f"cache_hit_ratio must be within [0, 1], got {cache_hit_ratio}"
            )

        cache_hit_tokens = int(input_tokens * cache_hit_ratio)
        cache_miss_tokens = input_tokens - cache_hit_tokens

        # Hits are billed at the cheaper cache-read price.
        cache_hit_cost = (cache_hit_tokens / 1000) * pricing.cache_read_price_per_1k_tokens
        # Misses are written to the cache and billed at the cache-write price.
        cache_write_cost = (cache_miss_tokens / 1000) * pricing.cache_write_price_per_1k_tokens

        total_input_cost = cache_hit_cost + cache_write_cost

        return {
            'input_cost': total_input_cost,
            'cache_hit_cost': cache_hit_cost,
            'cache_miss_cost': cache_write_cost,
            'cache_write_cost': cache_write_cost,
            'cache_cost': total_input_cost,
            'output_cost': output_cost,
            'total_cost': total_input_cost + output_cost,
        }

    def calculate_batch_cost(
        self,
        model_key: str,
        requests: List[Dict],  # each request has 'input_tokens' and 'output_tokens'
        use_cache: bool = False,
        cache_hit_ratio: float = 0.0
    ) -> Dict:
        """Sum the cost of several requests.

        The same ``use_cache`` / ``cache_hit_ratio`` settings are applied to
        every request.

        Returns:
            A dict with the summed ``input_cost``, ``output_cost``,
            ``cache_cost`` and ``total_cost``, plus ``num_requests`` and
            ``avg_cost_per_request`` (0 for an empty batch).
        """
        total_cost = {
            'input_cost': 0.0,
            'output_cost': 0.0,
            'cache_cost': 0.0,
            'total_cost': 0.0,
            'num_requests': len(requests),
        }

        for req in requests:
            single_cost = self.calculate_single_request_cost(
                model_key,
                req['input_tokens'],
                req['output_tokens'],
                use_cache,
                cache_hit_ratio
            )

            total_cost['input_cost'] += single_cost.get('input_cost', 0)
            total_cost['output_cost'] += single_cost.get('output_cost', 0)
            total_cost['cache_cost'] += single_cost.get('cache_cost', 0)
            total_cost['total_cost'] += single_cost['total_cost']

        # Guard the average against an empty batch.
        total_cost['avg_cost_per_request'] = (
            total_cost['total_cost'] / len(requests) if requests else 0
        )

        return total_cost

    def project_monthly_cost(
        self,
        model_key: str,
        daily_requests: int,
        avg_input_tokens_per_request: int,
        avg_output_tokens_per_request: int,
        use_cache: bool = False,
        cache_hit_ratio: float = 0.0,
        days_per_month: int = 20
    ) -> Dict:
        """Project the monthly cost from daily volume and average token counts.

        Args:
            days_per_month: Number of billable days per month. The default of
                20 models working days only; pass 30 for an always-on service.

        Returns:
            A dict echoing the inputs plus ``monthly_requests``,
            ``cost_per_request``, ``monthly_cost`` and a formatted
            ``monthly_cost_usd`` string.

        Raises:
            ValueError: if ``days_per_month`` is negative, or for the reasons
                listed on ``calculate_single_request_cost``.
        """
        if days_per_month < 0:
            raise ValueError("days_per_month must be >= 0")

        monthly_requests = daily_requests * days_per_month

        single_request_cost = self.calculate_single_request_cost(
            model_key,
            avg_input_tokens_per_request,
            avg_output_tokens_per_request,
            use_cache,
            cache_hit_ratio
        )

        monthly_cost = single_request_cost['total_cost'] * monthly_requests

        return {
            'daily_requests': daily_requests,
            'days_per_month': days_per_month,
            'monthly_requests': monthly_requests,
            'avg_input_tokens': avg_input_tokens_per_request,
            'avg_output_tokens': avg_output_tokens_per_request,
            'cost_per_request': single_request_cost['total_cost'],
            'monthly_cost': monthly_cost,
            'monthly_cost_usd': f"${monthly_cost:.2f}",
        }
```

## 12.2.1.3 长上下文成本分析

长上下文（extended context）是一把双刃剑：

* **收益**：更多信息在单次请求中可用，减少多轮交互
* **成本**：输入 Token数量增加，成本线性上升

```python
class ContextLengthCostAnalysis:
    """Analyse the cost/benefit trade-off of different context lengths."""

    def __init__(self, calculator: "TokenCostCalculator"):
        """Store the calculator used to price individual requests."""
        self.calculator = calculator

    def compare_context_strategies(
        self,
        model_key: str,
        user_query_tokens: int = 100,
        output_tokens: int = 500,
        knowledge_base_sizes: List[int] = None  # context sizes to compare, in tokens
    ) -> Dict:
        """Compare the expected cost of completing a task at several context sizes.

        Each strategy prices one request with ``user_query_tokens + ctx_size``
        input tokens, then adds the expected follow-up requests. The model
        assumes one follow-up at 500 context tokens and 10% fewer follow-ups
        for every additional 1000 tokens, never dropping below zero.

        Args:
            knowledge_base_sizes: Context sizes to compare, e.g.
                ``[500, 1000, 2000, 4000]``. Defaults to
                ``[500, 1000, 2000, 4000, 8000]`` when ``None``.

        Returns:
            A dict with ``strategies`` (one entry per size),
            ``optimal_context_size`` and ``optimal_cost`` (both ``None`` when
            no sizes were given), and ``breakeven_point``, which is a
            placeholder that this method never fills in.
        """
        if knowledge_base_sizes is None:
            knowledge_base_sizes = [500, 1000, 2000, 4000, 8000]

        analysis = {
            'strategies': [],
            'breakeven_point': None,
        }

        for ctx_size in knowledge_base_sizes:
            total_input_tokens = user_query_tokens + ctx_size

            # Rule of thumb: every extra 1000 context tokens removes 10% of the
            # follow-up turns. Clamp to [0, 1] so contexts above 10,500 tokens
            # cannot yield a negative number of follow-ups (and negative cost).
            follow_up_reduction = min(1.0, max(0.0, (ctx_size - 500) / 1000 * 0.1))
            expected_follow_ups = 1 - follow_up_reduction  # baseline: one follow-up on average

            single_cost = self.calculator.calculate_single_request_cost(
                model_key,
                total_input_tokens,
                output_tokens
            )

            # Initial request plus the expected number of follow-ups, each
            # priced like the initial request.
            total_interaction_cost = (
                single_cost['total_cost']
                + single_cost['total_cost'] * expected_follow_ups
            )

            analysis['strategies'].append({
                'context_tokens': ctx_size,
                'total_input_tokens': total_input_tokens,
                'output_tokens': output_tokens,
                'single_request_cost': single_cost['total_cost'],
                'expected_follow_ups': expected_follow_ups,
                'expected_total_interaction_cost': total_interaction_cost,
            })

        # Pick the cheapest way to complete the task; an empty size list has
        # no optimum.
        optimal = min(
            analysis['strategies'],
            key=lambda s: s['expected_total_interaction_cost'],
            default=None,
        )
        analysis['optimal_context_size'] = optimal['context_tokens'] if optimal else None
        analysis['optimal_cost'] = (
            optimal['expected_total_interaction_cost'] if optimal else None
        )

        return analysis

    def cost_quality_tradeoff(
        self,
        model_key: str,
        baseline_context_size: int = 2000,
        quality_scores: Dict[int, float] = None  # quality score per context size
    ) -> Dict:
        """Analyse the cost-versus-quality curve.

        Every datapoint prices a request with ``ctx_size + 100`` input tokens
        (100 stands for the query) and 500 output tokens.

        Args:
            baseline_context_size: Context size whose datapoint is reported as
                ``baseline_point`` for reference.
            quality_scores: Maps context tokens to an expected answer quality
                score (0-100). Defaults to a diminishing-returns curve.

        Returns:
            A dict with ``datapoints``, ``optimal_point`` (highest quality per
            dollar, ``None`` when there are no datapoints) and
            ``baseline_point`` (``None`` when the baseline size is not among
            the datapoints). ``efficiency_ratio`` is each point's quality per
            dollar divided by the best one, so it lies in [0, 1].
        """
        if quality_scores is None:
            # Default quality curve with diminishing returns.
            quality_scores = {
                500: 60,
                1000: 75,
                2000: 85,
                4000: 90,
                8000: 93,
            }

        analysis = {
            'datapoints': [],
        }

        for ctx_size, quality_score in quality_scores.items():
            cost_data = self.calculator.calculate_single_request_cost(
                model_key,
                ctx_size + 100,  # add the query size
                500  # output tokens are assumed fixed
            )

            # Floor the cost so a free model cannot cause a division by zero.
            quality_per_dollar = quality_score / max(cost_data['total_cost'], 0.0001)

            analysis['datapoints'].append({
                'context_tokens': ctx_size,
                'quality_score': quality_score,
                'cost': cost_data['total_cost'],
                'quality_per_dollar': quality_per_dollar,
            })

        optimal_point = max(
            analysis['datapoints'],
            key=lambda p: p['quality_per_dollar'],
            default=None,
        )

        # Normalise against the best datapoint so the ratio really is in [0, 1].
        best = optimal_point['quality_per_dollar'] if optimal_point else 0
        for point in analysis['datapoints']:
            point['efficiency_ratio'] = (
                point['quality_per_dollar'] / best if best > 0 else 0.0
            )

        analysis['optimal_point'] = optimal_point
        analysis['baseline_point'] = next(
            (p for p in analysis['datapoints']
             if p['context_tokens'] == baseline_context_size),
            None,
        )

        return analysis
```

## 12.2.1.4 缓存策略的 ROI分析

缓存是上下文工程中最重要的成本优化手段之一：对重复出现的上下文，命中缓存的输入 Token 按更低的缓存读取单价计费。

```python
class CacheROIAnalyzer:
    """Return-on-investment analysis for prompt caching."""

    def __init__(self, calculator: "TokenCostCalculator"):
        """Store the calculator used to project monthly API cost."""
        self.calculator = calculator

    def analyze_cache_investment(
        self,
        model_key: str,
        without_cache: Dict,  # scenario without caching
        with_cache: Dict,      # scenario with caching
        cache_maintenance_cost_monthly: float = 100.0,  # USD per month
        cache_setup_cost_one_time: float = 0.0  # one-time build cost, USD
    ) -> Dict:
        """Compare monthly cost with and without caching.

        Args:
            without_cache: Scenario dict, e.g. ``{'daily_requests': 100,
                'avg_input_tokens': 2000, 'avg_output_tokens': 500}``.
            with_cache: Same keys plus ``'cache_hit_ratio'`` (e.g. ``0.6``).
            cache_maintenance_cost_monthly: Recurring cost of running the cache.
            cache_setup_cost_one_time: One-time cost of building the cache.

        Returns:
            A dict with ``without_cache``, ``with_cache``, ``cache_investment``
            and ``roi`` sections. In ``roi``:

            - ``roi_percentage`` is net monthly savings divided by the monthly
              maintenance cost. With no maintenance cost it is ``+inf`` for a
              net gain, ``-inf`` for a net loss and ``0.0`` at break-even.
            - ``months_to_breakeven`` is the setup cost divided by net monthly
              savings, ``0.0`` when there is no setup cost, and ``inf`` when
              net savings are not positive.
        """
        # Monthly cost without caching.
        without_cache_cost = self.calculator.project_monthly_cost(
            model_key,
            without_cache['daily_requests'],
            without_cache['avg_input_tokens'],
            without_cache['avg_output_tokens'],
            use_cache=False
        )

        # Monthly cost with caching.
        with_cache_cost = self.calculator.project_monthly_cost(
            model_key,
            with_cache['daily_requests'],
            with_cache['avg_input_tokens'],
            with_cache['avg_output_tokens'],
            use_cache=True,
            cache_hit_ratio=with_cache['cache_hit_ratio']
        )

        cache_api_savings = (
            without_cache_cost['monthly_cost'] -
            with_cache_cost['monthly_cost']
        )
        cache_net_savings = cache_api_savings - cache_maintenance_cost_monthly

        # ROI relative to the recurring maintenance spend. Without any
        # maintenance cost the ratio is unbounded, so report the sign of the
        # net result instead of always claiming an infinite gain.
        if cache_maintenance_cost_monthly > 0:
            roi_percentage = cache_net_savings / cache_maintenance_cost_monthly * 100
        elif cache_net_savings > 0:
            roi_percentage = float('inf')
        elif cache_net_savings < 0:
            roi_percentage = float('-inf')
        else:
            roi_percentage = 0.0

        # Payback period of the one-time setup cost; never reached unless the
        # cache produces a positive net saving each month.
        if cache_net_savings > 0 and cache_setup_cost_one_time >= 0:
            months_to_breakeven = cache_setup_cost_one_time / cache_net_savings
        else:
            months_to_breakeven = float('inf')

        return {
            'without_cache': {
                'monthly_cost_usd': f"${without_cache_cost['monthly_cost']:.2f}",
                'monthly_cost_value': without_cache_cost['monthly_cost'],
            },
            'with_cache': {
                'monthly_cost_usd': f"${with_cache_cost['monthly_cost']:.2f}",
                'monthly_cost_value': with_cache_cost['monthly_cost'],
                'cache_hit_ratio': with_cache['cache_hit_ratio'],
            },
            'cache_investment': {
                'monthly_maintenance_cost_usd': f"${cache_maintenance_cost_monthly:.2f}",
                'monthly_maintenance_cost_value': cache_maintenance_cost_monthly,
                'one_time_setup_cost_usd': f"${cache_setup_cost_one_time:.2f}",
                'one_time_setup_cost_value': cache_setup_cost_one_time,
            },
            'roi': {
                'api_savings_monthly_usd': f"${cache_api_savings:.2f}",
                'api_savings_monthly_value': cache_api_savings,
                'net_savings_monthly_usd': f"${cache_net_savings:.2f}",
                'net_savings_monthly_value': cache_net_savings,
                'roi_percentage': roi_percentage,
                'months_to_breakeven': months_to_breakeven,
            }
        }

    def cache_strategy_comparison(
        self,
        model_key: str,
        baseline_scenario: Dict,
        strategy_configs: List[Dict]  # one dict per caching strategy
    ) -> List[Dict]:
        """Evaluate several caching strategies against the same baseline.

        Args:
            strategy_configs: One dict per strategy with ``'name'`` and
                ``'cache_hit_ratio'``, e.g.
                ``{'name': 'conservative', 'cache_hit_ratio': 0.3}``.
                A strategy may also set ``'cache_maintenance_cost_monthly'``
                and/or ``'cache_setup_cost_one_time'``; otherwise the defaults
                of ``analyze_cache_investment`` apply.

        Returns:
            One ``analyze_cache_investment`` result per strategy, in input
            order, extended with ``strategy_name`` and ``cache_hit_ratio``.
        """
        comparison_results = []

        for strategy in strategy_configs:
            # Copy so the caller's baseline scenario is not mutated.
            scenario = baseline_scenario.copy()
            scenario['cache_hit_ratio'] = strategy['cache_hit_ratio']

            # Forward only the cost settings the strategy actually specifies.
            cost_overrides = {
                key: strategy[key]
                for key in ('cache_maintenance_cost_monthly', 'cache_setup_cost_one_time')
                if key in strategy
            }

            roi = self.analyze_cache_investment(
                model_key,
                baseline_scenario,
                scenario,
                **cost_overrides
            )

            roi['strategy_name'] = strategy['name']
            roi['cache_hit_ratio'] = strategy['cache_hit_ratio']

            comparison_results.append(roi)

        return comparison_results
```

## 12.2.1.5 综合 ROI评估框架

```python
from typing import Dict, List, Tuple
from datetime import datetime

class ComprehensiveROIAnalyzer:
    """ROI framework that combines direct savings with indirect benefits."""

    def __init__(self):
        """Create the token calculator and cache analyzer with default pricing."""
        self.token_calculator = TokenCostCalculator()
        self.cache_analyzer = CacheROIAnalyzer(self.token_calculator)

    def estimate_context_engineering_roi(
        self,
        project_name: str,
        baseline_metrics: Dict,  # metrics before the optimization
        optimized_metrics: Dict,  # metrics after the optimization
        optimization_cost: Dict,  # what the optimization costs
        evaluation_period_months: int = 12,
        assumed_monthly_revenue: float = 100000
    ) -> Dict:
        """Estimate the ROI of a context-engineering optimization project.

        Args:
            baseline_metrics: e.g. ``{'monthly_api_cost': 1000,
                'avg_response_time_ms': 2000, 'user_satisfaction_score': 3.5,
                'error_rate': 0.05}`` (satisfaction on a 1-5 scale, error rate
                as a fraction).
            optimized_metrics: Same keys, measured after the optimization.
            optimization_cost: ``'engineering_hours'`` and ``'hourly_rate'``
                are required; ``'infrastructure_one_time'`` and
                ``'monthly_maintenance'`` default to 0.
            evaluation_period_months: Length of the evaluation window.
            assumed_monthly_revenue: Monthly revenue used to turn the indirect
                improvements into dollars.

        Returns:
            A dict with one-time costs, monthly costs, itemised monthly
            benefits, a ``period_analysis`` section (total cost, total benefit,
            net benefit, ROI percentage) and the two metric sets.
            ``roi_percentage`` is 0 when the period cost is not positive.

        The indirect benefits rest on fixed modelling assumptions: +0.5%
        conversion per second of latency saved, +2% retention per satisfaction
        point, and an error cost equal to 1% of revenue.
        """
        # 1. Direct API cost savings.
        monthly_api_savings = (
            baseline_metrics['monthly_api_cost'] -
            optimized_metrics['monthly_api_cost']
        )

        # 2. Indirect gain from faster responses:
        #    every second saved is assumed to lift conversion by 0.5%.
        response_time_improvement_sec = (
            baseline_metrics['avg_response_time_ms'] -
            optimized_metrics['avg_response_time_ms']
        ) / 1000
        response_time_roi_multiplier = response_time_improvement_sec * 0.005
        monthly_revenue_gain = assumed_monthly_revenue * response_time_roi_multiplier

        # 3. Value of higher satisfaction:
        #    every extra point is assumed to lift retention by 2%.
        satisfaction_improvement = (
            optimized_metrics['user_satisfaction_score'] -
            baseline_metrics['user_satisfaction_score']
        )
        monthly_user_retention_gain = assumed_monthly_revenue * 0.02 * satisfaction_improvement

        # 4. Savings from fewer errors (handling plus compensation),
        #    assuming the error cost equals 1% of revenue.
        monthly_error_cost_reduction = (
            (baseline_metrics['error_rate'] - optimized_metrics['error_rate']) *
            assumed_monthly_revenue *
            0.01
        )

        # 5. Cost of the optimization itself.
        engineering_cost = (
            optimization_cost['engineering_hours'] *
            optimization_cost['hourly_rate']
        )
        infrastructure_cost = optimization_cost.get('infrastructure_one_time', 0)
        monthly_maintenance = optimization_cost.get('monthly_maintenance', 0)

        # 6. Total monthly benefit.
        total_monthly_benefit = (
            monthly_api_savings +
            monthly_revenue_gain +
            monthly_user_retention_gain +
            monthly_error_cost_reduction
        )

        # 7. Totals over the evaluation period.
        evaluation_period_cost = (
            engineering_cost +
            infrastructure_cost +
            (monthly_maintenance * evaluation_period_months)
        )
        evaluation_period_benefit = total_monthly_benefit * evaluation_period_months
        net_benefit = evaluation_period_benefit - evaluation_period_cost

        return {
            'project_name': project_name,
            'evaluation_period_months': evaluation_period_months,
            'one_time_costs': {
                'engineering': engineering_cost,
                'infrastructure': infrastructure_cost,
            },
            'monthly_costs': {
                'maintenance': monthly_maintenance,
            },
            'monthly_benefits': {
                'api_cost_savings': monthly_api_savings,
                'response_time_roi': monthly_revenue_gain,
                'user_satisfaction_roi': monthly_user_retention_gain,
                'error_reduction_roi': monthly_error_cost_reduction,
                'total': total_monthly_benefit,
            },
            'period_analysis': {
                'total_cost': evaluation_period_cost,
                'total_benefit': evaluation_period_benefit,
                'net_benefit': net_benefit,
                'roi_percentage': (
                    net_benefit / evaluation_period_cost * 100
                    if evaluation_period_cost > 0 else 0
                ),
            },
            'metrics_comparison': {
                'baseline': baseline_metrics,
                'optimized': optimized_metrics,
            }
        }

    def generate_roi_report(
        self,
        roi_analysis: Dict,
        output_format: str = 'text'
    ) -> str:
        """Render an ROI analysis as a human-readable report.

        Args:
            roi_analysis: Result of ``estimate_context_engineering_roi``.
            output_format: ``'text'`` for the formatted report or ``'json'``
                for an indented JSON dump.

        Returns:
            The report as a string. In the text report a relative improvement
            is shown as ``N/A`` when its baseline value is 0, and the closing
            recommendation depends on whether the net benefit is positive.

        Raises:
            ValueError: if ``output_format`` is neither ``'text'`` nor ``'json'``.
        """
        if output_format == 'json':
            return json.dumps(roi_analysis, indent=2, default=str)
        if output_format != 'text':
            raise ValueError(f"Unsupported output_format: {output_format!r}")

        one_time = roi_analysis['one_time_costs']
        monthly_costs = roi_analysis['monthly_costs']
        benefits = roi_analysis['monthly_benefits']
        period = roi_analysis['period_analysis']
        before = roi_analysis['metrics_comparison']['baseline']
        after = roi_analysis['metrics_comparison']['optimized']

        def pct_reduction(key):
            # A relative change is undefined for a zero baseline.
            if not before[key]:
                return 'N/A'
            return f"{(before[key] - after[key]) / before[key] * 100:.1f}%"

        satisfaction_delta = (
            after['user_satisfaction_score'] - before['user_satisfaction_score']
        )

        # Only recommend further investment when the project actually pays off.
        if period['net_benefit'] > 0:
            recommendation = '✓ 投资收益明显，建议继续投入'
        else:
            recommendation = '✗ 评估周期内净收益不为正，建议重新评估方案或延长评估周期'

        report = f"""
===== 上下文工程优化 ROI 报告 =====

项目名称: {roi_analysis['project_name']}
评估周期: {roi_analysis['evaluation_period_months']} 个月

【成本总结】
一次性成本:
  - 工程成本: ${one_time['engineering']:.2f}
  - 基础设施: ${one_time['infrastructure']:.2f}

月度成本:
  - 运维: ${monthly_costs['maintenance']:.2f}

【收益总结】
月度收益分项:
  - API成本节省: ${benefits['api_cost_savings']:.2f}
  - 响应时间 ROI: ${benefits['response_time_roi']:.2f}
  - 满意度提升 ROI: ${benefits['user_satisfaction_roi']:.2f}
  - 错误率改善: ${benefits['error_reduction_roi']:.2f}
  - 月度总收益: ${benefits['total']:.2f}

【投资回报】
评估周期内:
  - 总成本: ${period['total_cost']:.2f}
  - 总收益: ${period['total_benefit']:.2f}
  - 净收益: ${period['net_benefit']:.2f}
  - ROI: {period['roi_percentage']:.1f}%

【关键指标对比】
              | 优化前    | 优化后    | 改善
-----------------------------------
API成本       | ${before['monthly_api_cost']:.0f}  | ${after['monthly_api_cost']:.0f}  | {pct_reduction('monthly_api_cost')}
响应时间(ms)  | {before['avg_response_time_ms']}   | {after['avg_response_time_ms']}   | {pct_reduction('avg_response_time_ms')}
满意度(1-5)   | {before['user_satisfaction_score']:.1f}   | {after['user_satisfaction_score']:.1f}   | {satisfaction_delta:+.1f}
错误率        | {before['error_rate']:.1%} | {after['error_rate']:.1%} | {pct_reduction('error_rate')}

【建议】
{recommendation}
"""
        return report
```

## 12.2.1.6 实际案例：电商客服系统的成本分析

```python
# Scenario: an e-commerce platform runs an AI customer-service assistant that
# answers product questions through a RAG system.

# Baseline before the optimization.
# NOTE(review): monthly_api_cost is given directly rather than derived from the
# request/token figures in the same dict — confirm the numbers are consistent.
baseline = {
    'daily_requests': 500,
    'avg_input_tokens': 3000,  # includes long product descriptions and history
    'avg_output_tokens': 300,
    'monthly_api_cost': 4500,
    'avg_response_time_ms': 3000,
    'user_satisfaction_score': 3.2,  # retrieval is inaccurate
    'error_rate': 0.08,  # irrelevant information is returned
}

# After the optimization (chunking improvements, vector retrieval, caching).
optimized = {
    'daily_requests': 500,
    'avg_input_tokens': 1500,  # context halved by the optimization
    'avg_output_tokens': 300,
    'monthly_api_cost': 2200,  # about 51% lower than the 4500 baseline
    'avg_response_time_ms': 800,  # about 73% faster than the 3000 ms baseline
    'user_satisfaction_score': 4.3,  # satisfaction improved
    'error_rate': 0.02,  # error rate down from 8% to 2%
}

# Cost of the optimization.
optimization_cost = {
    'engineering_hours': 120,
    'hourly_rate': 120,  # $120 per hour
    'infrastructure_one_time': 3000,  # vector store setup
    'monthly_maintenance': 400,  # vector store maintenance
}

# Run the analysis.
analyzer = ComprehensiveROIAnalyzer()

roi_result = analyzer.estimate_context_engineering_roi(
    project_name='电商 AI客服系统优化',
    baseline_metrics=baseline,
    optimized_metrics=optimized,
    optimization_cost=optimization_cost,
    evaluation_period_months=12
)

print(analyzer.generate_roi_report(roi_result, output_format='text'))
```

这个框架可以帮助团队做出有数据支撑的决策：是否投资某项上下文工程优化、投资多少、预期能获得多少回报。


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://yeasy.gitbook.io/context_engineering_guide/di-si-bu-fen-gong-cheng-shi-zhan-yu-wei-lai-yan-jin/12_production/12.2_optimization/12.2.1_cost_modeling_roi.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.