{"version":1,"pages":[{"id":"rLLoUXt3HcIJx9JHWtR8","title":"大模型原理与架构","pathname":"/llm_internals","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":""},{"id":"MNM8bclWBIHKEb6EUkVw","title":"第一章：从序列建模到 Transformer","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/01_introduction","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"}]},{"id":"5aUgBKJq7bA2jF1W0ZTn","title":"1.1 序列建模的根本挑战","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/01_introduction/1.1_seq_challenge","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第一章：从序列建模到 Transformer"}]},{"id":"L5mef7AHYmXR6n0IY3ZA","title":"1.2 RNN 与 CNN：成就与瓶颈","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/01_introduction/1.2_rnn_cnn_limits","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第一章：从序列建模到 Transformer"}]},{"id":"D817ImDEhDyv6B3PmMaR","title":"1.3 注意力的诞生：让模型学会“看哪里”","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/01_introduction/1.3_attention_birth","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第一章：从序列建模到 Transformer"}]},{"id":"ZinIbiUpDTqhlEsUIahn","title":"1.4 Transformer 的提出与核心思想","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/01_introduction/1.4_transformer_idea","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第一章：从序列建模到 Transformer"}]},{"id":"6yUiNzeVo6gO544MFjDC","title":"1.5 里程碑时刻：从学术论文到产业变革","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/01_introduction/1.5_milestones","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第一章：从序列建模到 Transformer"}]},{"id":"LrshbweOwLA0aaCGP4Et","title":"本章小结","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/01_introduction/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第一章：从序列建模到 Transformer"}]},{"id":"1IhJrwQNm1FMxR0tbDEW","title":"第二章：注意力机制：为什么它是核心","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/02_attention","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"}]},{"id":"3yOPkkCiZ3LRPPGmgGow","title":"2.1 查询-键-值：一种信息检索的直觉","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/02_attention/2.1_qkv_intuition","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第二章：注意力机制：为什么它是核心"}]},{"id":"JVgxWDMQenrSfBsDMYoc","title":"2.2 缩放点积注意力：为什么要除以根号 d","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/02_attention/2.2_scaled_dot_product","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第二章：注意力机制：为什么它是核心"}]},{"id":"ZrscFjmvMQMs9rw250LX","title":"2.3 多头注意力：为什么多个子空间更好","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/02_attention/2.3_multi_head","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第二章：注意力机制：为什么它是核心"}]},{"id":"BGKJwft3zAB0LS3ebZgW","title":"2.4 自注意力、交叉注意力与因果掩码","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/02_attention/2.4_self_cross_causal","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第二章：注意力机制：为什么它是核心"}]},{"id":"spYKD7jldnYWVNBWZ5MX","title":"2.5 注意力的代价：复杂度与局限","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/02_attention/2.5_complexity_limits","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第二章：注意力机制：为什么它是核心"}]},{"id":"vfRinG6UXL7vB9OxaQ0d","title":"本章小结","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/02_attention/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第二章：注意力机制：为什么它是核心"}]},{"id":"sDJSJRjUxxOvnTTJL4k1","title":"第三章：Transformer 核心组件解析","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"}]},{"id":"J5841bvvR2BjgweHenQJ","title":"3.1 分词：从文本到词元","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components/3.1_tokenization","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第三章：Transformer 核心组件解析"}]},{"id":"WCB38aarSpeqQU6oUM7t","title":"3.2 词嵌入：从离散符号到连续向量","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components/3.2_embedding","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第三章：Transformer 核心组件解析"}]},{"id":"k5AfUyBXnX8ttxqcAek8","title":"3.3 位置编码：为什么顺序信息必须显式注入","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components/3.3_position_encoding","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第三章：Transformer 核心组件解析"}]},{"id":"yvjWCvEBB9xprCf6Oxz1","title":"3.4 前馈网络：Transformer 的“记忆层”","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components/3.4_feedforward","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第三章：Transformer 核心组件解析"}]},{"id":"yCkhtIV7TfDswbF3SnlQ","title":"3.5 残差连接：梯度为什么能流过百层网络","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components/3.5_residual","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第三章：Transformer 核心组件解析"}]},{"id":"BNvPZh5OXylNgwYKmurO","title":"3.6 层归一化：为什么选择 LayerNorm 而非 BatchNorm","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components/3.6_layer_norm","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第三章：Transformer 核心组件解析"}]},{"id":"ARFD87b6YHZqVr9EORYM","title":"3.7 编码器-解码器：完整架构如何协同工作","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components/3.7_full_architecture","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第三章：Transformer 核心组件解析"}]},{"id":"qAPoe1Xwc6kGugUDbATF","title":"本章小结","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/03_components/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第三章：Transformer 核心组件解析"}]},{"id":"yp342e62PgnIwagYd0jQ","title":"第四章：位置编码的设计哲学","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/04_position_encoding","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"}]},{"id":"HLZHSHLvWDq4ozNgvm6X","title":"4.1 正弦位置编码：频率与外推的直觉","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/04_position_encoding/4.1_sinusoidal","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第四章：位置编码的设计哲学"}]},{"id":"sWay2eJX51tkSNsqXadM","title":"4.2 可学习位置编码：灵活性与局限","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/04_position_encoding/4.2_learnable","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第四章：位置编码的设计哲学"}]},{"id":"kLdbQySkHXKgzeu5OMmm","title":"4.3 旋转位置编码：为什么旋转能编码相对位置","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/04_position_encoding/4.3_rope","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第四章：位置编码的设计哲学"}]},{"id":"6G9qCw5JdgHQsPIEdElK","title":"4.4 ALiBi 与其他相对位置方案","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/04_position_encoding/4.4_alibi_others","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第四章：位置编码的设计哲学"}]},{"id":"LMihAkx9zehj81FtzzO8","title":"本章小结","pathname":"/llm_internals/di-yi-bu-fen-ji-chu-pian/04_position_encoding/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第一部分：基础篇"},{"label":"第四章：位置编码的设计哲学"}]},{"id":"34o5HkPqf8PH7nUR7dCs","title":"第五章：预训练：为什么“预测下一个词”能学到知识","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/05_pretraining","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"}]},{"id":"a5pwKdAfEQhAhTiAT2LM","title":"5.1 自回归语言模型：从左到右的世界观","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/05_pretraining/5.1_autoregressive","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第五章：预训练：为什么“预测下一个词”能学到知识"}]},{"id":"v21o335HteNxqpQkdNaE","title":"5.2 掩码语言模型：完形填空的智慧","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/05_pretraining/5.2_masked_lm","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第五章：预训练：为什么“预测下一个词”能学到知识"}]},{"id":"rls9850ilqgT5klhU3RA","title":"5.3 编码器-解码器预训练：两种范式的统一","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/05_pretraining/5.3_encoder_decoder","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第五章：预训练：为什么“预测下一个词”能学到知识"}]},{"id":"lI7S8SCRDA1HwpG7FOnI","title":"5.4 预训练数据：规模定律与数据质量的博弈","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/05_pretraining/5.4_data_scaling","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第五章：预训练：为什么“预测下一个词”能学到知识"}]},{"id":"3TPGqP5VDToy8fTVwd5p","title":"本章小结","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/05_pretraining/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第五章：预训练：为什么“预测下一个词”能学到知识"}]},{"id":"1O6AEyErtWeXcR207omO","title":"第六章：训练技术的底层逻辑","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/06_training_techniques","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"}]},{"id":"RsoKmvbqPpl1EGOoVsnv","title":"6.1 损失函数与优化器：为什么选择 Adam","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/06_training_techniques/6.1_loss_optimizer","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第六章：训练技术的底层逻辑"}]},{"id":"NxwZ1Igo6dGFehOSf3Vt","title":"6.2 学习率调度：为什么需要先预热再衰减","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/06_training_techniques/6.2_lr_schedule","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第六章：训练技术的底层逻辑"}]},{"id":"icIZU2CEC0mffUcQhoko","title":"6.3 正则化策略：防止过拟合的多重手段","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/06_training_techniques/6.3_regularization","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第六章：训练技术的底层逻辑"}]},{"id":"LwUuz9a62iTaARE7GNDb","title":"6.4 批次与序列长度：效率与质量的平衡","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/06_training_techniques/6.4_batch_sequence","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第六章：训练技术的底层逻辑"}]},{"id":"BAL4UB4rRPSRDYicoUYm","title":"本章小结","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/06_training_techniques/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第六章：训练技术的底层逻辑"}]},{"id":"uQBGMRDxeS0vR4lQaAQS","title":"第七章：大规模分布式训练","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"}]},{"id":"BaHPy2eDJeuds3z3INVI","title":"7.1 数据并行：为什么简单复制就能加速","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training/7.1_data_parallel","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第七章：大规模分布式训练"}]},{"id":"Q9jNTJ2OjoimDuL4U6VQ","title":"7.2 ZeRO 优化：如何突破单卡显存限制","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training/7.2_zero","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第七章：大规模分布式训练"}]},{"id":"G5ywFP5d3otydgc8uFdl","title":"7.3 模型并行与张量并行：拆分权重的艺术","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training/7.3_model_tensor_parallel","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第七章：大规模分布式训练"}]},{"id":"XCmMdW4S3KtX9p0Ye3Vu","title":"7.4 流水线并行与混合并行策略","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training/7.4_pipeline_hybrid","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第七章：大规模分布式训练"}]},{"id":"xCfHqjSNlU6Y0OGxukqn","title":"7.5 激活重计算：用时间换空间的艺术","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training/7.5_activation_checkpointing","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第七章：大规模分布式训练"}]},{"id":"Du69l4zpnVWUITowHlPD","title":"7.6 混合精度训练：精度与速度的权衡","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training/7.6_mixed_precision","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第七章：大规模分布式训练"}]},{"id":"YETFRnDRu23mpMPUpYy1","title":"7.7 检查点管理与容错","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training/7.7_checkpoint","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第七章：大规模分布式训练"}]},{"id":"fYzQY4ejbjwgzeIehxSu","title":"本章小结","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/07_distributed_training/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第七章：大规模分布式训练"}]},{"id":"bTNKJIH7a7fFX6aQxefa","title":"第八章：从预训练到对齐：让模型有用且安全","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/08_alignment","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"}]},{"id":"3LR2f82YvPbZyeoPsO4w","title":"8.1 监督微调：教模型“怎么回答”","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/08_alignment/8.1_sft","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第八章：从预训练到对齐：让模型有用且安全"}]},{"id":"Vcj0VAkPIsmReSgsJZ67","title":"8.2 RLHF：为什么需要人类反馈参与训练","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/08_alignment/8.2_rlhf","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第八章：从预训练到对齐：让模型有用且安全"}]},{"id":"aUBmiCj31YuU7NQtRXZY","title":"8.3 DPO 与新型对齐：从复杂到简洁的演化","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/08_alignment/8.3_dpo","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第八章：从预训练到对齐：让模型有用且安全"}]},{"id":"7qjmGbADw1EthX0qboDK","title":"8.4 参数高效微调：为什么不必更新所有参数","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/08_alignment/8.4_peft","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第八章：从预训练到对齐：让模型有用且安全"}]},{"id":"K0tODtqJbn496Xym70Bl","title":"本章小结","pathname":"/llm_internals/di-er-bu-fen-xun-lian-pian/08_alignment/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第二部分：训练篇"},{"label":"第八章：从预训练到对齐：让模型有用且安全"}]},{"id":"ZgC7ZcHr8JcLkBzy1xHs","title":"第九章：解码策略：模型如何生成文本","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/09_decoding","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"}]},{"id":"FeamqIW8bdEf8rwAdmt4","title":"9.1 自回归解码：逐词生成的机制","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/09_decoding/9.1_autoregressive_decode","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第九章：解码策略：模型如何生成文本"}]},{"id":"qPZZChxt8AogPLauHhra","title":"9.2 贪心搜索与束搜索：确定性与近似搜索","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/09_decoding/9.2_greedy_beam","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第九章：解码策略：模型如何生成文本"}]},{"id":"U25Bnri4IgyUFf3ieGJx","title":"9.3 采样策略：温度、Top-k 与 Top-p 的设计直觉","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/09_decoding/9.3_sampling","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第九章：解码策略：模型如何生成文本"}]},{"id":"R8PxI61Lmm0hrPQPPm6C","title":"9.4 结构化输出与约束解码","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/09_decoding/9.4_constrained","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第九章：解码策略：模型如何生成文本"}]},{"id":"tO5bYqhC4LAEuQkwCrFp","title":"9.5 解码侧的推理时扩展：生成、搜索与验证","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/09_decoding/9.5_test_time_scaling","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第九章：解码策略：模型如何生成文本"}]},{"id":"TSJG7alkCS2qcIe1Ef7m","title":"本章小结","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/09_decoding/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第九章：解码策略：模型如何生成文本"}]},{"id":"WdSvmh7Zlh3k787l8Ssa","title":"第十章：推理优化：第一性原理的分析","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/10_inference_optimization","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"}]},{"id":"OOat74L0frmvfCaRnA0T","title":"10.1 推理瓶颈分析：计算密集还是访存密集","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/10_inference_optimization/10.1_bottleneck","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十章：推理优化：第一性原理的分析"}]},{"id":"zln2XCV6ANBp1YTuxxbG","title":"10.2 KV 缓存：为什么能避免重复计算","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/10_inference_optimization/10.2_kv_cache","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十章：推理优化：第一性原理的分析"}]},{"id":"RpXKFQ9tPDivUn4FedhD","title":"10.3 Flash Attention：IO 感知的算法设计","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/10_inference_optimization/10.3_flash_attention","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十章：推理优化：第一性原理的分析"}]},{"id":"8VW3fvK8098zNmt4rNvf","title":"10.4 模型量化：用更少的位数表示权重与激活值","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/10_inference_optimization/10.4_quantization","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十章：推理优化：第一性原理的分析"}]},{"id":"eTC3V8R1m2aiThZzRfkm","title":"10.5 剪枝与知识蒸馏：模型瘦身的两条路","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/10_inference_optimization/10.5_pruning_distillation","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十章：推理优化：第一性原理的分析"}]},{"id":"Kf5K8Ht40gsijnQMQ37x","title":"10.6 投机解码：为什么“先猜后验”能加速","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/10_inference_optimization/10.6_speculative_decoding","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十章：推理优化：第一性原理的分析"}]},{"id":"YSbSUQk8DMGYeGo2wO7o","title":"本章小结","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/10_inference_optimization/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十章：推理优化：第一性原理的分析"}]},{"id":"lRWYMZVSyeyzJHMwFous","title":"第十一章：推理引擎与生产部署","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/11_serving","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"}]},{"id":"tHfue2WBjrt4kZtlorkA","title":"11.1 推理引擎架构概览","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/11_serving/11.1_engines_overview","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十一章：推理引擎与生产部署"}]},{"id":"66XKaNaLPkPvEnobfXmc","title":"11.2 连续批处理与 PagedAttention","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/11_serving/11.2_continuous_batching","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十一章：推理引擎与生产部署"}]},{"id":"DcZs5R0Dsmzyi6wNP6Fg","title":"11.3 分离式 Prefill-Decode 架构","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/11_serving/11.3_disaggregated_serving","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十一章：推理引擎与生产部署"}]},{"id":"VLMZWTDthJ6S6js0Y3d8","title":"11.4 硬件选型：GPU、TPU 与专用加速器","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/11_serving/11.4_hardware","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十一章：推理引擎与生产部署"}]},{"id":"kM5LEhqkKUMTPBGpI3KB","title":"11.5 生产部署最佳实践","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/11_serving/11.5_best_practices","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十一章：推理引擎与生产部署"}]},{"id":"65ucxKRRTxVTgcLiWpv1","title":"本章小结","pathname":"/llm_internals/di-san-bu-fen-tui-li-yu-bu-shu-pian/11_serving/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第三部分：推理与部署篇"},{"label":"第十一章：推理引擎与生产部署"}]},{"id":"1ytKhmgEneV6wbI2smBZ","title":"第十二章：编码器系列模型","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/12_encoder_models","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"}]},{"id":"tWzD0HJLK16at9H1BhlN","title":"12.1 BERT：双向理解的突破","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/12_encoder_models/12.1_bert","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十二章：编码器系列模型"}]},{"id":"5VRwxywFqpGMsnOI2l0J","title":"12.2 RoBERTa、ALBERT 与 ELECTRA：BERT 的改进之路","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/12_encoder_models/12.2_roberta_albert","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十二章：编码器系列模型"}]},{"id":"bNJKceRdgUvNrRjs4vnh","title":"12.3 长文本编码器：Longformer 与 BigBird","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/12_encoder_models/12.3_longformer_bigbird","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十二章：编码器系列模型"}]},{"id":"5Vs8FpGwXVEwAgjuwBmn","title":"本章小结","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/12_encoder_models/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十二章：编码器系列模型"}]},{"id":"rzT5kOfoTmECoHsURljt","title":"第十三章：解码器系列与主流 LLM","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/13_decoder_models","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"}]},{"id":"J4Wvy0HF2e15RkQVu4po","title":"13.1 GPT 系列：从语言模型到通用智能的扩展之路","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/13_decoder_models/13.1_gpt_series","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十三章：解码器系列与主流 LLM"}]},{"id":"gvU1a0gyzfWQFiNVj24c","title":"13.2 Llama 家族：开放权重如何改变 LLM 格局","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/13_decoder_models/13.2_llama","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十三章：解码器系列与主流 LLM"}]},{"id":"LuPOmXbuEMwM3Z2qEyOq","title":"13.3 DeepSeek、Gemini 与其他前沿模型","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/13_decoder_models/13.3_deepseek_gemini","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十三章：解码器系列与主流 LLM"}]},{"id":"TdHDaKNnUdlziaJqT1QH","title":"13.4 编码器-解码器模型：T5 与 BART 的设计选择","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/13_decoder_models/13.4_t5_bart","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十三章：解码器系列与主流 LLM"}]},{"id":"T3NqOrEn2dp3UyjNYTf7","title":"本章小结","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/13_decoder_models/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十三章：解码器系列与主流 LLM"}]},{"id":"ik68ruon1eaH6DdZ6uZz","title":"第十四章：架构创新与未来趋势","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"}]},{"id":"wlmQYvnf4lNRdaRabidN","title":"14.1 高效注意力：突破平方复杂度的瓶颈","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/14.1_efficient_attention","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"8DHOD5Byr8E1pkOhZfVq","title":"14.2 混合专家模型：为什么不必激活所有参数","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/14.2_moe","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"xMExuE743fBerygn5nst","title":"14.3 状态空间模型与混合架构：注意力的挑战者","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/14.3_ssm_hybrid","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"6hARbHqoPVoXfQWdj9gF","title":"14.4 多模态 Transformer：统一不同模态的表示","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/14.4_multimodal","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"ncdjH1KtUeW3iSZF2aLZ","title":"14.5 AI Agent 与工具调用：让模型从“说”到“做”","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/14.5_agent_tool_use","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"Y36PQaJZjOCCbzIsL3bs","title":"14.6 推理时计算扩展：让模型学会深度思考","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/14.6_test_time_scaling","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"lq2EJKs5u0sgbs6fr3FF","title":"14.7 长上下文技术：从理论到工程实践","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/14.7_long_context","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"OrBCTGY2Hbp0JUYGHRss","title":"14.8 未来展望","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/14.8_outlook","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"VLFjFvJrwfVLvEgiSLu2","title":"本章小结","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/14_future_trends/summary","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"第十四章：架构创新与未来趋势"}]},{"id":"pYiHp414cOqUf2anfrS3","title":"附录","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/appendix","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"}]},{"id":"wwSOhMvuCSX20WBUoMkM","title":"A.1 数学基础速查","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/appendix/a1_math_basics","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"附录"}]},{"id":"NOUHgqavEk5kX6754fye","title":"A.2 PyTorch 实现示例","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/appendix/a2_pytorch_examples","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"附录"}]},{"id":"uk5i2Bj1d4YKmiRt57uG","title":"A.3 主流模型参数速查表","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/appendix/a3_model_reference","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"附录"}]},{"id":"1ohvjYAXd4DIA1umi4iK","title":"A.4 推荐阅读与参考文献","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/appendix/a4_references","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"附录"}]},{"id":"KVqdWLRTtGUQH9NDGD5x","title":"A.5 快变事实核验表","pathname":"/llm_internals/di-si-bu-fen-mo-xing-yu-qian-yan-pian/appendix/a5_volatile_facts","siteSpaceId":"sitesp_pI6f7","lang":"zh","description":"","breadcrumbs":[{"label":"第四部分：模型与前沿篇"},{"label":"附录"}]}]}