# 高质量系统提示词的结构
## 1. 角色定义
You are an expert [domain] professional with [X years] of experience.
## 2. 目标陈述
Your primary goal is to [specific objective].
## 3. 约束和原则
- Principle 1: [description]
- Principle 2: [description]
## 4. 风格和格式
- Tone: [descriptive]
- Format: [specific format]
- Language level: [proficiency level]
## 5. 边界条件
- Do not: [specific constraints]
- Always: [mandatory requirements]
# Example system prompt for a financial-advisor persona. It is laid out as a
# role statement followed by labelled sections: PRIMARY GOAL, CORE PRINCIPLES,
# CONSTRAINTS and OUTPUT FORMAT.
system_prompt = """
You are a certified financial advisor with 15 years of experience in personal finance
and investment strategy.
PRIMARY GOAL:
Provide comprehensive, evidence-based financial guidance tailored to the user's
specific situation and goals.
CORE PRINCIPLES:
1. Always disclose any assumptions about the user's financial situation
2. Consider tax implications and regulatory constraints
3. Present multiple options with clear trade-offs
4. Base recommendations on published research and data
5. Acknowledge uncertainty and limitations
CONSTRAINTS:
- Do NOT provide guaranteed returns or financial predictions
- Do NOT recommend specific securities without full disclosure of limitations
- Do NOT advise on insurance if not qualified
- Always recommend consulting with appropriate professionals for complex matters
OUTPUT FORMAT:
- Situation analysis (2-3 sentences)
- Key considerations (3-5 bullet points)
- Recommended approaches (2-3 options with pros/cons)
- Action items (specific, numbered steps)
"""
import json

import numpy as np
from anthropic import Anthropic
class ContextSelector:
    """Pick the documents most relevant to a query via embedding similarity.

    Relies on the module-level helpers ``embed_document``,
    ``cosine_similarity`` and ``has_high_overlap``.
    """

    def __init__(self, documents: list[str]):
        """Store *documents* and compute one embedding per document."""
        self.documents = documents
        self.embeddings = self._compute_embeddings()

    def _compute_embeddings(self):
        """Return the embedding of every stored document, in document order."""
        # A real application would call an embedding service here
        # (OpenAI, Cohere, ...); this is pseudocode.
        return [embed_document(doc) for doc in self.documents]

    def select_relevant_context(self, query: str, top_k: int = 5) -> list[str]:
        """Return up to *top_k* documents, most similar to *query* first.

        Args:
            query: Text to compare the stored documents against.
            top_k: Maximum number of documents to return. Values <= 0 yield
                an empty list.

        Returns:
            Documents ordered by descending cosine similarity.
        """
        # Guard before slicing: with top_k == 0 the slice ``[-0:]`` would
        # select *every* document instead of none, and negative values would
        # produce an arbitrary tail. Nothing to rank on an empty corpus either.
        if top_k <= 0 or not self.documents:
            return []

        query_embedding = embed_document(query)
        similarities = [
            cosine_similarity(query_embedding, doc_emb)
            for doc_emb in self.embeddings
        ]
        # argsort is ascending, so reverse it and keep the first top_k indices.
        top_indices = np.argsort(similarities)[::-1][:top_k]
        return [self.documents[i] for i in top_indices]

    def select_with_diversity(self, query: str, top_k: int = 5) -> list[str]:
        """Return up to *top_k* relevant documents that do not overlap heavily.

        Twice as many candidates as requested are ranked by relevance, then a
        candidate is kept only if ``has_high_overlap`` reports no high overlap
        with any document already kept.
        """
        if top_k <= 0:
            return []

        candidates = self.select_relevant_context(query, top_k * 2)
        selected: list[str] = []
        for doc in candidates:
            if any(has_high_overlap(doc, kept) for kept in selected):
                continue
            selected.append(doc)
            if len(selected) >= top_k:
                break
        return selected
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    Computed as ``dot(a, b) / (norm(a) * norm(b))``. When either vector has
    zero norm the similarity is undefined; 0.0 is returned instead of the
    NaN (plus RuntimeWarning) that the division would otherwise produce.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator
def embed_document(text: str):
    """Embed *text* (placeholder; the real implementation depends on the service).

    No embedding backend is wired in here, so the function returns ``None``.
    """
    # Sample code only.
    return None
def has_high_overlap(doc1: str, doc2: str, threshold: float = 0.7) -> bool:
    """Return True when two documents share a high fraction of their words.

    The overlap is the Jaccard index of the lower-cased, whitespace-split
    word sets: ``|words1 & words2| / |words1 | words2|``.

    Args:
        doc1: First document.
        doc2: Second document.
        threshold: Overlap strictly above this value counts as "high".

    Returns:
        Whether the overlap exceeds *threshold*.
    """
    words1 = set(doc1.lower().split())
    words2 = set(doc2.lower().split())
    union = words1 | words2
    if not union:
        # Both documents contain no words. Dividing by the empty union would
        # raise ZeroDivisionError; treat them as identical (overlap of 1.0).
        return 1.0 > threshold
    return len(words1 & words2) / len(union) > threshold
class StructuredContextSelector:
    """Store documents with metadata and select them by metadata criteria."""

    def __init__(self):
        """Create an empty selector."""
        self.documents = []
        # field name -> field value -> list of document ids, where an id is
        # the document's position in ``self.documents``.
        self.metadata_index = {}

    def add_document(self, content: str, metadata: dict):
        """Add a document and index it under every metadata field.

        Args:
            content: Document text.
            metadata: Mapping of field name to value. Values are used as
                dictionary keys in the index, so they must be hashable.
        """
        doc_id = len(self.documents)
        self.documents.append(content)
        for key, value in metadata.items():
            self.metadata_index.setdefault(key, {}).setdefault(value, []).append(doc_id)

    def select_by_criteria(self, criteria: dict) -> list[str]:
        """Return the documents that satisfy every criterion.

        A criterion whose value is a tuple or list is a closed range
        ``(min, max)``; any other value is an exact match. Criteria on fields
        that were never indexed are ignored.

        Example criteria::

            {
                "category": "technical",
                "date_range": ("2024-01-01", "2025-03-01"),
                "relevance_score": (0.7, 1.0)
            }

        Returns:
            Matching documents in the order they were added.
        """
        candidate_ids = set(range(len(self.documents)))
        for key, value in criteria.items():
            if key not in self.metadata_index:
                continue
            value_index = self.metadata_index[key]
            if isinstance(value, (tuple, list)):
                # Range query: collect ids of every indexed value in bounds.
                min_val, max_val = value
                matching_ids = set()
                for indexed_value, ids in value_index.items():
                    if min_val <= indexed_value <= max_val:
                        matching_ids.update(ids)
            else:
                # Exact-match query.
                matching_ids = set(value_index.get(value, []))
            candidate_ids &= matching_ids
        # Set iteration order is not guaranteed; sort the ids so the result
        # is deterministic and follows insertion order.
        return [self.documents[i] for i in sorted(candidate_ids)]
def abstractive_compression(long_document: str, target_length: int = 1000) -> str:
    """Compress a long document into a summary using the Anthropic Messages API.

    A first request asks for a summary of roughly *target_length* words. If
    the result is more than 1.5x that many words, a second request asks the
    model to compress the summary further.

    Args:
        long_document: Text to summarize.
        target_length: Approximate desired summary length, in words.

    Returns:
        The summary text taken from the first content block of the response.
    """
    client = Anthropic()

    # Step 1: create the initial summary.
    # NOTE(review): max_tokens is a token budget while target_length is in
    # words; the budget may truncate a summary of the requested size - confirm
    # the intended ratio.
    response = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        # The API needs a positive budget even for tiny targets.
        max_tokens=max(1, min(target_length, 2000)),
        messages=[
            {
                "role": "user",
                "content": f"""Summarize this document focusing on key information:
{long_document}
Keep the summary to approximately {target_length} words.
Maintain all factual accuracy.""",
            }
        ],
    )
    summary = response.content[0].text

    # Step 2: evaluate and iterate. The target is expressed in words, so count
    # words here; comparing the character count against a word target would
    # trigger the second pass for almost every summary.
    if len(summary.split()) > target_length * 1.5:
        response = client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=max(1, int(target_length / 2)),
            messages=[
                {
                    "role": "user",
                    "content": f"""Further compress this summary:
{summary}
Keep only the most critical information.""",
                }
            ],
        )
        summary = response.content[0].text
    return summary
def _extract_json_object(text: str) -> str:
    """Return the part of *text* from the first ``{`` to the last ``}``."""
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("model response does not contain a JSON object")
    return text[start:end + 1]


def structured_compression(document: str) -> dict:
    """Compress a document into a structured dictionary via the Anthropic API.

    The model is asked to answer in a fixed JSON layout (main topic, key
    points, important facts, entities, action items, open questions) and the
    reply is parsed with ``json.loads``.

    Args:
        document: Text to extract structure from.

    Returns:
        The parsed JSON object.

    Raises:
        ValueError: If the reply contains no JSON object or the JSON is
            invalid (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    client = Anthropic()
    response = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=2000,
        messages=[
            {
                "role": "user",
                "content": f"""Extract and organize this document into a structured format:
{document}
Provide in this JSON format:
{{
"main_topic": "...",
"key_points": ["point1", "point2", ...],
"important_facts": {{"fact1": "value1", ...}},
"entities": ["entity1", "entity2", ...],
"action_items": ["item1", "item2", ...],
"open_questions": ["question1", "question2", ...]
}}""",
            }
        ],
    )
    # The reply may wrap the JSON in Markdown fences or surrounding prose, so
    # parse only the outermost {...} span rather than the raw text.
    return json.loads(_extract_json_object(response.content[0].text))
class ContextDeduplicator:
    """Remove duplicated information across context documents using an LLM."""

    @staticmethod
    def remove_duplicate_information(contexts: list[str]) -> list[str]:
        """Ask the model to deduplicate *contexts*.

        Simplified version: the model's reply is not parsed yet, so the input
        list is returned unchanged.

        Args:
            contexts: Context documents, in order.

        Returns:
            The same list object that was passed in.
        """
        # With fewer than two documents nothing can overlap, so skip the API
        # call entirely.
        if len(contexts) < 2:
            return contexts

        client = Anthropic()
        # Merge all contexts into a single numbered listing.
        combined = "\n\n".join(
            f"Document {i}:\n{ctx}" for i, ctx in enumerate(contexts)
        )
        response = client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=2000,
            messages=[
                {
                    "role": "user",
                    "content": f"""Identify and remove duplicate or overlapping information from these documents:
{combined}
For each document, keep only the unique information not covered by others.
Provide the deduplicated documents in the same order.""",
                }
            ],
        )
        # TODO: parse ``response`` into the deduplicated documents. Until that
        # is implemented the reply is discarded.
        return contexts  # simplified version
class IsolatedContextBuilder:
    """Assemble a prompt whose context sources are kept in separate sections."""

    def __init__(self):
        # section name -> list of entries; a section exists only once
        # something has been added to it.
        self.context_parts = {}

    def add_system_knowledge(self, knowledge: str):
        """Append a piece of system-level background knowledge."""
        self.context_parts.setdefault("system_knowledge", []).append(knowledge)

    def add_user_provided(self, information: str):
        """Append a piece of information supplied by the user."""
        self.context_parts.setdefault("user_provided", []).append(information)

    def add_retrieved_context(self, context: str, source: str = "unknown"):
        """Append a retrieved context snippet together with its source."""
        entry = {"content": context, "source": source}
        self.context_parts.setdefault("retrieved_context", []).append(entry)

    def add_constraints(self, constraint: str):
        """Append a system constraint."""
        self.context_parts.setdefault("constraints", []).append(constraint)

    def build_prompt(self) -> str:
        """Render every populated section, in a fixed order, into one prompt."""
        parts = self.context_parts
        chunks = []

        # System knowledge
        if "system_knowledge" in parts:
            chunks.append("## SYSTEM KNOWLEDGE\n")
            chunks.extend(f"- {entry}\n" for entry in parts["system_knowledge"])
            chunks.append("\n")

        # User-provided information
        if "user_provided" in parts:
            chunks.append("## USER-PROVIDED INFORMATION\n")
            chunks.extend(f"- {entry}\n" for entry in parts["user_provided"])
            chunks.append("\n")

        # Retrieved context
        if "retrieved_context" in parts:
            chunks.append("## RETRIEVED CONTEXT\n")
            chunks.extend(
                f"### From: {entry['source']}\n{entry['content']}\n\n"
                for entry in parts["retrieved_context"]
            )

        # Constraints
        if "constraints" in parts:
            chunks.append("## CONSTRAINTS\n")
            chunks.extend(f"- {entry}\n" for entry in parts["constraints"])

        return "".join(chunks)

    def example_usage(self):
        """Build and return a sample prompt using a fresh builder instance."""
        demo = IsolatedContextBuilder()

        # Add the different kinds of context.
        for fact in ("Claude 是由 Anthropic 开发的 AI 助手", "当前日期是 2025 年 3 月 5 日"):
            demo.add_system_knowledge(fact)
        for fact in ("用户是一名产品经理", "正在开发一个 AI 应用"):
            demo.add_user_provided(fact)
        demo.add_retrieved_context(
            "关于 LLM 成本优化的最新研究...",
            source="https://arxiv.org/...",
        )
        for rule in ("不要提供具体的财务建议", "基于已知信息进行回答,如果不确定则说明"):
            demo.add_constraints(rule)

        return demo.build_prompt()
# Skeleton of a sectioned system prompt. Each bracketed line is a placeholder
# describing what belongs under the heading above it.
system_prompt_template = """
# ROLE & AUTHORITY
[定义 Claude 的角色、专业背景、权威性]
# PRIMARY OBJECTIVE
[明确的、可测量的目标]
# CONTEXT & CONSTRAINTS
[任务的约束条件、边界和限制]
# INTERACTION STYLE
[交互方式、语气、格式]
# DECISION-MAKING FRAMEWORK
[做出决策时应遵循的框架或原则]
"""
# Concrete example: the same five sections filled in for a software-architect
# persona. NOTE: this rebinds the module-level name `system_prompt`.
system_prompt = """
# ROLE & AUTHORITY
You are an expert software architect with 20+ years of experience designing scalable,
high-performance systems. You have worked at companies like Google, Amazon, and Microsoft.
# PRIMARY OBJECTIVE
Help users design robust system architectures that are scalable, maintainable, and cost-effective.
# CONTEXT & CONSTRAINTS
- You MUST consider real-world constraints like latency, bandwidth, and cost
- You SHOULD provide multiple design options with clear trade-offs
- You MUST NOT recommend unproven technologies as primary solutions
- Always prioritize simplicity and proven patterns over novelty
# INTERACTION STYLE
- Be direct and data-driven
- Use diagrams and pseudocode when helpful
- Explain technical concepts clearly
- Ask clarifying questions if requirements are unclear
# DECISION-MAKING FRAMEWORK
1. Understand requirements and constraints
2. Identify alternative approaches
3. Evaluate each approach against key criteria (scalability, cost, complexity)
4. Recommend the best fit with clear reasoning
5. Highlight potential future scalability issues
"""
def build_structured_prompt(task_description: str, background: str,
                            examples: list[str], constraints: list[str]) -> str:
    """Render an XML-tagged prompt from a task, background, examples and constraints.

    Each example becomes one ``<example>`` element and each constraint one
    ``<constraint>`` element; the remaining sections are fixed template text.
    Inputs are inserted verbatim, without escaping.
    """
    example_count = len(examples)
    example_block = "\n".join(f"<example>{sample}</example>" for sample in examples)
    constraint_block = "\n".join(f"<constraint>{rule}</constraint>" for rule in constraints)
    return f"""
<task>
<description>{task_description}</description>
<objective>
[What you want Claude to do or produce]
</objective>
</task>
<background>
<context>{background}</context>
</background>
<examples>
<count>{example_count}</count>
{example_block}
</examples>
<constraints>
<requirement priority="high">You MUST...</requirement>
<requirement priority="medium">You SHOULD...</requirement>
<requirement priority="low">You MAY...</requirement>
{constraint_block}
</constraints>
<output>
<format>
[Specify the exact format of the output]
</format>
<length>
[Specify length constraints if any]
</length>
</output>
"""
class RAGPipeline:
    """Skeleton retrieval-augmented generation pipeline: retrieve, then generate."""

    def __init__(self, client, knowledge_base: list[str]):
        """Store the client and knowledge base and embed the knowledge base.

        Args:
            client: Object exposing ``messages.create(...)`` as used in
                ``generate_response``.
            knowledge_base: Documents that can be retrieved.
        """
        self.client = client
        self.knowledge_base = knowledge_base
        self.embeddings = self._embed_knowledge_base()

    def _embed_knowledge_base(self):
        """Embed the knowledge base (placeholder; returns ``None``)."""
        # A real application would call an embedding service here.
        return None

    def retrieve_relevant_context(self, query: str, top_k: int = 5) -> list[str]:
        """Return the documents most relevant to *query* (placeholder).

        The intended steps are: embed the query, compute similarities, and
        return the best matches. Until that is implemented an empty list is
        returned, which honours the declared return type and lets ``query``
        run end to end instead of failing on ``None``.
        """
        return []

    def generate_response(self, query: str, context: list[str]) -> str:
        """Generate an answer to *query* grounded in *context*.

        Args:
            query: The user's question.
            context: Retrieved documents; ``None`` is treated as no context.

        Returns:
            Text of the first content block of the model response.
        """
        # Label every document so the model can cite it.
        context_str = "\n\n".join(
            f"[Source {i}]\n{ctx}" for i, ctx in enumerate(context or [])
        )
        response = self.client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=2000,
            system="""You are a helpful assistant that answers questions based on
provided context. Always cite your sources when using the provided documents.""",
            messages=[
                {
                    "role": "user",
                    "content": f"""Based on the following context, answer this question:
CONTEXT:
{context_str}
QUESTION:
{query}
Important: Only use information from the provided context. If the context doesn't contain
the answer, say so explicitly.""",
                }
            ],
        )
        return response.content[0].text

    def query(self, question: str) -> dict:
        """Run the full RAG flow and return the question, answer and sources."""
        # Step 1: retrieve.
        relevant_context = self.retrieve_relevant_context(question)
        # Step 2: generate.
        answer = self.generate_response(question, relevant_context)
        return {
            "question": question,
            "answer": answer,
            "sources": relevant_context,
        }
class MCPContextIntegration:
    """Example of using MCP servers as context sources for a prompt."""

    def __init__(self):
        # Registered servers as {"name": ..., "instance": ...} records, in
        # registration order.
        self.mcp_servers = []

    def register_mcp_server(self, server_name: str, server_instance):
        """Register *server_instance* under *server_name* as a context source."""
        record = {"name": server_name, "instance": server_instance}
        self.mcp_servers.append(record)

    def fetch_context_from_mcp(self, server_name: str, resource_path: str) -> str:
        """Fetch *resource_path* from the first server registered as *server_name*.

        Returns whatever the server's ``get_resource`` returns, or an empty
        string when no server with that name is registered.
        """
        for record in self.mcp_servers:
            if record["name"] == server_name:
                return record["instance"].get_resource(resource_path)
        return ""

    def build_mcp_enhanced_prompt(self, base_prompt: str, mcp_sources: dict) -> str:
        """Append context fetched from MCP servers to *base_prompt*.

        *mcp_sources* maps a registered server name to a resource path, e.g.::

            {
                "github": "repo/main/src",
                "notion": "database/project-docs"
            }

        Sources that yield an empty (falsy) context are left out.
        """
        sections = [base_prompt, "\n\n## CONTEXT FROM EXTERNAL SOURCES\n\n"]
        for source_name, resource_path in mcp_sources.items():
            fetched = self.fetch_context_from_mcp(source_name, resource_path)
            if not fetched:
                continue
            sections.append(f"### From {source_name}\n{fetched}\n\n")
        return "".join(sections)
def build_documentation_qa_system():
    """Answer a sample documentation question with retrieved docs and Claude.

    Walks through the flow end to end: build a documentation index, retrieve
    the sections relevant to a fixed sample question, assemble the system and
    user prompts, and call the Messages API.

    Returns:
        Text of the first content block of the model response.
    """
    # The file imports the class directly (``from anthropic import
    # Anthropic``); the module name ``anthropic`` itself is not bound.
    client = Anthropic()

    # Step 1: build the documentation index (offline).
    # NOTE(review): build_documentation_index is not defined in this file;
    # it presumably returns an object with a ``search(query, top_k=...)``
    # method - confirm against its definition.
    doc_index = build_documentation_index("/path/to/docs")

    # Step 2: the user question to handle.
    user_question = "How do I configure SSL certificates?"

    # Step 3: retrieve the relevant documents.
    relevant_docs = doc_index.search(user_question, top_k=3)

    # Step 4: build the system prompt.
    system_prompt = """
You are a technical support specialist for our product. You have deep knowledge
of the product and access to official documentation.
GUIDELINES:
- Always cite the official documentation when providing answers
- If a question is not covered in the documentation, say so explicitly
- Provide step-by-step instructions when relevant
- Distinguish between official features, workarounds, and unsupported approaches
"""

    # Step 5: build the user message, including the context. The document
    # listing is assembled outside the f-string because a backslash inside an
    # f-string expression is a SyntaxError before Python 3.12.
    docs_block = "\n".join(
        f"[Doc {i}]\n{doc}" for i, doc in enumerate(relevant_docs, start=1)
    )
    user_message = f"""
Based on the following documentation sections, answer this question:
DOCUMENTATION:
{docs_block}
QUESTION:
{user_question}
Please provide a clear, step-by-step answer based on the documentation.
"""

    # Step 6: call Claude.
    response = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1000,
        system=system_prompt,
        messages=[{"role": "user", "content": user_message}],
    )
    return response.content[0].text
class CodeReviewContextBuilder:
    """Build code-review prompts and run the review through a Messages client."""

    def __init__(self, client):
        """Store *client*, which must expose ``messages.create(...)``."""
        self.client = client

    def build_review_prompt(self, code_to_review: str,
                            codebase_style_guide: str,
                            security_policies: str,
                            similar_reviewed_code: list[str]) -> str:
        """Assemble the review prompt: guidelines, examples, code and checklist.

        Args:
            code_to_review: Source code the model should review.
            codebase_style_guide: Style-guide text.
            security_policies: Security-policy text.
            similar_reviewed_code: Previously reviewed snippets shown as
                numbered examples.

        Returns:
            The complete prompt text.
        """
        # Built outside the f-string so each example header is followed by a
        # real newline; an escaped backslash inside the nested f-string would
        # emit the two characters "\n" (and is a SyntaxError before 3.12).
        examples_block = "\n".join(
            f"### Example {i}\n{code}"
            for i, code in enumerate(similar_reviewed_code, start=1)
        )
        # The code under review must actually appear in the prompt; without
        # it the model only sees the guidelines and the checklist.
        prompt = f"""
# CODE REVIEW GUIDELINES
## STYLE GUIDE
{codebase_style_guide}
## SECURITY POLICIES
{security_policies}
## SIMILAR CODE EXAMPLES (Previously Reviewed)
{examples_block}
---
# CODE TO REVIEW
{code_to_review}
## REVIEW CHECKLIST
1. Code Style Compliance
2. Security Issues
3. Performance Concerns
4. Best Practices
5. Maintainability
6. Test Coverage
For each category, identify specific issues and provide recommendations.
"""
        return prompt

    def review_code(self, code: str, context_dict: dict) -> dict:
        """Review *code* and return a preview of it together with the review.

        Args:
            code: Source code to review.
            context_dict: Must contain the keys ``"style_guide"``,
                ``"security_policies"`` and ``"similar_code"``.

        Returns:
            ``{"code_reviewed": <first 100 chars + "...">, "review": <text>}``.
        """
        prompt = self.build_review_prompt(
            code,
            context_dict["style_guide"],
            context_dict["security_policies"],
            context_dict["similar_code"],
        )
        response = self.client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=2000,
            system="""You are an expert code reviewer with expertise in multiple
programming languages and best practices. Provide constructive, specific feedback.""",
            messages=[{"role": "user", "content": prompt}],
        )
        return {
            "code_reviewed": code[:100] + "...",
            "review": response.content[0].text,
        }