def hybrid_search(query: str, alpha: float = 0.5, k: int = 10) -> List[Dict]:
    """Fuse vector-search and keyword-search results into one ranking.

    Both retrievers are asked for ``k`` candidates. A document's fused score is
    ``alpha * vector_score + (1 - alpha) * keyword_score``; a document returned
    by only one retriever contributes nothing for the missing side.

    Scores are combined as-is, without normalization. This assumes both
    retrievers report scores on comparable scales where higher means more
    relevant -- TODO confirm against ``vector_store.search`` and
    ``sqlite_fts_search``.

    Args:
        query: Text passed unchanged to both retrievers.
        alpha: Weight of the vector score; the keyword score gets ``1 - alpha``.
        k: Number of candidates requested from each retriever.

    Returns:
        ``(doc_id, fused_score)`` tuples sorted by fused score, highest first.
        NOTE(review): the ``List[Dict]`` annotation does not describe this
        shape; it is kept unchanged here and should be corrected together with
        the file's typing imports.
    """
    # Vector retrieval
    vector_results = vector_store.search(query, k=k)
    # Keyword retrieval (SQLite FTS5)
    keyword_results = sqlite_fts_search(query, k=k)

    # Seed the fused scores with the weighted vector scores.
    combined = {doc['id']: alpha * doc['score'] for doc in vector_results}

    # Add the weighted keyword score, starting from 0 for keyword-only hits.
    keyword_weight = 1 - alpha
    for doc in keyword_results:
        doc_id = doc['id']
        combined[doc_id] = combined.get(doc_id, 0) + keyword_weight * doc['score']

    # Rank by fused score, best first.
    return sorted(combined.items(), key=lambda item: item[1], reverse=True)
def sqlite_fts_search(query: str, k: int) -> List[Dict]:
    """Run a full-text query against the SQLite FTS5 index in ``knowledge.db``.

    Why SQLite FTS5:
        1. No extra dependency -- ``sqlite3`` ships with Python.
        2. Extremely lightweight, single-file deployment.
        3. Supports boolean queries and prefix matching.

    Args:
        query: FTS5 MATCH expression. It is bound as a SQL parameter but is
            still parsed as FTS5 query syntax, so a malformed expression
            raises ``sqlite3.OperationalError``.
        k: Maximum number of rows to return.

    Returns:
        Dicts of the form ``{"id": <rowid as str>, "score": <float>}``, best
        match first. ``score`` is the negated FTS5 ``rank``: FTS5 reports
        better matches as numerically lower (negative) values, so negating
        yields a score where higher means more relevant.
    """
    import sqlite3
    from contextlib import closing

    # Assumed schema: CREATE VIRTUAL TABLE docs USING fts5(content)
    # closing() guarantees the connection is released even if the query
    # raises; a bare ``with sqlite3.connect(...)`` only manages transactions.
    with closing(sqlite3.connect("knowledge.db")) as conn:
        rows = conn.execute(
            "SELECT rowid, rank FROM docs WHERE docs MATCH ? ORDER BY rank LIMIT ?",
            (query, k),
        ).fetchall()

    return [{"id": str(rowid), "score": -rank} for rowid, rank in rows]