/**
 * NAC Knowledge Engine - MongoDB index initialization script
 *
 * Features:
 * 1. Creates a full-text index on the compliance_rules collection (intended to raise RAG retrieval to 0.90 confidence)
 * 2. Creates a TTL index on the agent_conversations collection (documents expire automatically after 90 days)
 * 3. Creates ordinary indexes on frequently queried fields (to improve query performance)
 *
 * Usage:
 * - Trigger it through the tRPC endpoint admin.initMongoIndexes
 * - Or run it directly on the server: node -e "require('./dist/index.js')"
 *
 * No Manus dependency, no Google dependency; a pure native MongoDB implementation.
 */

import { getMongoDb, COLLECTIONS } from "./mongodb";
/**
 * Report returned by the MongoDB index initialization routine.
 */
export interface IndexInitResult {
  /** Whether the run finished without any failed index (false when no database connection was available). */
  success: boolean;
  /** One entry per index that was processed, in processing order. */
  results: {
    /** Name of the collection the index belongs to. */
    collection: string;
    /** Name of the index. */
    indexName: string;
    /** Outcome recorded for this index. */
    status: "created" | "already_exists" | "failed";
    /** Error message; only set on entries whose status is "failed". */
    error?: string;
  }[];
  /** Human-readable one-line summary of the run. */
  summary: string;
}

/** Index key specification for the indexes managed here: ascending, descending, or text. */
type ManagedIndexKey = Record<string, 1 | -1 | "text">;

/** The subset of MongoDB `createIndex` options that this module passes. */
interface ManagedIndexOptions {
  name: string;
  unique?: boolean;
  background?: boolean;
  expireAfterSeconds?: number;
  weights?: Record<string, number>;
  default_language?: string;
}

/** The fields of a `listIndexes` entry that this module inspects. */
interface ExistingIndexInfo {
  name?: string;
  key?: Record<string, unknown>;
  expireAfterSeconds?: number;
}

/** One index that `initMongoIndexes` must ensure exists. */
interface ManagedIndex {
  collection: string;
  key: ManagedIndexKey;
  options: ManagedIndexOptions;
  /** Extra "an equivalent index is already present" test, applied in addition to a name match. */
  isEquivalent?: (existing: ExistingIndexInfo) => boolean;
  /** When true, an index-conflict error raised by `createIndex` is reported as "already_exists". */
  conflictMeansExists: boolean;
}

/** Turns any thrown value into a printable message (thrown values are not guaranteed to be `Error`s). */
function describeIndexError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Reads `code` / `codeName` from a thrown value without assuming its shape. */
function readErrorField(err: unknown, field: "code" | "codeName"): unknown {
  return typeof err === "object" && err !== null
    ? (err as Record<string, unknown>)[field]
    : undefined;
}

/** True when the server reports that the collection does not exist yet (NamespaceNotFound, code 26). */
function isNamespaceNotFound(err: unknown): boolean {
  return (
    readErrorField(err, "code") === 26 ||
    readErrorField(err, "codeName") === "NamespaceNotFound"
  );
}

/** True when `createIndex` failed because a conflicting index definition is already present. */
function isIndexConflict(err: unknown): boolean {
  const message = describeIndexError(err);
  return (
    message.includes("already exists") ||
    message.includes("IndexOptionsConflict") ||
    readErrorField(err, "codeName") === "IndexOptionsConflict"
  );
}

/**
 * Ensures that every MongoDB index used by the knowledge engine exists.
 *
 * Indexes are processed one at a time, in this order:
 * 1. full-text index on compliance_rules
 * 2. ordinary indexes on compliance_rules
 * 3. TTL index on agent_conversations (90 days)
 * 4. ordinary indexes on agent_conversations
 * 5. full-text index on knowledge_base
 *
 * A failure on one index never aborts the run; it is recorded in `results`
 * and the remaining indexes are still processed.
 *
 * @returns A per-index report. `success` is true only when no index failed.
 *   When no database connection is available, `success` is false and `results` is empty.
 */
export async function initMongoIndexes(): Promise<IndexInitResult> {
  const db = await getMongoDb();
  const results: IndexInitResult["results"] = [];

  if (!db) {
    return {
      success: false,
      results: [],
      summary: "MongoDB连接失败,无法初始化索引",
    };
  }
  // Re-bind so the non-null type is carried into the closure below.
  const database = db;

  const rulesCollection = COLLECTIONS.COMPLIANCE_RULES;
  const conversationsCollection = COLLECTIONS.AGENT_CONVERSATIONS;
  const knowledgeBaseCollection = "knowledge_base";

  // 90 days, expressed in seconds (= 7776000).
  const conversationTtlSeconds = 90 * 24 * 60 * 60;

  // listIndexes reports a text index with "text" as one of its key values.
  const hasTextKey = (existing: ExistingIndexInfo): boolean =>
    existing.key != null && Object.values(existing.key).includes("text");
  // Any index carrying expireAfterSeconds is a TTL index.
  const hasTtl = (existing: ExistingIndexInfo): boolean =>
    existing.expireAfterSeconds !== undefined;

  // Builds the plan entry for an ordinary (non-text, non-TTL) index.
  const plainIndex = (
    collection: string,
    name: string,
    key: ManagedIndexKey,
    unique = false
  ): ManagedIndex => ({
    collection,
    key,
    options: { name, unique, background: true },
    conflictMeansExists: true,
  });

  const plan: ManagedIndex[] = [
    // ─── 1. compliance_rules full-text index ─────────────────────────
    {
      collection: rulesCollection,
      key: {
        ruleName: "text",
        description: "text",
        content: "text",
        "translations.zh": "text",
        "translations.en": "text",
      },
      options: {
        name: "compliance_rules_fulltext",
        weights: {
          ruleName: 10, // rule name carries the highest weight
          description: 5, // then the description
          content: 3, // then the content
          "translations.zh": 3,
          "translations.en": 2,
        },
        default_language: "none", // no language-specific stemming, so Chinese text is indexed as-is
      },
      isEquivalent: hasTextKey,
      conflictMeansExists: false,
    },

    // ─── 2. compliance_rules ordinary indexes ────────────────────────
    plainIndex(rulesCollection, "idx_rules_jurisdiction", { jurisdiction: 1 }),
    plainIndex(rulesCollection, "idx_rules_category", { category: 1 }),
    plainIndex(rulesCollection, "idx_rules_ruleId", { ruleId: 1 }, true),
    plainIndex(rulesCollection, "idx_rules_jur_cat", { jurisdiction: 1, category: 1 }),
    plainIndex(rulesCollection, "idx_rules_created_desc", { createdAt: -1 }),

    // ─── 3. agent_conversations TTL index (expires after 90 days) ────
    {
      collection: conversationsCollection,
      key: { updatedAt: 1 },
      options: {
        name: "agent_conversations_ttl",
        expireAfterSeconds: conversationTtlSeconds,
        background: true,
      },
      isEquivalent: hasTtl,
      conflictMeansExists: false,
    },

    // ─── 4. agent_conversations ordinary indexes ─────────────────────
    plainIndex(conversationsCollection, "idx_conv_user_agent", { userId: 1, agentType: 1 }),
    plainIndex(conversationsCollection, "idx_conv_user_updated", { userId: 1, updatedAt: -1 }),
    plainIndex(conversationsCollection, "idx_conv_id", { conversationId: 1 }, true),

    // ─── 5. knowledge_base full-text index ───────────────────────────
    // NOTE(review): nothing here checks that the collection exists first;
    // createIndex is issued unconditionally.
    {
      collection: knowledgeBaseCollection,
      key: { title: "text", content: "text", tags: "text" },
      options: {
        name: "knowledge_base_fulltext",
        weights: { title: 10, tags: 5, content: 3 },
        default_language: "none",
        background: true,
      },
      isEquivalent: hasTextKey,
      conflictMeansExists: true,
    },
  ];

  // Ensures a single index exists and records exactly one entry in `results`.
  const ensureIndex = async (index: ManagedIndex): Promise<void> => {
    const { collection, key, options, isEquivalent, conflictMeansExists } = index;
    const entry = { collection, indexName: options.name };

    try {
      const col = database.collection(collection);

      let existing: ExistingIndexInfo[];
      try {
        existing = (await col.listIndexes().toArray()) as ExistingIndexInfo[];
      } catch (err) {
        // A collection that does not exist yet simply has no indexes;
        // any other listing error is a real failure.
        if (!isNamespaceNotFound(err)) throw err;
        existing = [];
      }

      // createIndex is a silent no-op for an identical index, so existence has
      // to be detected up front to report "already_exists" correctly.
      const alreadyPresent = existing.some(
        (info) => info.name === options.name || (isEquivalent?.(info) ?? false)
      );
      if (alreadyPresent) {
        results.push({ ...entry, status: "already_exists" });
        return;
      }

      await col.createIndex(key, options);
      results.push({ ...entry, status: "created" });
    } catch (err) {
      if (conflictMeansExists && isIndexConflict(err)) {
        results.push({ ...entry, status: "already_exists" });
      } else {
        results.push({ ...entry, status: "failed", error: describeIndexError(err) });
      }
    }
  };

  // Sequential on purpose: keeps the result order stable.
  for (const index of plan) {
    await ensureIndex(index);
  }

  const countByStatus = (status: "created" | "already_exists" | "failed"): number =>
    results.filter((r) => r.status === status).length;
  const created = countByStatus("created");
  const existing = countByStatus("already_exists");
  const failed = countByStatus("failed");

  return {
    success: failed === 0,
    results,
    summary: `索引初始化完成:新建 ${created} 个,已存在 ${existing} 个,失败 ${failed} 个`,
  };
}