[{"data":1,"prerenderedAt":7929},["ShallowReactive",2],{"blog-post-/docs/langchain":3},{"id":4,"title":5,"body":6,"description":7918,"extension":7919,"meta":7920,"navigation":228,"ogImage":7922,"path":7925,"seo":7926,"stem":7927,"__hash__":7928},"content/docs/LangChain.md","Langchain",{"type":7,"value":8,"toc":7884},"minimark",[9,14,18,57,62,67,72,75,109,112,121,129,135,138,186,191,194,317,323,326,330,341,347,354,382,384,391,398,416,419,591,594,598,601,611,616,640,650,652,657,664,677,950,956,972,974,983,986,1028,1030,1035,1041,1187,1189,1197,1200,1249,1251,1256,1262,1305,1307,1312,1343,1347,1353,1359,1391,1393,1397,1405,1411,1429,1434,1477,1529,1531,1536,1539,1553,1561,1567,1583,1591,1597,1613,1680,1682,1687,1694,1779,1788,1806,1808,1881,1885,1891,1898,1926,1931,1941,2126,2131,2142,2149,2342,2347,2358,2380,2384,2387,2406,2411,2417,2450,2520,2525,2532,2543,2548,2551,2571,2642,2644,2649,2652,2672,2674,2679,2682,2688,2698,2706,2708,2712,2725,2732,2737,2766,2771,2774,3008,3011,3013,3017,3027,3041,3079,3081,3086,3091,3094,3125,3127,3132,3135,3400,3404,3417,3422,3438,3443,3446,3747,3752,3766,3770,3773,3776,3793,3798,3801,4095,4100,4119,4121,4125,4135,4145,4150,4384,4386,4390,4393,4400,4495,4506,4510,4517,4519,4526,4529,4538,4545,4625,4631,4641,4644,4751,4761,4774,4779,4965,4967,4971,4988,4998,5010,5015,5029,5141,5143,5147,5150,5156,5161,5167,5170,5195,5200,5203,5205,5210,5219,5226,5373,5380,5383,5577,5584,5594,5816,5818,5823,5826,5833,5850,5859,5866,5876,5888,5895,5898,5915,5920,5926,5940,5943,5945,5949,5956,5961,5972,6049,6054,6089,6091,6095,6100,6118,6124,6154,6164,6168,6171,6177,6180,6199,6202,6211,6217,6234,6244,6250,6262,6267,6401,6406,6418,6423,6429,6435,6438,6443,6470,6474,6486,6488,6495,6505,6508,6512,6514,6519,6529,6543,6556,6561,6568,6582,6587,6822,6827,6830,6868,6878,6880,6883,6886,6889,6893,6904,6914,6919,6926,6929,7003,7010,7013,7055,7057,7062,7065,7068,7073,7084,7089,7240,7245,7272,7278,7280,7285,7291,7305,7315,7317,7320,7323,7334,7338,7345,7348,7354,7375,7381,7402,7408,7421,7425,7435,7441,7444,7479,7484,7547,7552,7610,7613,7618,7632,7872,7878,7880],[10,11,13],"h1",{"id":12},"langchain","LangChain",[15,16,17],"p",{},"我们可以将 LangChain 看作是一个由多个功能模块组成的“工具箱”：",[19,20,21,29,35,45,51],"ol",{},[22,23,24,28],"li",{},[25,26,27],"strong",{},"Models（模型接口）"," 🧠：这是大脑，负责处理文本输入并生成输出。LangChain 统一了不同模型供应商（如 OpenAI, Anthropic, HuggingFace）的调用方式。",[22,30,31,34],{},[25,32,33],{},"Prompts（提示词模板）"," 📝：这是方向盘，通过模版化的方式管理输入，确保大模型能按预期的逻辑工作。",[22,36,37,40,41,44],{},[25,38,39],{},"Chains（链）"," ⛓️：这是流水线，通过 ",[25,42,43],{},"LCEL (LangChain Expression Language)"," 将模型、提示词和数据处理逻辑串联起来。",[22,46,47,50],{},[25,48,49],{},"Retrieval（检索系统）"," 📚：这是外挂知识库（RAG），让模型能够访问它训练数据之外的实时或私有信息。",[22,52,53,56],{},[25,54,55],{},"Agents（智能体）"," 🤖：这是高级形态，模型不仅会思考，还能根据需求选择并调用工具（如搜索、计算器、运行代码）。",[58,59,61],"h2",{"id":60},"_1model","1.Model",[63,64,66],"h3",{"id":65},"_11-统一的模型接口调用","1.1 统一的模型接口调用",[15,68,69],{},[25,70,71],{},"痛点：被厂商绑架的 API ⛓️",[15,73,74],{},"在没有 LangChain 之前，如果你想在你的应用中同时支持 OpenAI、Anthropic (Claude) 和 Google (Gemini) 的模型，你会面临一个巨大的工程灾难：",[76,77,78,89,95],"ul",{},[22,79,80,83,84,88],{},[25,81,82],{},"OpenAI"," 的输入格式是 ",[85,86,87],"code",{},"{\"role\": \"user\", \"content\": \"...\"}","。",[22,90,91,94],{},[25,92,93],{},"Anthropic"," 的 API 结构和鉴权方式完全不同。",[22,96,97,100,101,104,105,108],{},[25,98,99],{},"Gemini"," 又有自己独特的消息结构（比如 ",[85,102,103],{},"parts"," 和 ",[85,106,107],{},"role","）。",[15,110,111],{},"这意味着，如果你要切换底层模型，你的业务代码、历史记录管理和输出解析代码几乎要重写一遍。",[15,113,114],{},[25,115,116,117,120],{},"解决方案：",[85,118,119],{},"BaseChatModel"," 抽象类 🔌",[15,122,123,124,88],{},"LangChain 解决这个问题的核心在于它的底层基础包：",[25,125,126],{},[85,127,128],{},"langchain-core",[15,130,131,132,134],{},"在这个包里，LangChain 定义了一个极其重要的抽象基类：",[85,133,119],{},"。所有的模型厂商（或者社区开发者）要想把自己的模型接入 LangChain，就必须继承这个基类，并在内部把自己的特有 API 翻译成 LangChain 的标准格式。",[15,136,137],{},"这样做带来了三大统一：",[19,139,140,146,168],{},[22,141,142,145],{},[25,143,144],{},"统一的初始化接口","：虽然不同模型需要的 API Key 不同，但初始化的模式是一致的。",[22,147,148,151,152,155,156,159,160,163,164,167],{},[25,149,150],{},"统一的输入输出类型","：无论底层是哪个大模型，它们",[25,153,154],{},"只接受"," ",[85,157,158],{},"BaseMessage"," 列表，并且",[25,161,162],{},"只返回","统一的 ",[85,165,166],{},"AIMessage"," 对象。",[22,169,170,173,174,177,178,181,182,185],{},[25,171,172],{},"统一的调用方法 (Runnable 协议)","：所有模型都强制拥有 ",[85,175,176],{},".invoke()","（单次调用）、",[85,179,180],{},".stream()","（流式输出）、",[85,183,184],{},".batch()","（批量处理）等标准方法。",[15,187,188],{},[25,189,190],{},"代码实操：无缝切换底层模型",[15,192,193],{},"让我们来看看在代码层面，这种统一有多么优雅：",[195,196,201],"pre",{"className":197,"code":198,"language":199,"meta":200,"style":200},"language-Python shiki shiki-themes github-light github-dark","from langchain_core.messages import HumanMessage\nfrom langchain_openai import ChatOpenAI\nfrom langchain_anthropic import ChatAnthropic\n\n# 1. 初始化不同的模型实例 (这是唯一需要针对厂商修改的地方)\nllm_openai = ChatOpenAI(model=\"gpt-4-turbo\", temperature=0)\nllm_claude = ChatAnthropic(model=\"claude-3-opus-20240229\", temperature=0)\n\n# 2. 统一的输入：构建标准的 Message 列表\nmessages = [HumanMessage(content=\"用一句话解释什么是量子纠缠。\")]\n\n# 3. 统一的执行逻辑：下游的业务代码完全不需要知道底层是哪个模型\ndef get_ai_response(model, input_messages):\n    # 无论是 OpenAI 还是 Claude，调用的都是标准的 invoke\n    return model.invoke(input_messages)\n\n# 无论传哪个模型，业务逻辑都能完美运行\nresult_from_openai = get_ai_response(llm_openai, messages)\nresult_from_claude = get_ai_response(llm_claude, messages)\n","Python","",[85,202,203,211,217,223,230,236,242,248,253,259,265,270,276,282,288,294,299,305,311],{"__ignoreMap":200},[204,205,208],"span",{"class":206,"line":207},"line",1,[204,209,210],{},"from langchain_core.messages import HumanMessage\n",[204,212,214],{"class":206,"line":213},2,[204,215,216],{},"from langchain_openai import ChatOpenAI\n",[204,218,220],{"class":206,"line":219},3,[204,221,222],{},"from langchain_anthropic import ChatAnthropic\n",[204,224,226],{"class":206,"line":225},4,[204,227,229],{"emptyLinePlaceholder":228},true,"\n",[204,231,233],{"class":206,"line":232},5,[204,234,235],{},"# 1. 初始化不同的模型实例 (这是唯一需要针对厂商修改的地方)\n",[204,237,239],{"class":206,"line":238},6,[204,240,241],{},"llm_openai = ChatOpenAI(model=\"gpt-4-turbo\", temperature=0)\n",[204,243,245],{"class":206,"line":244},7,[204,246,247],{},"llm_claude = ChatAnthropic(model=\"claude-3-opus-20240229\", temperature=0)\n",[204,249,251],{"class":206,"line":250},8,[204,252,229],{"emptyLinePlaceholder":228},[204,254,256],{"class":206,"line":255},9,[204,257,258],{},"# 2. 统一的输入：构建标准的 Message 列表\n",[204,260,262],{"class":206,"line":261},10,[204,263,264],{},"messages = [HumanMessage(content=\"用一句话解释什么是量子纠缠。\")]\n",[204,266,268],{"class":206,"line":267},11,[204,269,229],{"emptyLinePlaceholder":228},[204,271,273],{"class":206,"line":272},12,[204,274,275],{},"# 3. 统一的执行逻辑：下游的业务代码完全不需要知道底层是哪个模型\n",[204,277,279],{"class":206,"line":278},13,[204,280,281],{},"def get_ai_response(model, input_messages):\n",[204,283,285],{"class":206,"line":284},14,[204,286,287],{},"    # 无论是 OpenAI 还是 Claude，调用的都是标准的 invoke\n",[204,289,291],{"class":206,"line":290},15,[204,292,293],{},"    return model.invoke(input_messages)\n",[204,295,297],{"class":206,"line":296},16,[204,298,229],{"emptyLinePlaceholder":228},[204,300,302],{"class":206,"line":301},17,[204,303,304],{},"# 无论传哪个模型，业务逻辑都能完美运行\n",[204,306,308],{"class":206,"line":307},18,[204,309,310],{},"result_from_openai = get_ai_response(llm_openai, messages)\n",[204,312,314],{"class":206,"line":313},19,[204,315,316],{},"result_from_claude = get_ai_response(llm_claude, messages)\n",[15,318,319,320,88],{},"通过这种设计，你可以通过修改配置文件中的一行代码，瞬间把整个系统的底层引擎从 OpenAI 换成私有化部署的开源模型（比如 Llama 3），而你的上层业务逻辑（提示词组装、输出解析、Agent 循环）",[25,321,322],{},"一行都不用改",[324,325],"hr",{},[63,327,329],{"id":328},"_12-prompt-templates工业级的组装流水线","1.2 Prompt Templates：工业级的组装流水线",[15,331,332,333,335,336,88],{},"现在，我们理解了输入和接口的统一。既然所有模型调用 ",[85,334,176],{}," 后，返回的都不再是各大厂商乱七八糟的 JSON 响应，而是一个被 LangChain 统一封装好的 ",[25,337,338,340],{},[85,339,166],{}," 对象",[15,342,343,344,88],{},"在早期的开发中，模型主要接收一串长文本并续写（LLMs）。但在目前的工程实践中，我们几乎全部转向了 ",[25,345,346],{},"Chat Models（聊天模型）",[15,348,349,350,353],{},"Chat Models 的底层逻辑是：",[25,351,352],{},"输入和输出不再是单一的字符串，而是一个“消息列表 (List of Messages)”","。为了规范化，LangChain 定义了三种最核心的消息对象：",[76,355,356,365,374],{},[22,357,358,364],{},[25,359,360,363],{},[85,361,362],{},"SystemMessage"," (系统消息)","：定义全局人设、规则和背景约束（例如：“你是一个只能输出 JSON 的翻译官”）。通常放在列表的第一位。",[22,366,367,373],{},[25,368,369,372],{},[85,370,371],{},"HumanMessage"," (人类消息)","：用户的当前输入或指令。",[22,375,376,381],{},[25,377,378,380],{},[85,379,166],{}," (AI 消息)","：模型之前的历史回复内容。",[324,383],{},[15,385,386,387,390],{},"你可能会问：既然只是拼接消息，为什么不用 Python 的 ",[85,388,389],{},"f-string","？",[15,392,393,394,397],{},"在生产级应用中，",[25,395,396],{},"Prompt Templates"," 提供了不可替代的工程价值：",[76,399,400,410],{},[22,401,402,405,406,104,408,167],{},[25,403,404],{},"自动角色转换","：它可以将简单的变量字典安全地转换为上述的 ",[85,407,362],{},[85,409,371],{},[22,411,412,415],{},[25,413,414],{},"变量校验","：如果下游缺少必要的参数，它会在组装阶段直接抛出异常，而不是让残缺的提示词进入大模型浪费 Token。",[15,417,418],{},"让我们看一个基础的组装代码：",[195,420,424],{"className":421,"code":422,"language":423,"meta":200,"style":200},"language-python shiki shiki-themes github-light github-dark","from langchain_core.prompts import ChatPromptTemplate\nfrom langchain_openai import ChatOpenAI\n\n# 1. 使用元组列表快速构建包含不同角色的 Prompt 模板\nprompt_template = ChatPromptTemplate.from_messages([\n    (\"system\", \"你是一个精通 {language} 的资深工程师。\"),\n    (\"human\", \"请解释什么是 {concept}。\")\n])\n\n# 2. 注入变量，生成实际的消息列表\nmessages = prompt_template.invoke({\n    \"language\": \"Python\",\n    \"concept\": \"装饰器\"\n})\n\nprint(messages)\n# 输出: [SystemMessage(content='你是一个精通 Python 的资深工程师。'), HumanMessage(content='请解释什么是 装饰器。')]\n","python",[85,425,426,442,454,458,464,475,500,521,526,530,535,545,559,569,574,578,586],{"__ignoreMap":200},[204,427,428,432,436,439],{"class":206,"line":207},[204,429,431],{"class":430},"szBVR","from",[204,433,435],{"class":434},"sVt8B"," langchain_core.prompts ",[204,437,438],{"class":430},"import",[204,440,441],{"class":434}," ChatPromptTemplate\n",[204,443,444,446,449,451],{"class":206,"line":213},[204,445,431],{"class":430},[204,447,448],{"class":434}," langchain_openai ",[204,450,438],{"class":430},[204,452,453],{"class":434}," ChatOpenAI\n",[204,455,456],{"class":206,"line":219},[204,457,229],{"emptyLinePlaceholder":228},[204,459,460],{"class":206,"line":225},[204,461,463],{"class":462},"sJ8bj","# 1. 使用元组列表快速构建包含不同角色的 Prompt 模板\n",[204,465,466,469,472],{"class":206,"line":232},[204,467,468],{"class":434},"prompt_template ",[204,470,471],{"class":430},"=",[204,473,474],{"class":434}," ChatPromptTemplate.from_messages([\n",[204,476,477,480,484,487,490,494,497],{"class":206,"line":238},[204,478,479],{"class":434},"    (",[204,481,483],{"class":482},"sZZnC","\"system\"",[204,485,486],{"class":434},", ",[204,488,489],{"class":482},"\"你是一个精通 ",[204,491,493],{"class":492},"sj4cs","{language}",[204,495,496],{"class":482}," 的资深工程师。\"",[204,498,499],{"class":434},"),\n",[204,501,502,504,507,509,512,515,518],{"class":206,"line":244},[204,503,479],{"class":434},[204,505,506],{"class":482},"\"human\"",[204,508,486],{"class":434},[204,510,511],{"class":482},"\"请解释什么是 ",[204,513,514],{"class":492},"{concept}",[204,516,517],{"class":482},"。\"",[204,519,520],{"class":434},")\n",[204,522,523],{"class":206,"line":250},[204,524,525],{"class":434},"])\n",[204,527,528],{"class":206,"line":255},[204,529,229],{"emptyLinePlaceholder":228},[204,531,532],{"class":206,"line":261},[204,533,534],{"class":462},"# 2. 注入变量，生成实际的消息列表\n",[204,536,537,540,542],{"class":206,"line":267},[204,538,539],{"class":434},"messages ",[204,541,471],{"class":430},[204,543,544],{"class":434}," prompt_template.invoke({\n",[204,546,547,550,553,556],{"class":206,"line":272},[204,548,549],{"class":482},"    \"language\"",[204,551,552],{"class":434},": ",[204,554,555],{"class":482},"\"Python\"",[204,557,558],{"class":434},",\n",[204,560,561,564,566],{"class":206,"line":278},[204,562,563],{"class":482},"    \"concept\"",[204,565,552],{"class":434},[204,567,568],{"class":482},"\"装饰器\"\n",[204,570,571],{"class":206,"line":284},[204,572,573],{"class":434},"})\n",[204,575,576],{"class":206,"line":290},[204,577,229],{"emptyLinePlaceholder":228},[204,579,580,583],{"class":206,"line":296},[204,581,582],{"class":492},"print",[204,584,585],{"class":434},"(messages)\n",[204,587,588],{"class":206,"line":301},[204,589,590],{"class":462},"# 输出: [SystemMessage(content='你是一个精通 Python 的资深工程师。'), HumanMessage(content='请解释什么是 装饰器。')]\n",[15,592,593],{},"这些结构化的消息，正是大模型最喜欢的数据格式。",[63,595,597],{"id":596},"_13-output-parsers走向强类型利用原生工具调用","1.3 Output Parsers：走向强类型，利用原生工具调用",[15,599,600],{},"在早期，我们让模型输出 JSON 的方式是这样的：",[602,603,604],"blockquote",{},[15,605,606,607,610],{},"“请输出 JSON，格式如下：",[85,608,609],{},"{\"name\": \"...\", \"age\": ...}","。不要输出多余的解释文字！”",[15,612,613],{},[25,614,615],{},"工程痛点：",[19,617,618,628,634],{},[22,619,620,623,624,627],{},[25,621,622],{},"幻觉前缀","：模型偶尔会说“好的，这是你要的 JSON：...”，导致 ",[85,625,626],{},"json.loads"," 报错。",[22,629,630,633],{},[25,631,632],{},"结构不稳","：在高并发下，模型可能会漏掉一个引号或逗号。",[22,635,636,639],{},[25,637,638],{},"维护困难","：字段一旦增多，Prompt 会变得极其臃肿。",[15,641,642,645,646,649],{},[25,643,644],{},"现代方案","：利用 OpenAI、Anthropic 等模型原生的 ",[25,647,648],{},"Function Calling"," 接口。你不再是通过“求”模型给 JSON，而是给它定义一个“数据契约”，模型在 API 层面就被限制只能按照这个契约填充数据。",[324,651],{},[15,653,654],{},[25,655,656],{},"第一步：定义强类型契约 (Pydantic Schema)",[15,658,659,660,663],{},"在 Python 工程中，",[25,661,662],{},"Pydantic"," 是数据校验的事实标准。在 LangChain 中，它承担了“协议定义”的角色。",[15,665,666,667,104,670,673,674,88],{},"请注意代码中的 ",[85,668,669],{},"Field",[85,671,672],{},"description","，这是",[25,675,676],{},"模型理解字段含义的唯一依据",[195,678,680],{"className":421,"code":679,"language":423,"meta":200,"style":200},"from typing import List, Optional, Literal\nfrom pydantic import BaseModel, Field\n\n# 定义一个复杂的输出结构\nclass FinancialReport(BaseModel):\n    \"\"\"提取财务报告中的核心指标\"\"\" # 类的文档字符串也会发送给模型\n    \n    company_name: str = Field(..., description=\"公司的完整法定名称\")\n    \n    # 使用 Literal 限制模型只能在指定范围内选择，防止模型乱编\n    sentiment: Literal[\"positive\", \"neutral\", \"negative\"] = Field(\n        description=\"报告表现出的整体情感极调\"\n    )\n    \n    # 使用 Optional 处理缺失数据，这是工程防御的关键\n    revenue: Optional[float] = Field(\n        None, description=\"营业收入金额（单位：亿元）。如果文中未提及，请设为 null\"\n    )\n    \n    key_risks: List[str] = Field(\n        default_factory=list, description=\"报告中提到的风险点列表，至少提取3条\"\n    )\n\n    # 嵌套结构：展示更复杂的逻辑\n    audit_info: Optional[dict] = Field(\n        description=\"包含 'auditor' (审计师) 和 'opinion' (审计意见) 的字典\"\n    )\n",[85,681,682,694,706,710,715,733,741,746,775,779,784,810,820,825,829,834,848,862,866,870,884,904,909,914,920,935,945],{"__ignoreMap":200},[204,683,684,686,689,691],{"class":206,"line":207},[204,685,431],{"class":430},[204,687,688],{"class":434}," typing ",[204,690,438],{"class":430},[204,692,693],{"class":434}," List, Optional, Literal\n",[204,695,696,698,701,703],{"class":206,"line":213},[204,697,431],{"class":430},[204,699,700],{"class":434}," pydantic ",[204,702,438],{"class":430},[204,704,705],{"class":434}," BaseModel, Field\n",[204,707,708],{"class":206,"line":219},[204,709,229],{"emptyLinePlaceholder":228},[204,711,712],{"class":206,"line":225},[204,713,714],{"class":462},"# 定义一个复杂的输出结构\n",[204,716,717,720,724,727,730],{"class":206,"line":232},[204,718,719],{"class":430},"class",[204,721,723],{"class":722},"sScJk"," FinancialReport",[204,725,726],{"class":434},"(",[204,728,729],{"class":722},"BaseModel",[204,731,732],{"class":434},"):\n",[204,734,735,738],{"class":206,"line":238},[204,736,737],{"class":482},"    \"\"\"提取财务报告中的核心指标\"\"\"",[204,739,740],{"class":462}," # 类的文档字符串也会发送给模型\n",[204,742,743],{"class":206,"line":244},[204,744,745],{"class":434},"    \n",[204,747,748,751,754,757,760,763,765,768,770,773],{"class":206,"line":250},[204,749,750],{"class":434},"    company_name: ",[204,752,753],{"class":492},"str",[204,755,756],{"class":430}," =",[204,758,759],{"class":434}," Field(",[204,761,762],{"class":492},"...",[204,764,486],{"class":434},[204,766,672],{"class":767},"s4XuR",[204,769,471],{"class":430},[204,771,772],{"class":482},"\"公司的完整法定名称\"",[204,774,520],{"class":434},[204,776,777],{"class":206,"line":255},[204,778,745],{"class":434},[204,780,781],{"class":206,"line":261},[204,782,783],{"class":462},"    # 使用 Literal 限制模型只能在指定范围内选择，防止模型乱编\n",[204,785,786,789,792,794,797,799,802,805,807],{"class":206,"line":267},[204,787,788],{"class":434},"    sentiment: Literal[",[204,790,791],{"class":482},"\"positive\"",[204,793,486],{"class":434},[204,795,796],{"class":482},"\"neutral\"",[204,798,486],{"class":434},[204,800,801],{"class":482},"\"negative\"",[204,803,804],{"class":434},"] ",[204,806,471],{"class":430},[204,808,809],{"class":434}," Field(\n",[204,811,812,815,817],{"class":206,"line":272},[204,813,814],{"class":767},"        description",[204,816,471],{"class":430},[204,818,819],{"class":482},"\"报告表现出的整体情感极调\"\n",[204,821,822],{"class":206,"line":278},[204,823,824],{"class":434},"    )\n",[204,826,827],{"class":206,"line":284},[204,828,745],{"class":434},[204,830,831],{"class":206,"line":290},[204,832,833],{"class":462},"    # 使用 Optional 处理缺失数据，这是工程防御的关键\n",[204,835,836,839,842,844,846],{"class":206,"line":296},[204,837,838],{"class":434},"    revenue: Optional[",[204,840,841],{"class":492},"float",[204,843,804],{"class":434},[204,845,471],{"class":430},[204,847,809],{"class":434},[204,849,850,853,855,857,859],{"class":206,"line":301},[204,851,852],{"class":492},"        None",[204,854,486],{"class":434},[204,856,672],{"class":767},[204,858,471],{"class":430},[204,860,861],{"class":482},"\"营业收入金额（单位：亿元）。如果文中未提及，请设为 null\"\n",[204,863,864],{"class":206,"line":307},[204,865,824],{"class":434},[204,867,868],{"class":206,"line":313},[204,869,745],{"class":434},[204,871,873,876,878,880,882],{"class":206,"line":872},20,[204,874,875],{"class":434},"    key_risks: List[",[204,877,753],{"class":492},[204,879,804],{"class":434},[204,881,471],{"class":430},[204,883,809],{"class":434},[204,885,887,890,892,895,897,899,901],{"class":206,"line":886},21,[204,888,889],{"class":767},"        default_factory",[204,891,471],{"class":430},[204,893,894],{"class":492},"list",[204,896,486],{"class":434},[204,898,672],{"class":767},[204,900,471],{"class":430},[204,902,903],{"class":482},"\"报告中提到的风险点列表，至少提取3条\"\n",[204,905,907],{"class":206,"line":906},22,[204,908,824],{"class":434},[204,910,912],{"class":206,"line":911},23,[204,913,229],{"emptyLinePlaceholder":228},[204,915,917],{"class":206,"line":916},24,[204,918,919],{"class":462},"    # 嵌套结构：展示更复杂的逻辑\n",[204,921,923,926,929,931,933],{"class":206,"line":922},25,[204,924,925],{"class":434},"    audit_info: Optional[",[204,927,928],{"class":492},"dict",[204,930,804],{"class":434},[204,932,471],{"class":430},[204,934,809],{"class":434},[204,936,938,940,942],{"class":206,"line":937},26,[204,939,814],{"class":767},[204,941,471],{"class":430},[204,943,944],{"class":482},"\"包含 'auditor' (审计师) 和 'opinion' (审计意见) 的字典\"\n",[204,946,948],{"class":206,"line":947},27,[204,949,824],{"class":434},[15,951,952,955],{},[25,953,954],{},"资深工程师笔记","：",[76,957,958,967],{},[22,959,960,962,963,966],{},[85,961,762],{}," 代表必填项，",[85,964,965],{},"None"," 代表可选。",[22,968,969,971],{},[85,970,672],{}," 是给 AI 读的“微型提示词”。例如，你可以通过描述告诉 AI 如何换算单位（如“将万元折算为亿元”）。",[324,973],{},[15,975,976],{},[25,977,978,979,982],{},"第二步：绑定模型与方法选择 (",[85,980,981],{},"with_structured_output",")",[15,984,985],{},"LangChain 的新版接口将模型包装成了一个“结构化输出器”。",[195,987,989],{"className":197,"code":988,"language":199,"meta":200,"style":200},"from langchain_openai import ChatOpenAI\n\nllm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n\n# 核心：将 Pydantic 类绑定到模型\n# method 参数通常有 'function_calling' 或 'json_mode'\n# 'function_calling' 是最稳定的，因为它利用了模型专门训练过的工具调用路径\nstructured_llm = llm.with_structured_output(FinancialReport, method=\"function_calling\")\n",[85,990,991,995,999,1004,1008,1013,1018,1023],{"__ignoreMap":200},[204,992,993],{"class":206,"line":207},[204,994,216],{},[204,996,997],{"class":206,"line":213},[204,998,229],{"emptyLinePlaceholder":228},[204,1000,1001],{"class":206,"line":219},[204,1002,1003],{},"llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n",[204,1005,1006],{"class":206,"line":225},[204,1007,229],{"emptyLinePlaceholder":228},[204,1009,1010],{"class":206,"line":232},[204,1011,1012],{},"# 核心：将 Pydantic 类绑定到模型\n",[204,1014,1015],{"class":206,"line":238},[204,1016,1017],{},"# method 参数通常有 'function_calling' 或 'json_mode'\n",[204,1019,1020],{"class":206,"line":244},[204,1021,1022],{},"# 'function_calling' 是最稳定的，因为它利用了模型专门训练过的工具调用路径\n",[204,1024,1025],{"class":206,"line":250},[204,1026,1027],{},"structured_llm = llm.with_structured_output(FinancialReport, method=\"function_calling\")\n",[324,1029],{},[15,1031,1032],{},[25,1033,1034],{},"第三步：处理真实世界的“不确定性” (Error Handling)",[15,1036,1037,1038,88],{},"即使使用了原生工具调用，模型依然可能失败（例如：输入太乱导致模型无法提取必填字段）。在工程化应用中，我们需要一层",[25,1039,1040],{},"异常拦截",[195,1042,1044],{"className":197,"code":1043,"language":199,"meta":200,"style":200},"from langchain_core.prompts import ChatPromptTemplate\nfrom pydantic import ValidationError\n\nprompt = ChatPromptTemplate.from_template(\"请分析以下文本并提取数据：\\n\\n{input}\")\n\n# 构建链\nchain = prompt | structured_llm\n\ndef safe_extract(text: str):\n    try:\n        # invoke 返回的是一个 FinancialReport 的对象实例\n        response = chain.invoke({\"input\": text})\n        return response\n    except ValidationError as e:\n        # 当模型返回的格式不符合 Pydantic 要求时触发\n        print(f\"数据校验失败: {e}\")\n        return None\n    except Exception as e:\n        print(f\"其他错误: {e}\")\n        return None\n\n# 测试：传入一段不完整的文本\nraw_data = \"本季度腾讯表现不错，虽然没说具体挣了多少钱，但大家都很有信心。\"\nresult = safe_extract(raw_data)\n\nif result:\n    print(f\"公司: {result.company_name}\")\n    print(f\"营收: {result.revenue}\") # 这里会输出 None，而不是报错崩溃\n    print(f\"情感: {result.sentiment}\")\n",[85,1045,1046,1051,1056,1060,1065,1069,1074,1079,1083,1088,1093,1098,1103,1108,1113,1118,1123,1128,1133,1138,1142,1146,1151,1156,1161,1165,1170,1175,1181],{"__ignoreMap":200},[204,1047,1048],{"class":206,"line":207},[204,1049,1050],{},"from langchain_core.prompts import ChatPromptTemplate\n",[204,1052,1053],{"class":206,"line":213},[204,1054,1055],{},"from pydantic import ValidationError\n",[204,1057,1058],{"class":206,"line":219},[204,1059,229],{"emptyLinePlaceholder":228},[204,1061,1062],{"class":206,"line":225},[204,1063,1064],{},"prompt = ChatPromptTemplate.from_template(\"请分析以下文本并提取数据：\\n\\n{input}\")\n",[204,1066,1067],{"class":206,"line":232},[204,1068,229],{"emptyLinePlaceholder":228},[204,1070,1071],{"class":206,"line":238},[204,1072,1073],{},"# 构建链\n",[204,1075,1076],{"class":206,"line":244},[204,1077,1078],{},"chain = prompt | structured_llm\n",[204,1080,1081],{"class":206,"line":250},[204,1082,229],{"emptyLinePlaceholder":228},[204,1084,1085],{"class":206,"line":255},[204,1086,1087],{},"def safe_extract(text: str):\n",[204,1089,1090],{"class":206,"line":261},[204,1091,1092],{},"    try:\n",[204,1094,1095],{"class":206,"line":267},[204,1096,1097],{},"        # invoke 返回的是一个 FinancialReport 的对象实例\n",[204,1099,1100],{"class":206,"line":272},[204,1101,1102],{},"        response = chain.invoke({\"input\": text})\n",[204,1104,1105],{"class":206,"line":278},[204,1106,1107],{},"        return response\n",[204,1109,1110],{"class":206,"line":284},[204,1111,1112],{},"    except ValidationError as e:\n",[204,1114,1115],{"class":206,"line":290},[204,1116,1117],{},"        # 当模型返回的格式不符合 Pydantic 要求时触发\n",[204,1119,1120],{"class":206,"line":296},[204,1121,1122],{},"        print(f\"数据校验失败: {e}\")\n",[204,1124,1125],{"class":206,"line":301},[204,1126,1127],{},"        return None\n",[204,1129,1130],{"class":206,"line":307},[204,1131,1132],{},"    except Exception as e:\n",[204,1134,1135],{"class":206,"line":313},[204,1136,1137],{},"        print(f\"其他错误: {e}\")\n",[204,1139,1140],{"class":206,"line":872},[204,1141,1127],{},[204,1143,1144],{"class":206,"line":886},[204,1145,229],{"emptyLinePlaceholder":228},[204,1147,1148],{"class":206,"line":906},[204,1149,1150],{},"# 测试：传入一段不完整的文本\n",[204,1152,1153],{"class":206,"line":911},[204,1154,1155],{},"raw_data = \"本季度腾讯表现不错，虽然没说具体挣了多少钱，但大家都很有信心。\"\n",[204,1157,1158],{"class":206,"line":916},[204,1159,1160],{},"result = safe_extract(raw_data)\n",[204,1162,1163],{"class":206,"line":922},[204,1164,229],{"emptyLinePlaceholder":228},[204,1166,1167],{"class":206,"line":937},[204,1168,1169],{},"if result:\n",[204,1171,1172],{"class":206,"line":947},[204,1173,1174],{},"    print(f\"公司: {result.company_name}\")\n",[204,1176,1178],{"class":206,"line":1177},28,[204,1179,1180],{},"    print(f\"营收: {result.revenue}\") # 这里会输出 None，而不是报错崩溃\n",[204,1182,1184],{"class":206,"line":1183},29,[204,1185,1186],{},"    print(f\"情感: {result.sentiment}\")\n",[324,1188],{},[15,1190,1191],{},[25,1192,1193,1194,1196],{},"深度：",[85,1195,981],{}," 到底做了什么？",[15,1198,1199],{},"作为一个资深工程师，你需要知道底层发生了什么：",[19,1201,1202,1215,1229,1239],{},[22,1203,1204,1207,1208,1210,1211,1214],{},[25,1205,1206],{},"Schema 转换","：LangChain 将你的 ",[85,1209,662],{}," 类转换成了 OpenAI 要求的 ",[25,1212,1213],{},"JSON Schema"," 格式。",[22,1216,1217,1220,1221,1224,1225,1228],{},[25,1218,1219],{},"API 注入","：在调用模型 API 时，这个 Schema 会被放入 ",[85,1222,1223],{},"tools"," 或 ",[85,1226,1227],{},"response_format"," 参数中。",[22,1230,1231,1234,1235,1238],{},[25,1232,1233],{},"强制引导","：模型在生成每一个 Token 时，都会受到这个 Schema 的约束（比如定义了 ",[85,1236,1237],{},"int","，它就不能输出字母）。",[22,1240,1241,1244,1245,1248],{},[25,1242,1243],{},"自动实例化","：当 API 返回 JSON 字符串后，LangChain 会自动调用 ",[85,1246,1247],{},"FinancialReport(**json_data)","，将字符串变成一个你可以直接点出属性的 Python 对象。",[324,1250],{},[15,1252,1253],{},[25,1254,1255],{},"工程进阶：多模型适配与 Fallback",[15,1257,1258,1259,88],{},"如果你担心某个模型（如 GPT-3.5）提取能力不够，导致解析失败，你可以构建一个",[25,1260,1261],{},"备份链",[195,1263,1265],{"className":197,"code":1264,"language":199,"meta":200,"style":200},"# 定义备选模型（更强大的 GPT-4）\nfallback_llm = ChatOpenAI(model=\"gpt-4o\").with_structured_output(FinancialReport)\n\n# 使用 .with_fallbacks 建立容错机制\nrobust_chain = chain.with_fallbacks([fallback_llm])\n\n# 逻辑：先试用便宜的模型，如果解析报错，自动调用贵但聪明的模型重试\nresult = robust_chain.invoke({\"input\": \"一段复杂的财务文本...\"})\n",[85,1266,1267,1272,1277,1281,1286,1291,1295,1300],{"__ignoreMap":200},[204,1268,1269],{"class":206,"line":207},[204,1270,1271],{},"# 定义备选模型（更强大的 GPT-4）\n",[204,1273,1274],{"class":206,"line":213},[204,1275,1276],{},"fallback_llm = ChatOpenAI(model=\"gpt-4o\").with_structured_output(FinancialReport)\n",[204,1278,1279],{"class":206,"line":219},[204,1280,229],{"emptyLinePlaceholder":228},[204,1282,1283],{"class":206,"line":225},[204,1284,1285],{},"# 使用 .with_fallbacks 建立容错机制\n",[204,1287,1288],{"class":206,"line":232},[204,1289,1290],{},"robust_chain = chain.with_fallbacks([fallback_llm])\n",[204,1292,1293],{"class":206,"line":238},[204,1294,229],{"emptyLinePlaceholder":228},[204,1296,1297],{"class":206,"line":244},[204,1298,1299],{},"# 逻辑：先试用便宜的模型，如果解析报错，自动调用贵但聪明的模型重试\n",[204,1301,1302],{"class":206,"line":250},[204,1303,1304],{},"result = robust_chain.invoke({\"input\": \"一段复杂的财务文本...\"})\n",[324,1306],{},[15,1308,1309],{},[25,1310,1311],{},"总结 Model I/O 的工程化精髓：",[76,1313,1314,1323,1332],{},[22,1315,1316,1319,1320,1322],{},[25,1317,1318],{},"统一接口","：通过 ",[85,1321,119],{}," 屏蔽厂商 API 差异。",[22,1324,1325,1319,1328,1331],{},[25,1326,1327],{},"模板管理",[85,1329,1330],{},"ChatPromptTemplate"," 确保消息序列的顺序和变量安全。",[22,1333,1334,1319,1337,1339,1340,1342],{},[25,1335,1336],{},"强类型解析",[85,1338,662],{}," + ",[85,1341,981],{}," 实现从“自然语言”到“后端对象”的稳健转化。",[58,1344,1346],{"id":1345},"_2-检索增强生成rag","2 检索增强生成（RAG）",[15,1348,1349,1350],{},"大模型虽然博学，但它不知道你的私有文档、最新的新闻或是公司的内部 API 规范。RAG 的核心思想是：",[25,1351,1352],{},"与其试图把所有知识都塞进模型的权重里，不如在模型回答之前，先帮它去“查书”。",[15,1354,1355,1356,955],{},"一个标准的 RAG 流程分为五个核心环节，我们称之为 ",[25,1357,1358],{},"“RAG 五部曲”",[19,1360,1361,1367,1373,1379,1385],{},[22,1362,1363,1366],{},[25,1364,1365],{},"载入 (Load)","：读取 PDF、网页、Word 或数据库。",[22,1368,1369,1372],{},[25,1370,1371],{},"切分 (Split)","：将长文档切成小块（Chunk），因为模型窗口有限。",[22,1374,1375,1378],{},[25,1376,1377],{},"向量化 (Embed)","：把文字转换成机器能理解的数字向量。",[22,1380,1381,1384],{},[25,1382,1383],{},"存储 (Store)","：存入向量数据库（Vector Store）。",[22,1386,1387,1390],{},[25,1388,1389],{},"检索 (Retrieve)","：用户提问时，找回最相关的文本块，喂给模型。",[324,1392],{},[63,1394,1396],{"id":1395},"_21-文档载入与切分","2.1 文档载入与切分",[15,1398,1399,1400,88],{},"在 LangChain 中，所有数据最终都必须转化为一个统一的 Python 对象：",[25,1401,1402],{},[85,1403,1404],{},"Document",[15,1406,1407,1408,1410],{},"一个 ",[85,1409,1404],{}," 对象包含两个核心字段：",[76,1412,1413,1419],{},[22,1414,1415,1418],{},[85,1416,1417],{},"page_content",": 文本内容。",[22,1420,1421,1424,1425,1428],{},[85,1422,1423],{},"metadata",": 一个字典，记录来源、页码、标题等元数据（这对后续的",[25,1426,1427],{},"精准过滤","至关重要）。",[15,1430,1431],{},[25,1432,1433],{},"工业级选型建议：",[19,1435,1436,1460],{},[22,1437,1438,955,1441,486,1444,486,1447,1450,1451],{},[25,1439,1440],{},"入门级",[85,1442,1443],{},"PyPDFLoader",[85,1445,1446],{},"CSVLoader",[85,1448,1449],{},"TextLoader","。\n",[76,1452,1453],{},[22,1454,1455,1459],{},[1456,1457,1458],"em",{},"特点","：简单、轻量，适合结构非常规整的文件。",[22,1461,1462,955,1465,1450,1470],{},[25,1463,1464],{},"专家级",[25,1466,1467],{},[85,1468,1469],{},"UnstructuredLoader",[76,1471,1472],{},[22,1473,1474,1476],{},[1456,1475,1458],{},"：它是 RAG 工程中的“瑞士军刀”。它能自动识别文件类型（PDF, HTML, Word, PPT），最重要的是，它能通过 OCR 或布局分析，智能地剥离网页的广告条、导航栏，只留下核心正文。",[195,1478,1480],{"className":197,"code":1479,"language":199,"meta":200,"style":200},"# 必须安装: pip install \"unstructured[all-docs]\"\nfrom langchain_community.document_loaders import UnstructuredURLLoader\n\nurls = [\"https://lilianweng.github.io/posts/2023-06-23-agent/\"]\n\n# 模拟工程中的异步加载（生产环境通常需要批量并行处理）\nloader = UnstructuredURLLoader(urls=urls)\ndata = loader.load()\n\nprint(f\"提取的元数据: {data[0].metadata}\")\n",[85,1481,1482,1487,1492,1496,1501,1505,1510,1515,1520,1524],{"__ignoreMap":200},[204,1483,1484],{"class":206,"line":207},[204,1485,1486],{},"# 必须安装: pip install \"unstructured[all-docs]\"\n",[204,1488,1489],{"class":206,"line":213},[204,1490,1491],{},"from langchain_community.document_loaders import UnstructuredURLLoader\n",[204,1493,1494],{"class":206,"line":219},[204,1495,229],{"emptyLinePlaceholder":228},[204,1497,1498],{"class":206,"line":225},[204,1499,1500],{},"urls = [\"https://lilianweng.github.io/posts/2023-06-23-agent/\"]\n",[204,1502,1503],{"class":206,"line":232},[204,1504,229],{"emptyLinePlaceholder":228},[204,1506,1507],{"class":206,"line":238},[204,1508,1509],{},"# 模拟工程中的异步加载（生产环境通常需要批量并行处理）\n",[204,1511,1512],{"class":206,"line":244},[204,1513,1514],{},"loader = UnstructuredURLLoader(urls=urls)\n",[204,1516,1517],{"class":206,"line":250},[204,1518,1519],{},"data = loader.load()\n",[204,1521,1522],{"class":206,"line":255},[204,1523,229],{"emptyLinePlaceholder":228},[204,1525,1526],{"class":206,"line":261},[204,1527,1528],{},"print(f\"提取的元数据: {data[0].metadata}\")\n",[324,1530],{},[15,1532,1533],{},[25,1534,1535],{},"Text Splitters：语义完整性的守护者 ✂️",[15,1537,1538],{},"这是工程化中最容易被忽视，却最影响模型智商的地方。为什么不能简单地按字符数“每 500 字切一刀”？",[76,1540,1541,1547],{},[22,1542,1543,1546],{},[25,1544,1545],{},"语义断裂","：一段完整的代码逻辑或一个复杂的表格，可能会被从中切断，导致 AI 检索到后看不懂。",[22,1548,1549,1552],{},[25,1550,1551],{},"上下文丢失","：如果切片太小，AI 只看到局部，无法理解大意。",[15,1554,1555],{},[25,1556,1557,1558,982],{},"核心策略一：递归字符切分 (",[85,1559,1560],{},"RecursiveCharacterTextSplitter",[15,1562,1563,1564,88],{},"它是 LangChain 的默认选择，也是最稳健的选择。它会按照一组字符顺序尝试切分：",[85,1565,1566],{},"[\"\\n\\n\", \"\\n\", \" \", \"\"]",[76,1568,1569],{},[22,1570,1571,1572,1575,1576,1579,1580,88],{},"它先试着按段落切（",[85,1573,1574],{},"\\n\\n","），如果段落太长，再试着按行切（",[85,1577,1578],{},"\\n","），最后才按空格切。这样能最大限度保证",[25,1581,1582],{},"段落结构的完整",[15,1584,1585],{},[25,1586,1587,1588,982],{},"核心策略二：结构化切分 (",[85,1589,1590],{},"MarkdownHeaderTextSplitter",[15,1592,1593,1594,88],{},"在处理 API 文档、技术手册时，",[25,1595,1596],{},"标题结构就是语义边界",[15,1598,1599,1600,486,1603,486,1606,1609,1610,1612],{},"这种切分器会将 ",[85,1601,1602],{},"#",[85,1604,1605],{},"##",[85,1607,1608],{},"###"," 标题直接存入 ",[85,1611,1423],{},"，并按标题层级切分。",[195,1614,1616],{"className":197,"code":1615,"language":199,"meta":200,"style":200},"from langchain_text_splitters import RecursiveCharacterTextSplitter\n\n# 工程参数深度解析：\ntext_splitter = RecursiveCharacterTextSplitter(\n    chunk_size=1000,      # 每个块的最大容量 (Token或字符)\n    chunk_overlap=200,    # 块与块之间的重叠区\n    length_function=len,  # 计算长度的函数，也可以换成 tiktoken 来计算真正的 Token 数\n    is_separator_regex=False,\n)\n\n# 为什么需要 chunk_overlap？\n# 想象你在看一本连环画，如果每页之间完全独立，你会漏掉剧情。\n# 重叠部分就像是“上一集回顾”，能保证相邻的块之间有语义衔接。\n",[85,1617,1618,1623,1627,1632,1637,1642,1647,1652,1657,1661,1665,1670,1675],{"__ignoreMap":200},[204,1619,1620],{"class":206,"line":207},[204,1621,1622],{},"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",[204,1624,1625],{"class":206,"line":213},[204,1626,229],{"emptyLinePlaceholder":228},[204,1628,1629],{"class":206,"line":219},[204,1630,1631],{},"# 工程参数深度解析：\n",[204,1633,1634],{"class":206,"line":225},[204,1635,1636],{},"text_splitter = RecursiveCharacterTextSplitter(\n",[204,1638,1639],{"class":206,"line":232},[204,1640,1641],{},"    chunk_size=1000,      # 每个块的最大容量 (Token或字符)\n",[204,1643,1644],{"class":206,"line":238},[204,1645,1646],{},"    chunk_overlap=200,    # 块与块之间的重叠区\n",[204,1648,1649],{"class":206,"line":244},[204,1650,1651],{},"    length_function=len,  # 计算长度的函数，也可以换成 tiktoken 来计算真正的 Token 数\n",[204,1653,1654],{"class":206,"line":250},[204,1655,1656],{},"    is_separator_regex=False,\n",[204,1658,1659],{"class":206,"line":255},[204,1660,520],{},[204,1662,1663],{"class":206,"line":261},[204,1664,229],{"emptyLinePlaceholder":228},[204,1666,1667],{"class":206,"line":267},[204,1668,1669],{},"# 为什么需要 chunk_overlap？\n",[204,1671,1672],{"class":206,"line":272},[204,1673,1674],{},"# 想象你在看一本连环画，如果每页之间完全独立，你会漏掉剧情。\n",[204,1676,1677],{"class":206,"line":278},[204,1678,1679],{},"# 重叠部分就像是“上一集回顾”，能保证相邻的块之间有语义衔接。\n",[324,1681],{},[15,1683,1684],{},[25,1685,1686],{},"工程化进阶：Chunk Size 的调优艺术 🎨",[15,1688,1689,1690,1693],{},"作为资深工程师，你需要掌握这套调优公式。",[85,1691,1692],{},"chunk_size"," 的选择是一个平衡木：",[1695,1696,1697,1724],"table",{},[1698,1699,1700],"thead",{},[1701,1702,1703,1709,1714,1719],"tr",{},[1704,1705,1706],"th",{},[25,1707,1708],{},"方案",[1704,1710,1711],{},[25,1712,1713],{},"优点",[1704,1715,1716],{},[25,1717,1718],{},"缺点",[1704,1720,1721],{},[25,1722,1723],{},"适用场景",[1725,1726,1727,1745,1762],"tbody",{},[1701,1728,1729,1736,1739,1742],{},[1730,1731,1732,1735],"td",{},[25,1733,1734],{},"小 Chunk"," (如 200)",[1730,1737,1738],{},"检索极其精准，节省 Token",[1730,1740,1741],{},"上下文严重缺失，AI 容易断章取义",[1730,1743,1744],{},"事实搜索（如：查找具体的电话、日期）",[1701,1746,1747,1753,1756,1759],{},[1730,1748,1749,1752],{},[25,1750,1751],{},"中 Chunk"," (如 800)",[1730,1754,1755],{},"语义保留较好，平衡性高",[1730,1757,1758],{},"容易混入无关信息",[1730,1760,1761],{},"通用问答、摘要提取",[1701,1763,1764,1770,1773,1776],{},[1730,1765,1766,1769],{},[25,1767,1768],{},"大 Chunk"," (如 2000)",[1730,1771,1772],{},"上下文完整，理解力强",[1730,1774,1775],{},"浪费 Token，检索噪声多",[1730,1777,1778],{},"复杂逻辑推理、长文总结",[15,1780,1781,1784,1785,88],{},[25,1782,1783],{},"工程技巧："," 在生产环境中，我们经常使用 ",[25,1786,1787],{},"“父子索引（Parent-Document Retrieval）”",[76,1789,1790,1796],{},[22,1791,1792,1795],{},[25,1793,1794],{},"做法","：把大文档切成大块（Parent），再把大块切成小块（Child）。",[22,1797,1798,1801,1802,1805],{},[25,1799,1800],{},"逻辑","：通过小块进行高精度的向量匹配，但",[25,1803,1804],{},"真正喂给 AI 的是大块的内容","。这样既保证了检索的准，又保证了 AI 看得全。",[324,1807],{},[602,1809,1810,1816,1819,1864,1867],{},[15,1811,1812,1813],{},"现在，我们已经把长篇大论变成了易于检索的“小积木”。",[25,1814,1815],{},"在进入下一步“向量化存储”之前，请你想一下：如果我们的文档里包含大量的代码块或表格，这种简单的按字符数切分（比如每 500 字一刀）可能会带来什么样的问题？",[15,1817,1818],{},"❗按字符数硬切，会破坏“语义完整性”和“结构完整性”",[1695,1820,1821,1831],{},[1698,1822,1823],{},[1701,1824,1825,1828],{},[1704,1826,1827],{},"问题",[1704,1829,1830],{},"本质",[1725,1832,1833,1841,1849,1856],{},[1701,1834,1835,1838],{},[1730,1836,1837],{},"代码断裂",[1730,1839,1840],{},"结构破坏",[1701,1842,1843,1846],{},[1730,1844,1845],{},"表格错乱",[1730,1847,1848],{},"格式破坏",[1701,1850,1851,1854],{},[1730,1852,1853],{},"语义不连贯",[1730,1855,1551],{},[1701,1857,1858,1861],{},[1730,1859,1860],{},"检索不准",[1730,1862,1863],{},"embedding 失真",[15,1865,1866],{},"🧠 正确思路",[15,1868,1869,1870,1873,1874,1224,1877,1880],{},"👉 ",[25,1871,1872],{},"不要按“字符”，要按“语义结构”切","，针对性地使用 ",[85,1875,1876],{},"MarkdownTextSplitter",[85,1878,1879],{},"PythonCodeTextSplitter","，它们会识别语法结构，尽量保证一个代码块或一个段落的完整性。",[63,1882,1884],{"id":1883},"_22-向量化-embedding-与-向量数据库-vector-store","2.2 向量化 (Embedding) 与 向量数据库 (Vector Store)",[15,1886,1887,1888],{},"当我们把文档切成了无数个“语义完好”的小积木（Chunks）后，接下来的挑战是：",[25,1889,1890],{},"计算机怎么知道哪块积木和用户的问题最相关？",[15,1892,1893,1894,1897],{},"这就是 ",[25,1895,1896],{},"向量化 (Embedding)"," 的魔力：",[19,1899,1900,1910,1916],{},[22,1901,1902,1905,1906,1909],{},[25,1903,1904],{},"转换","：我们将每一块文本通过 Embedding 模型（如 OpenAI 的 ",[85,1907,1908],{},"text-embedding-3-small","）转换成一串长长的数字向量。",[22,1911,1912,1915],{},[25,1913,1914],{},"空间关系","：在数学空间里，语义相近的句子，它们的向量距离就越近。例如，“猫”和“小猫”的向量距离，会比“猫”和“手机”近得多。",[22,1917,1918,1921,1922,1925],{},[25,1919,1920],{},"存储","：为了能快速从成千上万个向量中找到最接近的一个，我们需要专门的仓库 —— ",[25,1923,1924],{},"向量数据库","（常用的有 Chroma, FAISS, Pinecone）。",[15,1927,1928],{},[25,1929,1930],{},"代码实操：建立你的本地向量库",[15,1932,1933,1934,1937,1938,108],{},"我们将刚才切好的 ",[85,1935,1936],{},"splits"," 转化成向量并存入一个内存数据库（这里使用简单的 ",[85,1939,1940],{},"FAISS",[195,1942,1944],{"className":421,"code":1943,"language":423,"meta":200,"style":200},"from langchain_openai import OpenAIEmbeddings\nfrom langchain_community.vectorstores import FAISS\n\n# 1. 定义 Embedding 模型（负责把文本变数字）\nembeddings = OpenAIEmbeddings()\n\n# 2. 建立向量库\n# 这步会调用 API 将所有的 splits 转化成向量，并构建索引\nvectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)\n\n# 3. 测试检索：寻找与“什么是 Agent 的记忆”最相关的文本块\nquery = \"什么是 Agent 的记忆？\"\ndocs = vectorstore.similarity_search(query)\n\nprint(f\"检索到 {len(docs)} 条相关文档\")\nprint(f\"最匹配的内容片段：\\n{docs[0].page_content[:200]}...\")\n",[85,1945,1946,1957,1969,1973,1978,1988,1992,1997,2002,2031,2035,2040,2050,2060,2064,2090],{"__ignoreMap":200},[204,1947,1948,1950,1952,1954],{"class":206,"line":207},[204,1949,431],{"class":430},[204,1951,448],{"class":434},[204,1953,438],{"class":430},[204,1955,1956],{"class":434}," OpenAIEmbeddings\n",[204,1958,1959,1961,1964,1966],{"class":206,"line":213},[204,1960,431],{"class":430},[204,1962,1963],{"class":434}," langchain_community.vectorstores ",[204,1965,438],{"class":430},[204,1967,1968],{"class":492}," FAISS\n",[204,1970,1971],{"class":206,"line":219},[204,1972,229],{"emptyLinePlaceholder":228},[204,1974,1975],{"class":206,"line":225},[204,1976,1977],{"class":462},"# 1. 定义 Embedding 模型（负责把文本变数字）\n",[204,1979,1980,1983,1985],{"class":206,"line":232},[204,1981,1982],{"class":434},"embeddings ",[204,1984,471],{"class":430},[204,1986,1987],{"class":434}," OpenAIEmbeddings()\n",[204,1989,1990],{"class":206,"line":238},[204,1991,229],{"emptyLinePlaceholder":228},[204,1993,1994],{"class":206,"line":244},[204,1995,1996],{"class":462},"# 2. 建立向量库\n",[204,1998,1999],{"class":206,"line":250},[204,2000,2001],{"class":462},"# 这步会调用 API 将所有的 splits 转化成向量，并构建索引\n",[204,2003,2004,2007,2009,2012,2015,2018,2020,2023,2026,2028],{"class":206,"line":255},[204,2005,2006],{"class":434},"vectorstore ",[204,2008,471],{"class":430},[204,2010,2011],{"class":492}," FAISS",[204,2013,2014],{"class":434},".from_documents(",[204,2016,2017],{"class":767},"documents",[204,2019,471],{"class":430},[204,2021,2022],{"class":434},"splits, ",[204,2024,2025],{"class":767},"embedding",[204,2027,471],{"class":430},[204,2029,2030],{"class":434},"embeddings)\n",[204,2032,2033],{"class":206,"line":261},[204,2034,229],{"emptyLinePlaceholder":228},[204,2036,2037],{"class":206,"line":267},[204,2038,2039],{"class":462},"# 3. 测试检索：寻找与“什么是 Agent 的记忆”最相关的文本块\n",[204,2041,2042,2045,2047],{"class":206,"line":272},[204,2043,2044],{"class":434},"query ",[204,2046,471],{"class":430},[204,2048,2049],{"class":482}," \"什么是 Agent 的记忆？\"\n",[204,2051,2052,2055,2057],{"class":206,"line":278},[204,2053,2054],{"class":434},"docs ",[204,2056,471],{"class":430},[204,2058,2059],{"class":434}," vectorstore.similarity_search(query)\n",[204,2061,2062],{"class":206,"line":284},[204,2063,229],{"emptyLinePlaceholder":228},[204,2065,2066,2068,2070,2073,2076,2079,2082,2085,2088],{"class":206,"line":290},[204,2067,582],{"class":492},[204,2069,726],{"class":434},[204,2071,2072],{"class":430},"f",[204,2074,2075],{"class":482},"\"检索到 ",[204,2077,2078],{"class":492},"{len",[204,2080,2081],{"class":434},"(docs)",[204,2083,2084],{"class":492},"}",[204,2086,2087],{"class":482}," 条相关文档\"",[204,2089,520],{"class":434},[204,2091,2092,2094,2096,2098,2101,2104,2107,2110,2113,2116,2119,2121,2124],{"class":206,"line":296},[204,2093,582],{"class":492},[204,2095,726],{"class":434},[204,2097,2072],{"class":430},[204,2099,2100],{"class":482},"\"最匹配的内容片段：",[204,2102,2103],{"class":492},"\\n{",[204,2105,2106],{"class":434},"docs[",[204,2108,2109],{"class":492},"0",[204,2111,2112],{"class":434},"].page_content[:",[204,2114,2115],{"class":492},"200",[204,2117,2118],{"class":434},"]",[204,2120,2084],{"class":492},[204,2122,2123],{"class":482},"...\"",[204,2125,520],{"class":434},[15,2127,2128],{},[25,2129,2130],{},"让 LCEL 串联起 RAG 全流程 ⛓️",[15,2132,2133,2134,2137,2138,2141],{},"现在你有了一个“图书馆”（向量库），接下来我们需要用 LCEL 把",[25,2135,2136],{},"检索","和",[25,2139,2140],{},"回答","缝合在一起。",[15,2143,2144,2145,2148],{},"这就涉及到一个核心组件：",[25,2146,2147],{},"Retriever（检索器）","。它不是一个数据库，而是一个“寻找者”接口。",[195,2150,2152],{"className":421,"code":2151,"language":423,"meta":200,"style":200},"from langchain_core.runnables import RunnablePassthrough\n\n# 1. 将向量库转为检索器\nretriever = vectorstore.as_retriever()\n\n# 2. 定义 RAG Prompt\ntemplate = \"\"\"你是一个专业的助手。请根据提供的上下文回答问题。\n如果上下文中没有相关信息，请诚实回答不知道。\n\n上下文: {context}\n问题: {question}\n回答:\"\"\"\nprompt = ChatPromptTemplate.from_template(template)\n\n# 3. 构建 RAG 链\nrag_chain = (\n    # 这里用到了我们之前学的并行逻辑\n    {\"context\": retriever, \"question\": RunnablePassthrough()}\n    | prompt\n    | model\n    | StrOutputParser()\n)\n\n# 4. 一键执行\nresponse = rag_chain.invoke(\"如何定义大模型 Agent 的长期记忆？\")\nprint(response)\n",[85,2153,2154,2166,2170,2175,2185,2189,2194,2204,2209,2213,2221,2229,2234,2244,2248,2253,2263,2268,2285,2293,2300,2307,2311,2315,2320,2335],{"__ignoreMap":200},[204,2155,2156,2158,2161,2163],{"class":206,"line":207},[204,2157,431],{"class":430},[204,2159,2160],{"class":434}," langchain_core.runnables ",[204,2162,438],{"class":430},[204,2164,2165],{"class":434}," RunnablePassthrough\n",[204,2167,2168],{"class":206,"line":213},[204,2169,229],{"emptyLinePlaceholder":228},[204,2171,2172],{"class":206,"line":219},[204,2173,2174],{"class":462},"# 1. 将向量库转为检索器\n",[204,2176,2177,2180,2182],{"class":206,"line":225},[204,2178,2179],{"class":434},"retriever ",[204,2181,471],{"class":430},[204,2183,2184],{"class":434}," vectorstore.as_retriever()\n",[204,2186,2187],{"class":206,"line":232},[204,2188,229],{"emptyLinePlaceholder":228},[204,2190,2191],{"class":206,"line":238},[204,2192,2193],{"class":462},"# 2. 定义 RAG Prompt\n",[204,2195,2196,2199,2201],{"class":206,"line":244},[204,2197,2198],{"class":434},"template ",[204,2200,471],{"class":430},[204,2202,2203],{"class":482}," \"\"\"你是一个专业的助手。请根据提供的上下文回答问题。\n",[204,2205,2206],{"class":206,"line":250},[204,2207,2208],{"class":482},"如果上下文中没有相关信息，请诚实回答不知道。\n",[204,2210,2211],{"class":206,"line":255},[204,2212,229],{"emptyLinePlaceholder":228},[204,2214,2215,2218],{"class":206,"line":261},[204,2216,2217],{"class":482},"上下文: ",[204,2219,2220],{"class":492},"{context}\n",[204,2222,2223,2226],{"class":206,"line":267},[204,2224,2225],{"class":482},"问题: ",[204,2227,2228],{"class":492},"{question}\n",[204,2230,2231],{"class":206,"line":272},[204,2232,2233],{"class":482},"回答:\"\"\"\n",[204,2235,2236,2239,2241],{"class":206,"line":278},[204,2237,2238],{"class":434},"prompt ",[204,2240,471],{"class":430},[204,2242,2243],{"class":434}," ChatPromptTemplate.from_template(template)\n",[204,2245,2246],{"class":206,"line":284},[204,2247,229],{"emptyLinePlaceholder":228},[204,2249,2250],{"class":206,"line":290},[204,2251,2252],{"class":462},"# 3. 构建 RAG 链\n",[204,2254,2255,2258,2260],{"class":206,"line":296},[204,2256,2257],{"class":434},"rag_chain ",[204,2259,471],{"class":430},[204,2261,2262],{"class":434}," (\n",[204,2264,2265],{"class":206,"line":301},[204,2266,2267],{"class":462},"    # 这里用到了我们之前学的并行逻辑\n",[204,2269,2270,2273,2276,2279,2282],{"class":206,"line":307},[204,2271,2272],{"class":434},"    {",[204,2274,2275],{"class":482},"\"context\"",[204,2277,2278],{"class":434},": retriever, ",[204,2280,2281],{"class":482},"\"question\"",[204,2283,2284],{"class":434},": RunnablePassthrough()}\n",[204,2286,2287,2290],{"class":206,"line":313},[204,2288,2289],{"class":430},"    |",[204,2291,2292],{"class":434}," prompt\n",[204,2294,2295,2297],{"class":206,"line":872},[204,2296,2289],{"class":430},[204,2298,2299],{"class":434}," model\n",[204,2301,2302,2304],{"class":206,"line":886},[204,2303,2289],{"class":430},[204,2305,2306],{"class":434}," StrOutputParser()\n",[204,2308,2309],{"class":206,"line":906},[204,2310,520],{"class":434},[204,2312,2313],{"class":206,"line":911},[204,2314,229],{"emptyLinePlaceholder":228},[204,2316,2317],{"class":206,"line":916},[204,2318,2319],{"class":462},"# 4. 一键执行\n",[204,2321,2322,2325,2327,2330,2333],{"class":206,"line":922},[204,2323,2324],{"class":434},"response ",[204,2326,471],{"class":430},[204,2328,2329],{"class":434}," rag_chain.invoke(",[204,2331,2332],{"class":482},"\"如何定义大模型 Agent 的长期记忆？\"",[204,2334,520],{"class":434},[204,2336,2337,2339],{"class":206,"line":937},[204,2338,582],{"class":492},[204,2340,2341],{"class":434},"(response)\n",[15,2343,2344],{},[25,2345,2346],{},"深度点拨 🧠",[15,2348,2349,2350,2353,2354,2357],{},"在这个 ",[85,2351,2352],{},"rag_chain"," 中，当你调用 ",[85,2355,2356],{},"invoke"," 时：",[19,2359,2360,2369,2377],{},[22,2361,2362,2365,2366,88],{},[85,2363,2364],{},"question"," 被同时传给了 ",[85,2367,2368],{},"retriever",[22,2370,2371,2373,2374,88],{},[85,2372,2368],{}," 自动去向量库里“捞”出了最相关的文档，填入了 ",[85,2375,2376],{},"context",[22,2378,2379],{},"Prompt 拿到了真实的背景知识，从而避免了“一本正经胡说八道”（幻觉）。",[63,2381,2383],{"id":2382},"_23-retrievers从查库到智能搜索的跃迁","2.3 Retrievers：从“查库”到“智能搜索”的跃迁",[15,2385,2386],{},"如果我们的向量库里存了上万条数据，但用户的问题非常口语化（例如：“那个谁写的那个关于 AI 的东西是怎么说的？”），光靠“向量距离匹配”确实很难处理模糊或口语化的查询。在万级甚至亿级数据的库里，如果用户说“那个谁写的关于 AI 的东西”，向量搜索可能会因为“东西”、“那个谁”这些词的干扰，带回一堆噪音。",[15,2388,2389,2390,2393,2394,2397,2398,2401,2402,2405],{},"在工程化 RAG 中，我们通过 ",[25,2391,2392],{},"Retrievers (检索器)"," 这一层抽象，在“数据库”和“模型”之间加入一套精密的",[25,2395,2396],{},"逻辑过滤器","。在 LangChain 的哲学里，",[85,2399,2400],{},"Vectorstore"," 是存储，而 ",[85,2403,2404],{},"Retriever"," 是行为。一个检索器不仅可以去向量库里搜，还可以去搜索引擎、图数据库、甚至是你的本地文件里搜。",[15,2407,2408],{},[25,2409,2410],{},"策略一：多查询检索 (Multi-Query Retriever) —— 解决“词不达意”",[15,2412,2413,2414,88],{},"这是应对“口语化、模糊查询”的工业级杀手锏。它的逻辑不是直接拿着用户的那句烂话去搜，而是先让大模型把这句话",[25,2415,2416],{},"重写成 3-5 个不同角度的专业查询",[76,2418,2419],{},[22,2420,2421,2424,2425],{},[25,2422,2423],{},"工程逻辑","：\n",[19,2426,2427,2430,2444],{},[22,2428,2429],{},"用户输入：“那个谁写的 AI 东西”。",[22,2431,2432,2433],{},"LLM 转换成：\n",[76,2434,2435,2438,2441],{},[22,2436,2437],{},"“Lilian Weng 关于 LLM Agents 的综述文章”",[22,2439,2440],{},"“人工智能代理系统的核心架构设计”",[22,2442,2443],{},"“2023年关于 AI Agent 的深度分析”",[22,2445,2446,2447,88],{},"拿着这 3 句话去向量库搜，最后把结果",[25,2448,2449],{},"去重合并",[195,2451,2453],{"className":197,"code":2452,"language":199,"meta":200,"style":200},"from langchain.retrievers.multi_query import MultiQueryRetriever\nfrom langchain_openai import ChatOpenAI\n\n# 定义大脑\nmodel = ChatOpenAI(temperature=0)\n\n# 将简单的向量库包装成一个“聪明”的多查询检索器\nretriever_from_llm = MultiQueryRetriever.from_llm(\n    retriever=vectorstore.as_retriever(), \n    llm=model\n)\n\n# 这样搜出来的结果，覆盖面和准确度会大幅提升\nunique_docs = retriever_from_llm.invoke(\"那个谁写的 AI 东西\")\n",[85,2454,2455,2460,2464,2468,2473,2478,2482,2487,2492,2497,2502,2506,2510,2515],{"__ignoreMap":200},[204,2456,2457],{"class":206,"line":207},[204,2458,2459],{},"from langchain.retrievers.multi_query import MultiQueryRetriever\n",[204,2461,2462],{"class":206,"line":213},[204,2463,216],{},[204,2465,2466],{"class":206,"line":219},[204,2467,229],{"emptyLinePlaceholder":228},[204,2469,2470],{"class":206,"line":225},[204,2471,2472],{},"# 定义大脑\n",[204,2474,2475],{"class":206,"line":232},[204,2476,2477],{},"model = ChatOpenAI(temperature=0)\n",[204,2479,2480],{"class":206,"line":238},[204,2481,229],{"emptyLinePlaceholder":228},[204,2483,2484],{"class":206,"line":244},[204,2485,2486],{},"# 将简单的向量库包装成一个“聪明”的多查询检索器\n",[204,2488,2489],{"class":206,"line":250},[204,2490,2491],{},"retriever_from_llm = MultiQueryRetriever.from_llm(\n",[204,2493,2494],{"class":206,"line":255},[204,2495,2496],{},"    retriever=vectorstore.as_retriever(), \n",[204,2498,2499],{"class":206,"line":261},[204,2500,2501],{},"    llm=model\n",[204,2503,2504],{"class":206,"line":267},[204,2505,520],{},[204,2507,2508],{"class":206,"line":272},[204,2509,229],{"emptyLinePlaceholder":228},[204,2511,2512],{"class":206,"line":278},[204,2513,2514],{},"# 这样搜出来的结果，覆盖面和准确度会大幅提升\n",[204,2516,2517],{"class":206,"line":284},[204,2518,2519],{},"unique_docs = retriever_from_llm.invoke(\"那个谁写的 AI 东西\")\n",[15,2521,2522],{},[25,2523,2524],{},"策略二：上下文压缩 (Contextual Compression) —— 告别“Token 浪费”",[15,2526,2527,2528,2531],{},"向量库检索返回的是整个“块（Chunk）”。但大模型可能只需要这个块里的",[25,2529,2530],{},"一两句话","。如果把整块都喂给模型，不仅浪费钱，还会因为干扰信息太多导致模型智商下降。",[76,2533,2534],{},[22,2535,2536,2538,2539,2542],{},[25,2537,2423],{},"：利用一个专门的 ",[85,2540,2541],{},"Compressor","（压缩器），在检索回来的结果里进行二次筛选，只把最相关的片段“剪”出来喂给模型。",[15,2544,2545],{},[25,2546,2547],{},"策略三：混合检索 (Ensemble Retrieval) —— 兼顾“语义”与“关键词”",[15,2549,2550],{},"这是目前中文 RAG 工程中的标准配置。",[76,2552,2553,2559,2565],{},[22,2554,2555,2558],{},[25,2556,2557],{},"向量搜索 (Dense)","：擅长搜“语义”。（搜“猫”，能找到“小猫”、“萌宠”）",[22,2560,2561,2564],{},[25,2562,2563],{},"关键字搜索 (BM25/Sparse)","：擅长搜“特定名词”。（搜“iPhone 15 Pro Max”，向量搜索可能会带回一堆“苹果手机”，而 BM25 能精准锁定型号）",[22,2566,2567,2570],{},[25,2568,2569],{},"Ensemble","：将两者的结果按权重（如 RRF 算法）融合。",[195,2572,2574],{"className":197,"code":2573,"language":199,"meta":200,"style":200},"from langchain.retrievers import BM25Retriever, EnsembleRetriever\n\n# 1. 初始化关键字检索器\nbm25_retriever = BM25Retriever.from_documents(documents)\nbm25_retriever.k = 2\n\n# 2. 初始化向量检索器\nvector_retriever = vectorstore.as_retriever(search_kwargs={\"k\": 2})\n\n# 3. 组合：权重各占 0.5\nensemble_retriever = EnsembleRetriever(\n    retrievers=[bm25_retriever, vector_retriever], \n    weights=[0.5, 0.5]\n)\n",[85,2575,2576,2581,2585,2590,2595,2600,2604,2609,2614,2618,2623,2628,2633,2638],{"__ignoreMap":200},[204,2577,2578],{"class":206,"line":207},[204,2579,2580],{},"from langchain.retrievers import BM25Retriever, EnsembleRetriever\n",[204,2582,2583],{"class":206,"line":213},[204,2584,229],{"emptyLinePlaceholder":228},[204,2586,2587],{"class":206,"line":219},[204,2588,2589],{},"# 1. 初始化关键字检索器\n",[204,2591,2592],{"class":206,"line":225},[204,2593,2594],{},"bm25_retriever = BM25Retriever.from_documents(documents)\n",[204,2596,2597],{"class":206,"line":232},[204,2598,2599],{},"bm25_retriever.k = 2\n",[204,2601,2602],{"class":206,"line":238},[204,2603,229],{"emptyLinePlaceholder":228},[204,2605,2606],{"class":206,"line":244},[204,2607,2608],{},"# 2. 初始化向量检索器\n",[204,2610,2611],{"class":206,"line":250},[204,2612,2613],{},"vector_retriever = vectorstore.as_retriever(search_kwargs={\"k\": 2})\n",[204,2615,2616],{"class":206,"line":255},[204,2617,229],{"emptyLinePlaceholder":228},[204,2619,2620],{"class":206,"line":261},[204,2621,2622],{},"# 3. 组合：权重各占 0.5\n",[204,2624,2625],{"class":206,"line":267},[204,2626,2627],{},"ensemble_retriever = EnsembleRetriever(\n",[204,2629,2630],{"class":206,"line":272},[204,2631,2632],{},"    retrievers=[bm25_retriever, vector_retriever], \n",[204,2634,2635],{"class":206,"line":278},[204,2636,2637],{},"    weights=[0.5, 0.5]\n",[204,2639,2640],{"class":206,"line":284},[204,2641,520],{},[324,2643],{},[15,2645,2646],{},[25,2647,2648],{},"检索系统的“工程红线” 🚩",[15,2650,2651],{},"作为资深工程师，在设计 RAG 检索层时，必须考虑以下三个工程指标：",[19,2653,2654,2660,2666],{},[22,2655,2656,2659],{},[25,2657,2658],{},"Recall (召回率)","：宁可多搜点，别漏掉关键信息。这就是为什么要用多查询和混合检索。",[22,2661,2662,2665],{},[25,2663,2664],{},"Precision (精准率)","：别带回一堆废话，否则会引发模型“幻觉”。这需要靠上下文压缩和 Rerank（重排）。",[22,2667,2668,2671],{},[25,2669,2670],{},"Latency (延迟)","：多查询和压缩都会增加调用次数，导致变慢。在追求极致体验的前端应用中，通常需要异步并行这些任务。",[324,2673],{},[15,2675,2676],{},[25,2677,2678],{},"Rerank：检索的最后一道防线 🛡️",[15,2680,2681],{},"在复杂的 RAG 系统中，我们通常会搜回 10-20 个文档块，但最后喂给 LLM 的只有前 3 个。",[15,2683,2684,2687],{},[25,2685,2686],{},"问题来了","：向量数据库基于余弦相似度的评分，真的代表“最相关”吗？不一定。",[15,2689,2690,2693,2694,2697],{},[25,2691,2692],{},"工程做法","：引入一个 ",[25,2695,2696],{},"Rerank Model","（重排模型，如 BGE-Reranker）。",[19,2699,2700,2703],{},[22,2701,2702],{},"向量库先初步筛选（粗排）。",[22,2704,2705],{},"Rerank 模型对这 10 个结果进行精细打分，把真正最有用的排到最前面。这是提升 RAG 回答准确度最立竿见影的手段。",[324,2707],{},[63,2709,2711],{"id":2710},"_24-拥有记忆的对话链","2.4 拥有记忆的对话链",[15,2713,2714,2717,2718,2721,2722,2724],{},[25,2715,2716],{},"现在的 RAG 看起来很完美，但请思考一个实际场景："," 如果用户第一句问：“什么是量子计算？”，第二句问：“",[25,2719,2720],{},"它","有什么应用？”，由于 RAG 链每次 ",[85,2723,2356],{}," 都是独立的，你的检索器去搜“它”这个词时，能搜到正确的结果吗？",[15,2726,2727,2728,2731],{},"为了解决这个问题，我们需要引入一个核心机制：",[25,2729,2730],{},"查询重写 (Query Transformation) 与 历史记忆 (Memory)","。在真实的业务架构中，我们会在用户提问和检索器之间，插入一个**“问题重写层” (History-aware Retriever)**。",[15,2733,2734],{},[25,2735,2736],{},"它的工作流是这样的：",[19,2738,2739,2745,2751,2761],{},[22,2740,2741,2744],{},[25,2742,2743],{},"拦截","：拦截用户的最新问题（“它有什么应用？”）。",[22,2746,2747,2750],{},[25,2748,2749],{},"回顾","：提取之前的聊天记录（“什么是量子计算？”）。",[22,2752,2753,2756,2757,2760],{},[25,2754,2755],{},"重写","：利用大模型，将这两个信息融合成一个",[25,2758,2759],{},"独立的、无歧义的查询","（“量子计算有什么应用？”）。",[22,2762,2763,2765],{},[25,2764,2136],{},"：拿着重写后的查询，再去向量数据库搜索。",[15,2767,2768],{},[25,2769,2770],{},"逻辑代码演示",[15,2772,2773],{},"在 LangChain (LCEL) 中，我们可以这样组合积木：",[195,2775,2777],{"className":421,"code":2776,"language":423,"meta":200,"style":200},"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\nfrom langchain_core.runnables import RunnablePassthrough\nfrom langchain_core.output_parsers import StrOutputParser\n\n# 1. 定义问题重写 Prompt (引入了 MessagesPlaceholder 来接收历史记录)\ncondense_prompt = ChatPromptTemplate.from_messages([\n    (\"system\", \"根据历史对话，将用户的最新问题重写为一个独立的问题。\"),\n    MessagesPlaceholder(variable_name=\"chat_history\"),\n    (\"human\", \"{question}\")\n])\n\n# 2. 构建“查询重写链”\ncondense_question_chain = condense_prompt | model | StrOutputParser()\n\n# 3. 构建完整的对话 RAG 链\n# 注意：这只是核心逻辑演示，实际生产中常结合 RunnableWithMessageHistory 使用\nconversational_rag_chain = (\n    RunnablePassthrough.assign(\n        # 动态生成重写后的问题\n        standalone_question=condense_question_chain\n    )\n    | RunnablePassthrough.assign(\n        # 拿重写后的问题去检索上下文\n        context=lambda x: retriever.invoke(x[\"standalone_question\"])\n    )\n    | answer_prompt # 最后交给回答 Prompt\n    | model\n)\n",[85,2778,2779,2790,2800,2812,2816,2821,2830,2843,2858,2876,2880,2884,2889,2909,2913,2918,2923,2932,2937,2942,2952,2956,2963,2968,2984,2988,2998,3004],{"__ignoreMap":200},[204,2780,2781,2783,2785,2787],{"class":206,"line":207},[204,2782,431],{"class":430},[204,2784,435],{"class":434},[204,2786,438],{"class":430},[204,2788,2789],{"class":434}," ChatPromptTemplate, MessagesPlaceholder\n",[204,2791,2792,2794,2796,2798],{"class":206,"line":213},[204,2793,431],{"class":430},[204,2795,2160],{"class":434},[204,2797,438],{"class":430},[204,2799,2165],{"class":434},[204,2801,2802,2804,2807,2809],{"class":206,"line":219},[204,2803,431],{"class":430},[204,2805,2806],{"class":434}," langchain_core.output_parsers ",[204,2808,438],{"class":430},[204,2810,2811],{"class":434}," StrOutputParser\n",[204,2813,2814],{"class":206,"line":225},[204,2815,229],{"emptyLinePlaceholder":228},[204,2817,2818],{"class":206,"line":232},[204,2819,2820],{"class":462},"# 1. 定义问题重写 Prompt (引入了 MessagesPlaceholder 来接收历史记录)\n",[204,2822,2823,2826,2828],{"class":206,"line":238},[204,2824,2825],{"class":434},"condense_prompt ",[204,2827,471],{"class":430},[204,2829,474],{"class":434},[204,2831,2832,2834,2836,2838,2841],{"class":206,"line":244},[204,2833,479],{"class":434},[204,2835,483],{"class":482},[204,2837,486],{"class":434},[204,2839,2840],{"class":482},"\"根据历史对话，将用户的最新问题重写为一个独立的问题。\"",[204,2842,499],{"class":434},[204,2844,2845,2848,2851,2853,2856],{"class":206,"line":250},[204,2846,2847],{"class":434},"    MessagesPlaceholder(",[204,2849,2850],{"class":767},"variable_name",[204,2852,471],{"class":430},[204,2854,2855],{"class":482},"\"chat_history\"",[204,2857,499],{"class":434},[204,2859,2860,2862,2864,2866,2869,2872,2874],{"class":206,"line":255},[204,2861,479],{"class":434},[204,2863,506],{"class":482},[204,2865,486],{"class":434},[204,2867,2868],{"class":482},"\"",[204,2870,2871],{"class":492},"{question}",[204,2873,2868],{"class":482},[204,2875,520],{"class":434},[204,2877,2878],{"class":206,"line":261},[204,2879,525],{"class":434},[204,2881,2882],{"class":206,"line":267},[204,2883,229],{"emptyLinePlaceholder":228},[204,2885,2886],{"class":206,"line":272},[204,2887,2888],{"class":462},"# 2. 构建“查询重写链”\n",[204,2890,2891,2894,2896,2899,2902,2905,2907],{"class":206,"line":278},[204,2892,2893],{"class":434},"condense_question_chain ",[204,2895,471],{"class":430},[204,2897,2898],{"class":434}," condense_prompt ",[204,2900,2901],{"class":430},"|",[204,2903,2904],{"class":434}," model ",[204,2906,2901],{"class":430},[204,2908,2306],{"class":434},[204,2910,2911],{"class":206,"line":284},[204,2912,229],{"emptyLinePlaceholder":228},[204,2914,2915],{"class":206,"line":290},[204,2916,2917],{"class":462},"# 3. 构建完整的对话 RAG 链\n",[204,2919,2920],{"class":206,"line":296},[204,2921,2922],{"class":462},"# 注意：这只是核心逻辑演示，实际生产中常结合 RunnableWithMessageHistory 使用\n",[204,2924,2925,2928,2930],{"class":206,"line":301},[204,2926,2927],{"class":434},"conversational_rag_chain ",[204,2929,471],{"class":430},[204,2931,2262],{"class":434},[204,2933,2934],{"class":206,"line":307},[204,2935,2936],{"class":434},"    RunnablePassthrough.assign(\n",[204,2938,2939],{"class":206,"line":313},[204,2940,2941],{"class":462},"        # 动态生成重写后的问题\n",[204,2943,2944,2947,2949],{"class":206,"line":872},[204,2945,2946],{"class":767},"        standalone_question",[204,2948,471],{"class":430},[204,2950,2951],{"class":434},"condense_question_chain\n",[204,2953,2954],{"class":206,"line":886},[204,2955,824],{"class":434},[204,2957,2958,2960],{"class":206,"line":906},[204,2959,2289],{"class":430},[204,2961,2962],{"class":434}," RunnablePassthrough.assign(\n",[204,2964,2965],{"class":206,"line":911},[204,2966,2967],{"class":462},"        # 拿重写后的问题去检索上下文\n",[204,2969,2970,2973,2976,2979,2982],{"class":206,"line":916},[204,2971,2972],{"class":767},"        context",[204,2974,2975],{"class":430},"=lambda",[204,2977,2978],{"class":434}," x: retriever.invoke(x[",[204,2980,2981],{"class":482},"\"standalone_question\"",[204,2983,525],{"class":434},[204,2985,2986],{"class":206,"line":922},[204,2987,824],{"class":434},[204,2989,2990,2992,2995],{"class":206,"line":937},[204,2991,2289],{"class":430},[204,2993,2994],{"class":434}," answer_prompt ",[204,2996,2997],{"class":462},"# 最后交给回答 Prompt\n",[204,2999,3000,3002],{"class":206,"line":947},[204,3001,2289],{"class":430},[204,3003,2299],{"class":434},[204,3005,3006],{"class":206,"line":1177},[204,3007,520],{"class":434},[15,3009,3010],{},"通过这套组合拳，你的 AI 就不再是“金鱼记忆”，而是一个能进行多轮深度探讨的领域专家了。",[324,3012],{},[58,3014,3016],{"id":3015},"_3-lcel","3. LCEL",[15,3018,3019,3020,3023,3024,3026],{},"在深入复杂的 Agent 之前，我们必须先理解最基本的 ",[25,3021,3022],{},"LCEL (LangChain 表达式语言)","。它使用类似 Unix 管道符 ",[85,3025,2901],{}," 的语法，让逻辑清晰可见。",[15,3028,3029,3030,3033,3034,3040],{},"很多开发者初看 LCEL，觉得 ",[85,3031,3032],{},"chain = prompt | model | parser"," 只是为了少写几行 Python 代码。**这是一个巨大的误解。**LCEL 的真正价值在于它实现了一套统一的 ",[25,3035,3036,3039],{},[85,3037,3038],{},"Runnable"," 协议","。在工业级应用中，这意味着你的链条天然具备了以下高级特性：",[19,3042,3043,3052,3061,3070],{},[22,3044,3045,3051],{},[25,3046,3047,3048,982],{},"异步支持 (",[85,3049,3050],{},"ainvoke","：在高并发的后端（如 FastAPI）中，你可以无缝使用异步调用，不会阻塞线程。",[22,3053,3054,3060],{},[25,3055,3056,3057,982],{},"流式传输 (",[85,3058,3059],{},"stream","：即使是极其复杂的 RAG 链，你也能让模型像打字机一样一个字一个字地吐出来。",[22,3062,3063,3069],{},[25,3064,3065,3066,982],{},"自动并行 (",[85,3067,3068],{},"RunnableParallel","：如果你有两个检索器，它们会自动并发运行，显著降低响应时长（Latency）。",[22,3071,3072,3078],{},[25,3073,3074,3075,982],{},"容错回退 (",[85,3076,3077],{},"with_fallbacks","：模型挂了？自动切换到备用模型，无需写复杂的 try-except。",[324,3080],{},[15,3082,3083],{},[25,3084,3085],{},"核心基石：Runnable 协议 🧱",[15,3087,3088,3089,88],{},"LangChain 之所以能像搭积木一样灵活，是因为几乎所有的组件（如模型、提示词、解析器、甚至你自定义的函数）都遵循同一个协议：",[25,3090,3038],{},[15,3092,3093],{},"这意味着不管组件内部逻辑多复杂，它们都拥有统一的“标准接口”。最常用的四个方法是：",[76,3095,3096,3103,3111,3118],{},[22,3097,3098,3102],{},[25,3099,3100],{},[85,3101,2356],{},"：同步调用，单个输入，单个输出。",[22,3104,3105,3110],{},[25,3106,3107],{},[85,3108,3109],{},"batch","：内部并行执行，输入列表，输出列表。",[22,3112,3113,3117],{},[25,3114,3115],{},[85,3116,3059],{},"：流式输出。",[22,3119,3120,3124],{},[25,3121,3122],{},[85,3123,3050],{},"：上述方法的异步版本。",[324,3126],{},[15,3128,3129],{},[25,3130,3131],{},"代码实操：感受“链”的力量 🔗",[15,3133,3134],{},"我们将Model 和Retrieval结合起来，构建一条具备参数透传、并行检索和容错机制的生产级链路。",[195,3136,3138],{"className":421,"code":3137,"language":423,"meta":200,"style":200},"from langchain_core.runnables import RunnablePassthrough, RunnableParallel\nfrom langchain_core.output_parsers import StrOutputParser\nfrom langchain_openai import ChatOpenAI\n\n# 1. 定义组件\nmodel = ChatOpenAI(model=\"gpt-3.5-turbo\")\nfallback_model = ChatOpenAI(model=\"gpt-4o\") # 备用模型\nretriever = vectorstore.as_retriever()\n\n# 2. 构建具备容错的模型组件\nsmart_model = model.with_fallbacks([fallback_model])\n\n# 3. 组合链（LCEL）\nchain = (\n    # 第一阶段：并行准备数据\n    # context 去搜库，question 直接透传\n    RunnableParallel({\n        \"context\": retriever,\n        \"question\": RunnablePassthrough()\n    })\n    # 第二阶段：填入 Prompt\n    | prompt_template \n    # 第三阶段：调用具备回退机制的模型\n    | smart_model \n    # 第四阶段：标准化解析\n    | StrOutputParser()\n)\n\n# 4. 生产级调用：流式输出\nfor chunk in chain.stream(\"什么是量子纠缠？\"):\n    print(chunk, end=\"\", flush=True)\n",[85,3139,3140,3151,3161,3171,3175,3180,3200,3222,3230,3234,3239,3249,3253,3258,3267,3272,3277,3282,3290,3298,3303,3308,3315,3320,3327,3332,3338,3342,3346,3351,3371],{"__ignoreMap":200},[204,3141,3142,3144,3146,3148],{"class":206,"line":207},[204,3143,431],{"class":430},[204,3145,2160],{"class":434},[204,3147,438],{"class":430},[204,3149,3150],{"class":434}," RunnablePassthrough, RunnableParallel\n",[204,3152,3153,3155,3157,3159],{"class":206,"line":213},[204,3154,431],{"class":430},[204,3156,2806],{"class":434},[204,3158,438],{"class":430},[204,3160,2811],{"class":434},[204,3162,3163,3165,3167,3169],{"class":206,"line":219},[204,3164,431],{"class":430},[204,3166,448],{"class":434},[204,3168,438],{"class":430},[204,3170,453],{"class":434},[204,3172,3173],{"class":206,"line":225},[204,3174,229],{"emptyLinePlaceholder":228},[204,3176,3177],{"class":206,"line":232},[204,3178,3179],{"class":462},"# 1. 定义组件\n",[204,3181,3182,3185,3187,3190,3193,3195,3198],{"class":206,"line":238},[204,3183,3184],{"class":434},"model ",[204,3186,471],{"class":430},[204,3188,3189],{"class":434}," ChatOpenAI(",[204,3191,3192],{"class":767},"model",[204,3194,471],{"class":430},[204,3196,3197],{"class":482},"\"gpt-3.5-turbo\"",[204,3199,520],{"class":434},[204,3201,3202,3205,3207,3209,3211,3213,3216,3219],{"class":206,"line":244},[204,3203,3204],{"class":434},"fallback_model ",[204,3206,471],{"class":430},[204,3208,3189],{"class":434},[204,3210,3192],{"class":767},[204,3212,471],{"class":430},[204,3214,3215],{"class":482},"\"gpt-4o\"",[204,3217,3218],{"class":434},") ",[204,3220,3221],{"class":462},"# 备用模型\n",[204,3223,3224,3226,3228],{"class":206,"line":250},[204,3225,2179],{"class":434},[204,3227,471],{"class":430},[204,3229,2184],{"class":434},[204,3231,3232],{"class":206,"line":255},[204,3233,229],{"emptyLinePlaceholder":228},[204,3235,3236],{"class":206,"line":261},[204,3237,3238],{"class":462},"# 2. 构建具备容错的模型组件\n",[204,3240,3241,3244,3246],{"class":206,"line":267},[204,3242,3243],{"class":434},"smart_model ",[204,3245,471],{"class":430},[204,3247,3248],{"class":434}," model.with_fallbacks([fallback_model])\n",[204,3250,3251],{"class":206,"line":272},[204,3252,229],{"emptyLinePlaceholder":228},[204,3254,3255],{"class":206,"line":278},[204,3256,3257],{"class":462},"# 3. 组合链（LCEL）\n",[204,3259,3260,3263,3265],{"class":206,"line":284},[204,3261,3262],{"class":434},"chain ",[204,3264,471],{"class":430},[204,3266,2262],{"class":434},[204,3268,3269],{"class":206,"line":290},[204,3270,3271],{"class":462},"    # 第一阶段：并行准备数据\n",[204,3273,3274],{"class":206,"line":296},[204,3275,3276],{"class":462},"    # context 去搜库，question 直接透传\n",[204,3278,3279],{"class":206,"line":301},[204,3280,3281],{"class":434},"    RunnableParallel({\n",[204,3283,3284,3287],{"class":206,"line":307},[204,3285,3286],{"class":482},"        \"context\"",[204,3288,3289],{"class":434},": retriever,\n",[204,3291,3292,3295],{"class":206,"line":313},[204,3293,3294],{"class":482},"        \"question\"",[204,3296,3297],{"class":434},": RunnablePassthrough()\n",[204,3299,3300],{"class":206,"line":872},[204,3301,3302],{"class":434},"    })\n",[204,3304,3305],{"class":206,"line":886},[204,3306,3307],{"class":462},"    # 第二阶段：填入 Prompt\n",[204,3309,3310,3312],{"class":206,"line":906},[204,3311,2289],{"class":430},[204,3313,3314],{"class":434}," prompt_template \n",[204,3316,3317],{"class":206,"line":911},[204,3318,3319],{"class":462},"    # 第三阶段：调用具备回退机制的模型\n",[204,3321,3322,3324],{"class":206,"line":916},[204,3323,2289],{"class":430},[204,3325,3326],{"class":434}," smart_model \n",[204,3328,3329],{"class":206,"line":922},[204,3330,3331],{"class":462},"    # 第四阶段：标准化解析\n",[204,3333,3334,3336],{"class":206,"line":937},[204,3335,2289],{"class":430},[204,3337,2306],{"class":434},[204,3339,3340],{"class":206,"line":947},[204,3341,520],{"class":434},[204,3343,3344],{"class":206,"line":1177},[204,3345,229],{"emptyLinePlaceholder":228},[204,3347,3348],{"class":206,"line":1183},[204,3349,3350],{"class":462},"# 4. 生产级调用：流式输出\n",[204,3352,3354,3357,3360,3363,3366,3369],{"class":206,"line":3353},30,[204,3355,3356],{"class":430},"for",[204,3358,3359],{"class":434}," chunk ",[204,3361,3362],{"class":430},"in",[204,3364,3365],{"class":434}," chain.stream(",[204,3367,3368],{"class":482},"\"什么是量子纠缠？\"",[204,3370,732],{"class":434},[204,3372,3374,3377,3380,3383,3385,3388,3390,3393,3395,3398],{"class":206,"line":3373},31,[204,3375,3376],{"class":492},"    print",[204,3378,3379],{"class":434},"(chunk, ",[204,3381,3382],{"class":767},"end",[204,3384,471],{"class":430},[204,3386,3387],{"class":482},"\"\"",[204,3389,486],{"class":434},[204,3391,3392],{"class":767},"flush",[204,3394,471],{"class":430},[204,3396,3397],{"class":492},"True",[204,3399,520],{"class":434},[63,3401,3403],{"id":3402},"_31-数据流控与字典调度","3.1 数据流控与字典调度",[15,3405,3406,3407,104,3411,3416],{},"在实际开发中，Prompt 通常需要多个参数（比如：上下文 + 问题）。但上一个组件的输出往往只是一个字符串。如何把单一的输出拆分成 Prompt 需要的“字典”？这就是 ",[25,3408,3409],{},[85,3410,3068],{},[25,3412,3413],{},[85,3414,3415],{},"RunnablePassthrough"," 的战场。",[15,3418,3419],{},[25,3420,3421],{},"核心组件说明",[76,3423,3424,3431],{},[22,3425,3426,3430],{},[25,3427,3428],{},[85,3429,3068],{}," ：并行执行多个任务，并将结果组合成一个字典。它是 Prompt 的“供货商”。",[22,3432,3433,3437],{},[25,3434,3435],{},[85,3436,3415],{}," ：顾名思义，“透传”。它不做任何改动，直接把上游的数据传给下游，常用于保留用户的原始输入。",[15,3439,3440],{},[25,3441,3442],{},"代码实操：模拟 RAG 的数据准备",[15,3444,3445],{},"假设我们要实现：用户输入一个关键词，我们同时生成它的“百科定义”和“情感分析”，最后汇总给模型。",[195,3447,3449],{"className":421,"code":3448,"language":423,"meta":200,"style":200},"from langchain_core.runnables import RunnableParallel, RunnablePassthrough\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom langchain_openai import ChatOpenAI\nfrom langchain_core.output_parsers import StrOutputParser\n\nmodel = ChatOpenAI(model=\"gpt-3.5-turbo\")\n\n# 1. 模拟两个独立的处理逻辑\ndef fake_retriever(question):\n    return f\"关于 {question} 的专业背景知识...\"\n\ndef sentiment_analysis(question):\n    return \"积极/正面\"\n\n# 2. 构建并行准备层\n# 这步会将原始输入 {\"topic\": \"AI\"} 转化为一个包含三个 key 的字典\nmap_setup = RunnableParallel(\n    context=RunnableLambda(fake_retriever),\n    sentiment=RunnableLambda(sentiment_analysis),\n    question=RunnablePassthrough() # 直接透传用户的原始输入\n)\n\n# 3. 最终的 Prompt\nprompt = ChatPromptTemplate.from_template(\"\"\"\n背景：{context}\n情感基调：{sentiment}\n请结合以上信息，回答用户的问题：{question}\n\"\"\")\n\n# 4. 组合链\nfull_chain = map_setup | prompt | model | StrOutputParser()\n\n# 运行\n# 注意：我们传进去的是一个简单的字符串\nprint(full_chain.invoke(\"量子计算\"))\n",[85,3450,3451,3462,3472,3482,3492,3496,3512,3516,3521,3532,3553,3557,3566,3573,3577,3582,3587,3597,3607,3617,3630,3634,3638,3643,3655,3662,3670,3677,3684,3688,3693,3716,3721,3727,3733],{"__ignoreMap":200},[204,3452,3453,3455,3457,3459],{"class":206,"line":207},[204,3454,431],{"class":430},[204,3456,2160],{"class":434},[204,3458,438],{"class":430},[204,3460,3461],{"class":434}," RunnableParallel, RunnablePassthrough\n",[204,3463,3464,3466,3468,3470],{"class":206,"line":213},[204,3465,431],{"class":430},[204,3467,435],{"class":434},[204,3469,438],{"class":430},[204,3471,441],{"class":434},[204,3473,3474,3476,3478,3480],{"class":206,"line":219},[204,3475,431],{"class":430},[204,3477,448],{"class":434},[204,3479,438],{"class":430},[204,3481,453],{"class":434},[204,3483,3484,3486,3488,3490],{"class":206,"line":225},[204,3485,431],{"class":430},[204,3487,2806],{"class":434},[204,3489,438],{"class":430},[204,3491,2811],{"class":434},[204,3493,3494],{"class":206,"line":232},[204,3495,229],{"emptyLinePlaceholder":228},[204,3497,3498,3500,3502,3504,3506,3508,3510],{"class":206,"line":238},[204,3499,3184],{"class":434},[204,3501,471],{"class":430},[204,3503,3189],{"class":434},[204,3505,3192],{"class":767},[204,3507,471],{"class":430},[204,3509,3197],{"class":482},[204,3511,520],{"class":434},[204,3513,3514],{"class":206,"line":244},[204,3515,229],{"emptyLinePlaceholder":228},[204,3517,3518],{"class":206,"line":250},[204,3519,3520],{"class":462},"# 1. 模拟两个独立的处理逻辑\n",[204,3522,3523,3526,3529],{"class":206,"line":255},[204,3524,3525],{"class":430},"def",[204,3527,3528],{"class":722}," fake_retriever",[204,3530,3531],{"class":434},"(question):\n",[204,3533,3534,3537,3540,3543,3546,3548,3550],{"class":206,"line":261},[204,3535,3536],{"class":430},"    return",[204,3538,3539],{"class":430}," f",[204,3541,3542],{"class":482},"\"关于 ",[204,3544,3545],{"class":492},"{",[204,3547,2364],{"class":434},[204,3549,2084],{"class":492},[204,3551,3552],{"class":482}," 的专业背景知识...\"\n",[204,3554,3555],{"class":206,"line":267},[204,3556,229],{"emptyLinePlaceholder":228},[204,3558,3559,3561,3564],{"class":206,"line":272},[204,3560,3525],{"class":430},[204,3562,3563],{"class":722}," sentiment_analysis",[204,3565,3531],{"class":434},[204,3567,3568,3570],{"class":206,"line":278},[204,3569,3536],{"class":430},[204,3571,3572],{"class":482}," \"积极/正面\"\n",[204,3574,3575],{"class":206,"line":284},[204,3576,229],{"emptyLinePlaceholder":228},[204,3578,3579],{"class":206,"line":290},[204,3580,3581],{"class":462},"# 2. 构建并行准备层\n",[204,3583,3584],{"class":206,"line":296},[204,3585,3586],{"class":462},"# 这步会将原始输入 {\"topic\": \"AI\"} 转化为一个包含三个 key 的字典\n",[204,3588,3589,3592,3594],{"class":206,"line":301},[204,3590,3591],{"class":434},"map_setup ",[204,3593,471],{"class":430},[204,3595,3596],{"class":434}," RunnableParallel(\n",[204,3598,3599,3602,3604],{"class":206,"line":307},[204,3600,3601],{"class":767},"    context",[204,3603,471],{"class":430},[204,3605,3606],{"class":434},"RunnableLambda(fake_retriever),\n",[204,3608,3609,3612,3614],{"class":206,"line":313},[204,3610,3611],{"class":767},"    sentiment",[204,3613,471],{"class":430},[204,3615,3616],{"class":434},"RunnableLambda(sentiment_analysis),\n",[204,3618,3619,3622,3624,3627],{"class":206,"line":872},[204,3620,3621],{"class":767},"    question",[204,3623,471],{"class":430},[204,3625,3626],{"class":434},"RunnablePassthrough() ",[204,3628,3629],{"class":462},"# 直接透传用户的原始输入\n",[204,3631,3632],{"class":206,"line":886},[204,3633,520],{"class":434},[204,3635,3636],{"class":206,"line":906},[204,3637,229],{"emptyLinePlaceholder":228},[204,3639,3640],{"class":206,"line":911},[204,3641,3642],{"class":462},"# 3. 最终的 Prompt\n",[204,3644,3645,3647,3649,3652],{"class":206,"line":916},[204,3646,2238],{"class":434},[204,3648,471],{"class":430},[204,3650,3651],{"class":434}," ChatPromptTemplate.from_template(",[204,3653,3654],{"class":482},"\"\"\"\n",[204,3656,3657,3660],{"class":206,"line":922},[204,3658,3659],{"class":482},"背景：",[204,3661,2220],{"class":492},[204,3663,3664,3667],{"class":206,"line":937},[204,3665,3666],{"class":482},"情感基调：",[204,3668,3669],{"class":492},"{sentiment}\n",[204,3671,3672,3675],{"class":206,"line":947},[204,3673,3674],{"class":482},"请结合以上信息，回答用户的问题：",[204,3676,2228],{"class":492},[204,3678,3679,3682],{"class":206,"line":1177},[204,3680,3681],{"class":482},"\"\"\"",[204,3683,520],{"class":434},[204,3685,3686],{"class":206,"line":1183},[204,3687,229],{"emptyLinePlaceholder":228},[204,3689,3690],{"class":206,"line":3353},[204,3691,3692],{"class":462},"# 4. 组合链\n",[204,3694,3695,3698,3700,3703,3705,3708,3710,3712,3714],{"class":206,"line":3373},[204,3696,3697],{"class":434},"full_chain ",[204,3699,471],{"class":430},[204,3701,3702],{"class":434}," map_setup ",[204,3704,2901],{"class":430},[204,3706,3707],{"class":434}," prompt ",[204,3709,2901],{"class":430},[204,3711,2904],{"class":434},[204,3713,2901],{"class":430},[204,3715,2306],{"class":434},[204,3717,3719],{"class":206,"line":3718},32,[204,3720,229],{"emptyLinePlaceholder":228},[204,3722,3724],{"class":206,"line":3723},33,[204,3725,3726],{"class":462},"# 运行\n",[204,3728,3730],{"class":206,"line":3729},34,[204,3731,3732],{"class":462},"# 注意：我们传进去的是一个简单的字符串\n",[204,3734,3736,3738,3741,3744],{"class":206,"line":3735},35,[204,3737,582],{"class":492},[204,3739,3740],{"class":434},"(full_chain.invoke(",[204,3742,3743],{"class":482},"\"量子计算\"",[204,3745,3746],{"class":434},"))\n",[15,3748,3749],{},[25,3750,3751],{},"关键点拨 💡",[15,3753,3754,3755,3757,3758,3761,3762,3765],{},"在这个链条中，",[85,3756,3068],{}," 就像是一个",[25,3759,3760],{},"数据分流器","。它接收到“量子计算”后，同时把它送到了三个地方，最后又把结果打包成 ",[85,3763,3764],{},"{ \"context\": ..., \"sentiment\": ..., \"question\": ... }"," 喂给了 Prompt。",[63,3767,3769],{"id":3768},"_32-动态路由","3.2 动态路由",[15,3771,3772],{},"在资深工程师的架构设计中，程序不应该是死板的直线，而应该能根据输入内容走向不同的分支。例如：如果用户问的是“编程问题”，走代码模型链；如果问的是“文学创作”，走创意模型链。",[15,3774,3775],{},"在 LangChain 中，实现路由主要有两种方式：",[19,3777,3778,3787],{},[22,3779,3780,3786],{},[25,3781,3782,3783],{},"使用 ",[85,3784,3785],{},"RunnableBranch","（传统的 if-else 声明式语法）。",[22,3788,3789,3792],{},[25,3790,3791],{},"使用自定义函数","（更灵活，目前官方更推荐的方式）。",[15,3794,3795],{},[25,3796,3797],{},"场景实操：智能客服分类器",[15,3799,3800],{},"我们来构建一个逻辑：判断用户的输入，如果是关于“技术支持”，我们就用一种严肃的口吻回答；如果是“闲聊”，我们就用幽默的口吻。",[195,3802,3804],{"className":421,"code":3803,"language":423,"meta":200,"style":200},"from langchain_core.runnables import RunnableLambda\nfrom langchain_openai import ChatOpenAI\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom langchain_core.output_parsers import StrOutputParser\n\n# 1. 定义两个不同的分支链\ntech_chain = ChatPromptTemplate.from_template(\"你是一位严谨的工程师，请回答技术问题：{input}\") | ChatOpenAI()\nchat_chain = ChatPromptTemplate.from_template(\"你是一位幽默的脱口秀演员，请回应：{input}\") | ChatOpenAI()\n\n# 2. 定义分类逻辑\ndef route(info):\n    # 这里的 info 会接收上游传来的数据\n    if \"bug\" in info[\"topic\"].lower() or \"代码\" in info[\"topic\"]:\n        return tech_chain\n    else:\n        return chat_chain\n\n# 3. 构建全链\n# 输入数据 -> 提取/准备数据 -> 路由判断 -> 执行对应分支\nfull_route_chain = (\n    {\"topic\": RunnablePassthrough()} # 包装输入\n    | RunnableLambda(route)          # 核心：根据输入返回另一个 Runnable(也就是链)\n)\n\n# 4. 测试\nprint(\"--- 技术路径 ---\")\nprint(full_route_chain.invoke(\"我的代码报错了\"))\n\nprint(\"\\n--- 闲聊路径 ---\")\nprint(full_route_chain.invoke(\"今天天气不错\"))\n",[85,3805,3806,3817,3827,3837,3847,3851,3856,3880,3902,3906,3911,3921,3926,3961,3969,3977,3984,3988,3993,3998,4007,4019,4029,4033,4037,4042,4053,4065,4069,4084],{"__ignoreMap":200},[204,3807,3808,3810,3812,3814],{"class":206,"line":207},[204,3809,431],{"class":430},[204,3811,2160],{"class":434},[204,3813,438],{"class":430},[204,3815,3816],{"class":434}," RunnableLambda\n",[204,3818,3819,3821,3823,3825],{"class":206,"line":213},[204,3820,431],{"class":430},[204,3822,448],{"class":434},[204,3824,438],{"class":430},[204,3826,453],{"class":434},[204,3828,3829,3831,3833,3835],{"class":206,"line":219},[204,3830,431],{"class":430},[204,3832,435],{"class":434},[204,3834,438],{"class":430},[204,3836,441],{"class":434},[204,3838,3839,3841,3843,3845],{"class":206,"line":225},[204,3840,431],{"class":430},[204,3842,2806],{"class":434},[204,3844,438],{"class":430},[204,3846,2811],{"class":434},[204,3848,3849],{"class":206,"line":232},[204,3850,229],{"emptyLinePlaceholder":228},[204,3852,3853],{"class":206,"line":238},[204,3854,3855],{"class":462},"# 1. 定义两个不同的分支链\n",[204,3857,3858,3861,3863,3865,3868,3871,3873,3875,3877],{"class":206,"line":244},[204,3859,3860],{"class":434},"tech_chain ",[204,3862,471],{"class":430},[204,3864,3651],{"class":434},[204,3866,3867],{"class":482},"\"你是一位严谨的工程师，请回答技术问题：",[204,3869,3870],{"class":492},"{input}",[204,3872,2868],{"class":482},[204,3874,3218],{"class":434},[204,3876,2901],{"class":430},[204,3878,3879],{"class":434}," ChatOpenAI()\n",[204,3881,3882,3885,3887,3889,3892,3894,3896,3898,3900],{"class":206,"line":250},[204,3883,3884],{"class":434},"chat_chain ",[204,3886,471],{"class":430},[204,3888,3651],{"class":434},[204,3890,3891],{"class":482},"\"你是一位幽默的脱口秀演员，请回应：",[204,3893,3870],{"class":492},[204,3895,2868],{"class":482},[204,3897,3218],{"class":434},[204,3899,2901],{"class":430},[204,3901,3879],{"class":434},[204,3903,3904],{"class":206,"line":255},[204,3905,229],{"emptyLinePlaceholder":228},[204,3907,3908],{"class":206,"line":261},[204,3909,3910],{"class":462},"# 2. 定义分类逻辑\n",[204,3912,3913,3915,3918],{"class":206,"line":267},[204,3914,3525],{"class":430},[204,3916,3917],{"class":722}," route",[204,3919,3920],{"class":434},"(info):\n",[204,3922,3923],{"class":206,"line":272},[204,3924,3925],{"class":462},"    # 这里的 info 会接收上游传来的数据\n",[204,3927,3928,3931,3934,3937,3940,3943,3946,3949,3952,3954,3956,3958],{"class":206,"line":278},[204,3929,3930],{"class":430},"    if",[204,3932,3933],{"class":482}," \"bug\"",[204,3935,3936],{"class":430}," in",[204,3938,3939],{"class":434}," info[",[204,3941,3942],{"class":482},"\"topic\"",[204,3944,3945],{"class":434},"].lower() ",[204,3947,3948],{"class":430},"or",[204,3950,3951],{"class":482}," \"代码\"",[204,3953,3936],{"class":430},[204,3955,3939],{"class":434},[204,3957,3942],{"class":482},[204,3959,3960],{"class":434},"]:\n",[204,3962,3963,3966],{"class":206,"line":284},[204,3964,3965],{"class":430},"        return",[204,3967,3968],{"class":434}," tech_chain\n",[204,3970,3971,3974],{"class":206,"line":290},[204,3972,3973],{"class":430},"    else",[204,3975,3976],{"class":434},":\n",[204,3978,3979,3981],{"class":206,"line":296},[204,3980,3965],{"class":430},[204,3982,3983],{"class":434}," chat_chain\n",[204,3985,3986],{"class":206,"line":301},[204,3987,229],{"emptyLinePlaceholder":228},[204,3989,3990],{"class":206,"line":307},[204,3991,3992],{"class":462},"# 3. 构建全链\n",[204,3994,3995],{"class":206,"line":313},[204,3996,3997],{"class":462},"# 输入数据 -> 提取/准备数据 -> 路由判断 -> 执行对应分支\n",[204,3999,4000,4003,4005],{"class":206,"line":872},[204,4001,4002],{"class":434},"full_route_chain ",[204,4004,471],{"class":430},[204,4006,2262],{"class":434},[204,4008,4009,4011,4013,4016],{"class":206,"line":886},[204,4010,2272],{"class":434},[204,4012,3942],{"class":482},[204,4014,4015],{"class":434},": RunnablePassthrough()} ",[204,4017,4018],{"class":462},"# 包装输入\n",[204,4020,4021,4023,4026],{"class":206,"line":906},[204,4022,2289],{"class":430},[204,4024,4025],{"class":434}," RunnableLambda(route)          ",[204,4027,4028],{"class":462},"# 核心：根据输入返回另一个 Runnable(也就是链)\n",[204,4030,4031],{"class":206,"line":911},[204,4032,520],{"class":434},[204,4034,4035],{"class":206,"line":916},[204,4036,229],{"emptyLinePlaceholder":228},[204,4038,4039],{"class":206,"line":922},[204,4040,4041],{"class":462},"# 4. 测试\n",[204,4043,4044,4046,4048,4051],{"class":206,"line":937},[204,4045,582],{"class":492},[204,4047,726],{"class":434},[204,4049,4050],{"class":482},"\"--- 技术路径 ---\"",[204,4052,520],{"class":434},[204,4054,4055,4057,4060,4063],{"class":206,"line":947},[204,4056,582],{"class":492},[204,4058,4059],{"class":434},"(full_route_chain.invoke(",[204,4061,4062],{"class":482},"\"我的代码报错了\"",[204,4064,3746],{"class":434},[204,4066,4067],{"class":206,"line":1177},[204,4068,229],{"emptyLinePlaceholder":228},[204,4070,4071,4073,4075,4077,4079,4082],{"class":206,"line":1183},[204,4072,582],{"class":492},[204,4074,726],{"class":434},[204,4076,2868],{"class":482},[204,4078,1578],{"class":492},[204,4080,4081],{"class":482},"--- 闲聊路径 ---\"",[204,4083,520],{"class":434},[204,4085,4086,4088,4090,4093],{"class":206,"line":3353},[204,4087,582],{"class":492},[204,4089,4059],{"class":434},[204,4091,4092],{"class":482},"\"今天天气不错\"",[204,4094,3746],{"class":434},[15,4096,4097],{},[25,4098,4099],{},"深度解析 🧠",[15,4101,4102,4103,4108,4109,1224,4112,4114,4115,4118],{},"这里的关键在于 ",[25,4104,4105],{},[85,4106,4107],{},"RunnableLambda(route)","。在 LCEL 中，如果一个函数返回的是另一个 ",[85,4110,4111],{},"Chain",[85,4113,3038],{},"，LangChain 会",[25,4116,4117],{},"自动执行","返回的那个链。这就像是你在十字路口问路，路标（route 函数）不仅告诉你该往哪走，还直接把你送到了目的地。",[324,4120],{},[63,4122,4124],{"id":4123},"_33-状态透传与中间变量保留","3.3 状态透传与中间变量保留",[15,4126,4127,4128,4130,4131,4134],{},"在 LCEL 中，如果直接使用管道符 ",[85,4129,2901],{},"，上一步的输出会完全",[25,4132,4133],{},"替换","掉输入。但很多时候，我们需要不断地在原有的数据包上“累加”新信息，而不是替换它。",[15,4136,4137,4140,4141,4144],{},[85,4138,4139],{},"RunnablePassthrough.assign()"," 的妙处在于它实现了类似于 Python 字典中 ",[85,4142,4143],{},"{**old_dict, \"new_key\": \"new_value\"}"," 的逻辑。",[15,4146,4147],{},[25,4148,4149],{},"代码实操：模拟一个“意图识别 + 背景检索”的链",[195,4151,4153],{"className":421,"code":4152,"language":423,"meta":200,"style":200},"from langchain_core.runnables import RunnablePassthrough\nfrom operator import itemgetter\n\n# 1. 模拟一个简单的意图分析函数\ndef analyze_intent(input_dict):\n    text = input_dict[\"question\"]\n    return \"技术咨询\" if \"代码\" in text else \"日常闲聊\"\n\n# 2. 模拟一个简单的检索函数\ndef fake_retriever(input_dict):\n    return f\"检索到关于 [{input_dict['intent']}] 的相关文档...\"\n\n# 3. 使用 assign 构建累加状态的链\nchain = (\n    # 第一步：把输入的字符串包装成字典 {\"question\": \"...\"}\n    {\"question\": RunnablePassthrough()} \n    # 第二步：保留 question，并追加 intent 字段\n    | RunnablePassthrough.assign(intent=analyze_intent)\n    # 第三步：保留之前的 question 和 intent，并追加 context 字段\n    | RunnablePassthrough.assign(context=fake_retriever)\n)\n\n# 运行测试\nresult = chain.invoke(\"帮我看看这段 Python 代码\")\nprint(result)\n# 输出结果会是一个完整的字典：{'question': '...', 'intent': '技术咨询', 'context': '...'}\n",[85,4154,4155,4165,4177,4181,4186,4196,4211,4234,4238,4243,4251,4275,4279,4284,4292,4297,4306,4311,4326,4331,4344,4348,4352,4357,4372,4379],{"__ignoreMap":200},[204,4156,4157,4159,4161,4163],{"class":206,"line":207},[204,4158,431],{"class":430},[204,4160,2160],{"class":434},[204,4162,438],{"class":430},[204,4164,2165],{"class":434},[204,4166,4167,4169,4172,4174],{"class":206,"line":213},[204,4168,431],{"class":430},[204,4170,4171],{"class":434}," operator ",[204,4173,438],{"class":430},[204,4175,4176],{"class":434}," itemgetter\n",[204,4178,4179],{"class":206,"line":219},[204,4180,229],{"emptyLinePlaceholder":228},[204,4182,4183],{"class":206,"line":225},[204,4184,4185],{"class":462},"# 1. 模拟一个简单的意图分析函数\n",[204,4187,4188,4190,4193],{"class":206,"line":232},[204,4189,3525],{"class":430},[204,4191,4192],{"class":722}," analyze_intent",[204,4194,4195],{"class":434},"(input_dict):\n",[204,4197,4198,4201,4203,4206,4208],{"class":206,"line":238},[204,4199,4200],{"class":434},"    text ",[204,4202,471],{"class":430},[204,4204,4205],{"class":434}," input_dict[",[204,4207,2281],{"class":482},[204,4209,4210],{"class":434},"]\n",[204,4212,4213,4215,4218,4221,4223,4225,4228,4231],{"class":206,"line":244},[204,4214,3536],{"class":430},[204,4216,4217],{"class":482}," \"技术咨询\"",[204,4219,4220],{"class":430}," if",[204,4222,3951],{"class":482},[204,4224,3936],{"class":430},[204,4226,4227],{"class":434}," text ",[204,4229,4230],{"class":430},"else",[204,4232,4233],{"class":482}," \"日常闲聊\"\n",[204,4235,4236],{"class":206,"line":250},[204,4237,229],{"emptyLinePlaceholder":228},[204,4239,4240],{"class":206,"line":255},[204,4241,4242],{"class":462},"# 2. 模拟一个简单的检索函数\n",[204,4244,4245,4247,4249],{"class":206,"line":261},[204,4246,3525],{"class":430},[204,4248,3528],{"class":722},[204,4250,4195],{"class":434},[204,4252,4253,4255,4257,4260,4262,4265,4268,4270,4272],{"class":206,"line":267},[204,4254,3536],{"class":430},[204,4256,3539],{"class":430},[204,4258,4259],{"class":482},"\"检索到关于 [",[204,4261,3545],{"class":492},[204,4263,4264],{"class":434},"input_dict[",[204,4266,4267],{"class":482},"'intent'",[204,4269,2118],{"class":434},[204,4271,2084],{"class":492},[204,4273,4274],{"class":482},"] 的相关文档...\"\n",[204,4276,4277],{"class":206,"line":272},[204,4278,229],{"emptyLinePlaceholder":228},[204,4280,4281],{"class":206,"line":278},[204,4282,4283],{"class":462},"# 3. 使用 assign 构建累加状态的链\n",[204,4285,4286,4288,4290],{"class":206,"line":284},[204,4287,3262],{"class":434},[204,4289,471],{"class":430},[204,4291,2262],{"class":434},[204,4293,4294],{"class":206,"line":290},[204,4295,4296],{"class":462},"    # 第一步：把输入的字符串包装成字典 {\"question\": \"...\"}\n",[204,4298,4299,4301,4303],{"class":206,"line":296},[204,4300,2272],{"class":434},[204,4302,2281],{"class":482},[204,4304,4305],{"class":434},": RunnablePassthrough()} \n",[204,4307,4308],{"class":206,"line":301},[204,4309,4310],{"class":462},"    # 第二步：保留 question，并追加 intent 字段\n",[204,4312,4313,4315,4318,4321,4323],{"class":206,"line":307},[204,4314,2289],{"class":430},[204,4316,4317],{"class":434}," RunnablePassthrough.assign(",[204,4319,4320],{"class":767},"intent",[204,4322,471],{"class":430},[204,4324,4325],{"class":434},"analyze_intent)\n",[204,4327,4328],{"class":206,"line":313},[204,4329,4330],{"class":462},"    # 第三步：保留之前的 question 和 intent，并追加 context 字段\n",[204,4332,4333,4335,4337,4339,4341],{"class":206,"line":872},[204,4334,2289],{"class":430},[204,4336,4317],{"class":434},[204,4338,2376],{"class":767},[204,4340,471],{"class":430},[204,4342,4343],{"class":434},"fake_retriever)\n",[204,4345,4346],{"class":206,"line":886},[204,4347,520],{"class":434},[204,4349,4350],{"class":206,"line":906},[204,4351,229],{"emptyLinePlaceholder":228},[204,4353,4354],{"class":206,"line":911},[204,4355,4356],{"class":462},"# 运行测试\n",[204,4358,4359,4362,4364,4367,4370],{"class":206,"line":916},[204,4360,4361],{"class":434},"result ",[204,4363,471],{"class":430},[204,4365,4366],{"class":434}," chain.invoke(",[204,4368,4369],{"class":482},"\"帮我看看这段 Python 代码\"",[204,4371,520],{"class":434},[204,4373,4374,4376],{"class":206,"line":922},[204,4375,582],{"class":492},[204,4377,4378],{"class":434},"(result)\n",[204,4380,4381],{"class":206,"line":937},[204,4382,4383],{"class":462},"# 输出结果会是一个完整的字典：{'question': '...', 'intent': '技术咨询', 'context': '...'}\n",[324,4385],{},[63,4387,4389],{"id":4388},"_34-容错与回退","3.4 容错与回退",[15,4391,4392],{},"在资深工程师的视野里，模型是不稳定的（可能会超时、触发布控或额度耗尽）。如果你的链条很长，中间任何一个环节断了，整个服务就会崩溃。",[15,4394,4395,4396,4399],{},"LangChain 提供了 ",[85,4397,4398],{},".with_fallbacks()"," 方法，允许你为某个组件定义“备胎”。",[195,4401,4403],{"className":421,"code":4402,"language":423,"meta":200,"style":200},"# 假设我们有一个昂贵但强大的模型，和一个便宜但基础的模型\nexpensive_model = ChatOpenAI(model=\"gpt-4\")\ncheap_model = ChatOpenAI(model=\"gpt-3.5-turbo\")\n\n# 定义一个带回退机制的可执行对象\n# 如果 expensive_model 调用失败，它会自动切换到 cheap_model\nrobust_model = expensive_model.with_fallbacks([cheap_model])\n\n# 这样你的链条就具备了生产级的稳定性\nchain = prompt | robust_model | StrOutputParser()\n",[85,4404,4405,4410,4428,4445,4449,4454,4459,4469,4473,4478],{"__ignoreMap":200},[204,4406,4407],{"class":206,"line":207},[204,4408,4409],{"class":462},"# 假设我们有一个昂贵但强大的模型，和一个便宜但基础的模型\n",[204,4411,4412,4415,4417,4419,4421,4423,4426],{"class":206,"line":213},[204,4413,4414],{"class":434},"expensive_model ",[204,4416,471],{"class":430},[204,4418,3189],{"class":434},[204,4420,3192],{"class":767},[204,4422,471],{"class":430},[204,4424,4425],{"class":482},"\"gpt-4\"",[204,4427,520],{"class":434},[204,4429,4430,4433,4435,4437,4439,4441,4443],{"class":206,"line":219},[204,4431,4432],{"class":434},"cheap_model ",[204,4434,471],{"class":430},[204,4436,3189],{"class":434},[204,4438,3192],{"class":767},[204,4440,471],{"class":430},[204,4442,3197],{"class":482},[204,4444,520],{"class":434},[204,4446,4447],{"class":206,"line":225},[204,4448,229],{"emptyLinePlaceholder":228},[204,4450,4451],{"class":206,"line":232},[204,4452,4453],{"class":462},"# 定义一个带回退机制的可执行对象\n",[204,4455,4456],{"class":206,"line":238},[204,4457,4458],{"class":462},"# 如果 expensive_model 调用失败，它会自动切换到 cheap_model\n",[204,4460,4461,4464,4466],{"class":206,"line":244},[204,4462,4463],{"class":434},"robust_model ",[204,4465,471],{"class":430},[204,4467,4468],{"class":434}," expensive_model.with_fallbacks([cheap_model])\n",[204,4470,4471],{"class":206,"line":250},[204,4472,229],{"emptyLinePlaceholder":228},[204,4474,4475],{"class":206,"line":255},[204,4476,4477],{"class":462},"# 这样你的链条就具备了生产级的稳定性\n",[204,4479,4480,4482,4484,4486,4488,4491,4493],{"class":206,"line":261},[204,4481,3262],{"class":434},[204,4483,471],{"class":430},[204,4485,3707],{"class":434},[204,4487,2901],{"class":430},[204,4489,4490],{"class":434}," robust_model ",[204,4492,2901],{"class":430},[204,4494,2306],{"class":434},[15,4496,4497,4498,4501,4502,4505],{},"在标准的 LCEL 中，数据流向通常是一个",[25,4499,4500],{},"有向无环图 (DAG)","，即数据从 A 流向 B，不能从 B 再回到 A。如果你需要实现“观察结果 -> 重新思考 -> 再次行动”这种循环逻辑，我们通常会引入 ",[25,4503,4504],{},"LangGraph","（LangChain 的进化版，专门处理状态循环）。",[58,4507,4509],{"id":4508},"_4-toolskill","4 Tool&Skill",[15,4511,4512,4513,4516],{},"初级开发者把 Tool 看作是一个简单的 Python 函数，但在资深架构师眼中，",[25,4514,4515],{},"Tool 是大模型与企业真实内网环境交互的“危险边缘”","。它涉及到权限控制、并发瓶颈、数据污染以及极高的失败率。",[324,4518],{},[63,4520,4522,4523,390],{"id":4521},"_41-核心基座选型为什么企业级应用必须用-basetool","4.1 核心基座选型：为什么企业级应用必须用 ",[85,4524,4525],{},"BaseTool",[15,4527,4528],{},"LangChain 提供了三种构建工具的方法，但它们在工程上的容错率和扩展性天差地别。",[19,4530,4531],{},[22,4532,4533,4534,4537],{},"**",[85,4535,4536],{},"@tool"," 装饰器 **",[15,4539,4540,4541,4544],{},"它底层利用 Python 的 ",[85,4542,4543],{},"inspect"," 模块去猜你的参数。默认情况下，工具名称来源于函数名称。如果需要更具描述性的名称，请对其进行覆盖：",[195,4546,4548],{"className":421,"code":4547,"language":423,"meta":200,"style":200},"@tool(\"web_search\")  # Custom name\ndef search(query: str) -> str:\n    \"\"\"Search the web for information.\"\"\" #默认情况下，函数的文档字符串会成为工具的描述，帮助模型理解何时使用该工具\n    return f\"Results for: {query}\"\n\nprint(search.name)  # web_search\n",[85,4549,4550,4565,4584,4592,4611,4615],{"__ignoreMap":200},[204,4551,4552,4554,4556,4559,4562],{"class":206,"line":207},[204,4553,4536],{"class":722},[204,4555,726],{"class":434},[204,4557,4558],{"class":482},"\"web_search\"",[204,4560,4561],{"class":434},")  ",[204,4563,4564],{"class":462},"# Custom name\n",[204,4566,4567,4569,4572,4575,4577,4580,4582],{"class":206,"line":213},[204,4568,3525],{"class":430},[204,4570,4571],{"class":722}," search",[204,4573,4574],{"class":434},"(query: ",[204,4576,753],{"class":492},[204,4578,4579],{"class":434},") -> ",[204,4581,753],{"class":492},[204,4583,3976],{"class":434},[204,4585,4586,4589],{"class":206,"line":219},[204,4587,4588],{"class":482},"    \"\"\"Search the web for information.\"\"\"",[204,4590,4591],{"class":462}," #默认情况下，函数的文档字符串会成为工具的描述，帮助模型理解何时使用该工具\n",[204,4593,4594,4596,4598,4601,4603,4606,4608],{"class":206,"line":225},[204,4595,3536],{"class":430},[204,4597,3539],{"class":430},[204,4599,4600],{"class":482},"\"Results for: ",[204,4602,3545],{"class":492},[204,4604,4605],{"class":434},"query",[204,4607,2084],{"class":492},[204,4609,4610],{"class":482},"\"\n",[204,4612,4613],{"class":206,"line":232},[204,4614,229],{"emptyLinePlaceholder":228},[204,4616,4617,4619,4622],{"class":206,"line":238},[204,4618,582],{"class":492},[204,4620,4621],{"class":434},"(search.name)  ",[204,4623,4624],{"class":462},"# web_search\n",[15,4626,4627,4630],{},[25,4628,4629],{},"致命缺陷","：无法持有外部状态。如果你的工具需要复用一个 Redis 连接池或者数据库的 Connection，你只能把它写成全局变量（极不优雅且非线程安全）。",[19,4632,4633],{"start":213},[22,4634,4635],{},[25,4636,4637,4640],{},[85,4638,4639],{},"StructuredTool.from_function"," (中级过渡方案)",[15,4642,4643],{},"适合你想把现有的、别人写好的复杂函数直接包装成工具的场景。",[195,4645,4647],{"className":421,"code":4646,"language":423,"meta":200,"style":200},"from langchain.tools import StructuredTool\n\ndef add(a: int, b: int) -> int:\n    return a + b\n\ntool = StructuredTool.from_function(\n    func=add,\n    name=\"calculator\",\n    description=\"计算两个数的和\"\n)\n",[85,4648,4649,4661,4665,4688,4701,4705,4715,4725,4737,4747],{"__ignoreMap":200},[204,4650,4651,4653,4656,4658],{"class":206,"line":207},[204,4652,431],{"class":430},[204,4654,4655],{"class":434}," langchain.tools ",[204,4657,438],{"class":430},[204,4659,4660],{"class":434}," StructuredTool\n",[204,4662,4663],{"class":206,"line":213},[204,4664,229],{"emptyLinePlaceholder":228},[204,4666,4667,4669,4672,4675,4677,4680,4682,4684,4686],{"class":206,"line":219},[204,4668,3525],{"class":430},[204,4670,4671],{"class":722}," add",[204,4673,4674],{"class":434},"(a: ",[204,4676,1237],{"class":492},[204,4678,4679],{"class":434},", b: ",[204,4681,1237],{"class":492},[204,4683,4579],{"class":434},[204,4685,1237],{"class":492},[204,4687,3976],{"class":434},[204,4689,4690,4692,4695,4698],{"class":206,"line":225},[204,4691,3536],{"class":430},[204,4693,4694],{"class":434}," a ",[204,4696,4697],{"class":430},"+",[204,4699,4700],{"class":434}," b\n",[204,4702,4703],{"class":206,"line":232},[204,4704,229],{"emptyLinePlaceholder":228},[204,4706,4707,4710,4712],{"class":206,"line":238},[204,4708,4709],{"class":434},"tool ",[204,4711,471],{"class":430},[204,4713,4714],{"class":434}," StructuredTool.from_function(\n",[204,4716,4717,4720,4722],{"class":206,"line":244},[204,4718,4719],{"class":767},"    func",[204,4721,471],{"class":430},[204,4723,4724],{"class":434},"add,\n",[204,4726,4727,4730,4732,4735],{"class":206,"line":250},[204,4728,4729],{"class":767},"    name",[204,4731,471],{"class":430},[204,4733,4734],{"class":482},"\"calculator\"",[204,4736,558],{"class":434},[204,4738,4739,4742,4744],{"class":206,"line":255},[204,4740,4741],{"class":767},"    description",[204,4743,471],{"class":430},[204,4745,4746],{"class":482},"\"计算两个数的和\"\n",[204,4748,4749],{"class":206,"line":261},[204,4750,520],{"class":434},[19,4752,4753],{"start":219},[22,4754,4755],{},[25,4756,4757,4758,4760],{},"继承 ",[85,4759,4525],{}," (企业级终极形态)",[15,4762,4763,4764,4767,4768,104,4771,88],{},"这是一种",[25,4765,4766],{},"面向对象","的开发模式。它的巨大优势在于：",[25,4769,4770],{},"依赖注入 (Dependency Injection)",[25,4772,4773],{},"严苛的生命周期控制",[15,4775,4776],{},[25,4777,4778],{},"工程代码模板：构建一个持有状态的工具",[195,4780,4782],{"className":197,"code":4781,"language":199,"meta":200,"style":200},"from typing import Type, Optional\nfrom pydantic import BaseModel, Field\nfrom langchain.tools import BaseTool\n\n# 1. 定义极其严格的数据契约\nclass UserQuerySchema(BaseModel):\n    # Field 的 description 就是喂给大模型的“微型 Prompt”\n    email: str = Field(..., description=\"用户的企业邮箱，必须是 @company.com 结尾\")\n    action: str = Field(..., pattern=\"^(disable|enable|query)$\", description=\"仅限这三种操作\")\n\n# 2. 继承 BaseTool\nclass EnterpriseUserTool(BaseTool):\n    name: str = \"enterprise_user_manager\"\n    description: str = \"用于查询或修改企业内部用户状态。\"\n    args_schema: Type[BaseModel] = UserQuerySchema\n    \n    # 【核心优势】：工具内部可以安全地持有外部依赖（如数据库连接池、鉴权 Token）\n    # 大模型在调用时，\"看不见\"这些属性，也无法生成这些属性\n    db_pool: Optional[any] = None\n    audit_logger: Optional[any] = None\n\n    def _run(self, email: str, action: str) -> str:\n        \"\"\"同步执行逻辑\"\"\"\n        if not email.endswith(\"@company.com\"):\n            return \"Error: 邮箱格式不符合企业规范。\"\n            \n        self.audit_logger.log(f\"执行操作 {action} on {email}\")\n        # 执行 db_pool.execute(...)\n        return f\"操作 {action} 执行成功。\"\n\n    async def _arun(self, email: str, action: str) -> str:\n        \"\"\"异步执行逻辑：在高并发后端（如 FastAPI）中，这能防止线程阻塞\"\"\"\n        # 必须实现真正的 async IO 操作\n        pass\n\n# 初始化时注入依赖\n# tool = EnterpriseUserTool(db_pool=my_pool, audit_logger=my_logger)\n",[85,4783,4784,4789,4794,4799,4803,4808,4813,4818,4823,4828,4832,4837,4842,4847,4852,4857,4861,4866,4871,4876,4881,4885,4890,4895,4900,4905,4910,4915,4920,4925,4929,4934,4939,4944,4949,4953,4959],{"__ignoreMap":200},[204,4785,4786],{"class":206,"line":207},[204,4787,4788],{},"from typing import Type, Optional\n",[204,4790,4791],{"class":206,"line":213},[204,4792,4793],{},"from pydantic import BaseModel, Field\n",[204,4795,4796],{"class":206,"line":219},[204,4797,4798],{},"from langchain.tools import BaseTool\n",[204,4800,4801],{"class":206,"line":225},[204,4802,229],{"emptyLinePlaceholder":228},[204,4804,4805],{"class":206,"line":232},[204,4806,4807],{},"# 1. 定义极其严格的数据契约\n",[204,4809,4810],{"class":206,"line":238},[204,4811,4812],{},"class UserQuerySchema(BaseModel):\n",[204,4814,4815],{"class":206,"line":244},[204,4816,4817],{},"    # Field 的 description 就是喂给大模型的“微型 Prompt”\n",[204,4819,4820],{"class":206,"line":250},[204,4821,4822],{},"    email: str = Field(..., description=\"用户的企业邮箱，必须是 @company.com 结尾\")\n",[204,4824,4825],{"class":206,"line":255},[204,4826,4827],{},"    action: str = Field(..., pattern=\"^(disable|enable|query)$\", description=\"仅限这三种操作\")\n",[204,4829,4830],{"class":206,"line":261},[204,4831,229],{"emptyLinePlaceholder":228},[204,4833,4834],{"class":206,"line":267},[204,4835,4836],{},"# 2. 继承 BaseTool\n",[204,4838,4839],{"class":206,"line":272},[204,4840,4841],{},"class EnterpriseUserTool(BaseTool):\n",[204,4843,4844],{"class":206,"line":278},[204,4845,4846],{},"    name: str = \"enterprise_user_manager\"\n",[204,4848,4849],{"class":206,"line":284},[204,4850,4851],{},"    description: str = \"用于查询或修改企业内部用户状态。\"\n",[204,4853,4854],{"class":206,"line":290},[204,4855,4856],{},"    args_schema: Type[BaseModel] = UserQuerySchema\n",[204,4858,4859],{"class":206,"line":296},[204,4860,745],{},[204,4862,4863],{"class":206,"line":301},[204,4864,4865],{},"    # 【核心优势】：工具内部可以安全地持有外部依赖（如数据库连接池、鉴权 Token）\n",[204,4867,4868],{"class":206,"line":307},[204,4869,4870],{},"    # 大模型在调用时，\"看不见\"这些属性，也无法生成这些属性\n",[204,4872,4873],{"class":206,"line":313},[204,4874,4875],{},"    db_pool: Optional[any] = None\n",[204,4877,4878],{"class":206,"line":872},[204,4879,4880],{},"    audit_logger: Optional[any] = None\n",[204,4882,4883],{"class":206,"line":886},[204,4884,229],{"emptyLinePlaceholder":228},[204,4886,4887],{"class":206,"line":906},[204,4888,4889],{},"    def _run(self, email: str, action: str) -> str:\n",[204,4891,4892],{"class":206,"line":911},[204,4893,4894],{},"        \"\"\"同步执行逻辑\"\"\"\n",[204,4896,4897],{"class":206,"line":916},[204,4898,4899],{},"        if not email.endswith(\"@company.com\"):\n",[204,4901,4902],{"class":206,"line":922},[204,4903,4904],{},"            return \"Error: 邮箱格式不符合企业规范。\"\n",[204,4906,4907],{"class":206,"line":937},[204,4908,4909],{},"            \n",[204,4911,4912],{"class":206,"line":947},[204,4913,4914],{},"        self.audit_logger.log(f\"执行操作 {action} on {email}\")\n",[204,4916,4917],{"class":206,"line":1177},[204,4918,4919],{},"        # 执行 db_pool.execute(...)\n",[204,4921,4922],{"class":206,"line":1183},[204,4923,4924],{},"        return f\"操作 {action} 执行成功。\"\n",[204,4926,4927],{"class":206,"line":3353},[204,4928,229],{"emptyLinePlaceholder":228},[204,4930,4931],{"class":206,"line":3373},[204,4932,4933],{},"    async def _arun(self, email: str, action: str) -> str:\n",[204,4935,4936],{"class":206,"line":3718},[204,4937,4938],{},"        \"\"\"异步执行逻辑：在高并发后端（如 FastAPI）中，这能防止线程阻塞\"\"\"\n",[204,4940,4941],{"class":206,"line":3723},[204,4942,4943],{},"        # 必须实现真正的 async IO 操作\n",[204,4945,4946],{"class":206,"line":3729},[204,4947,4948],{},"        pass\n",[204,4950,4951],{"class":206,"line":3735},[204,4952,229],{"emptyLinePlaceholder":228},[204,4954,4956],{"class":206,"line":4955},36,[204,4957,4958],{},"# 初始化时注入依赖\n",[204,4960,4962],{"class":206,"line":4961},37,[204,4963,4964],{},"# tool = EnterpriseUserTool(db_pool=my_pool, audit_logger=my_logger)\n",[324,4966],{},[63,4968,4970],{"id":4969},"_42-隐式上下文动态参数注入","4.2 隐式上下文：动态参数注入",[15,4972,4973,4976,4977,4980,4981,4987],{},[25,4974,4975],{},"工业级痛点","：假设你在做一个基于 SaaS 的数据分析 Agent。大模型需要调用 ",[85,4978,4979],{},"query_sales_data"," 工具。这个工具在底层执行 SQL 时，",[25,4982,4983,4984],{},"必须加上 ",[85,4985,4986],{},"WHERE tenant_id = 'xxx'","，否则就会引发严重的数据越权（A 公司查到了 B 公司的数据）。",[15,4989,4990,4993,4994,4997],{},[25,4991,4992],{},"错误做法","：在 Prompt 里告诉大模型“你是 A 公司的助手，tenant_id 是 1001”，并让大模型在调用工具时把 ",[85,4995,4996],{},"1001"," 当作参数传进去。",[76,4999,5000],{},[22,5001,5002,5005,5006,5009],{},[1456,5003,5004],{},"为什么错？"," 大模型随时可能幻觉，万一它生成了 ",[85,5007,5008],{},"1002","，数据就泄露了。绝对不能信任模型生成的权限字段。",[15,5011,5012],{},[25,5013,5014],{},"资深做法：参数剥离与运行时注入",[15,5016,5017,5018,155,5021,5024,5025,5028],{},"我们在 Pydantic 契约中",[25,5019,5020],{},"隐藏",[85,5022,5023],{},"tenant_id","，让大模型只生成业务参数。而在工具执行前，利用 LangChain 的 ",[85,5026,5027],{},"RunnableConfig"," 将真实的租户 ID 注入进去。",[195,5030,5032],{"className":197,"code":5031,"language":199,"meta":200,"style":200},"from langchain_core.runnables import RunnableConfig\nfrom langchain_core.tools import tool\n\n# 契约里只有 query，大模型不知道有 tenant_id 的存在\n@tool\ndef query_sales_data(query: str, config: RunnableConfig) -> str:\n    \"\"\"根据自然语言查询销售数据。\"\"\"\n    \n    # 从外层 API 请求上下文中，安全提取出当前登录用户的租户 ID\n    tenant_id = config.get(\"configurable\", {}).get(\"tenant_id\")\n    if not tenant_id:\n        raise ValueError(\"严重安全错误：缺失租户上下文！\")\n        \n    print(f\"[底层安全隔离] 正在查询租户 {tenant_id} 的数据...\")\n    # 执行实际的安全 SQL：SELECT * FROM sales WHERE tenant_id = ?\n    return \"销售数据如下...\"\n\n# 后端框架在调用 Agent 时，强制绑定上下文配置，即这里的tenant_id应该是后端查询后强制绑定的\n# agent_executor.invoke(\n#    {\"input\": \"帮我看看上个月的销量\"}, \n#    config={\"configurable\": {\"tenant_id\": \"tenant_1001\"}}，\n# )\n",[85,5033,5034,5039,5044,5048,5053,5058,5063,5068,5072,5077,5082,5087,5092,5097,5102,5107,5112,5116,5121,5126,5131,5136],{"__ignoreMap":200},[204,5035,5036],{"class":206,"line":207},[204,5037,5038],{},"from langchain_core.runnables import RunnableConfig\n",[204,5040,5041],{"class":206,"line":213},[204,5042,5043],{},"from langchain_core.tools import tool\n",[204,5045,5046],{"class":206,"line":219},[204,5047,229],{"emptyLinePlaceholder":228},[204,5049,5050],{"class":206,"line":225},[204,5051,5052],{},"# 契约里只有 query，大模型不知道有 tenant_id 的存在\n",[204,5054,5055],{"class":206,"line":232},[204,5056,5057],{},"@tool\n",[204,5059,5060],{"class":206,"line":238},[204,5061,5062],{},"def query_sales_data(query: str, config: RunnableConfig) -> str:\n",[204,5064,5065],{"class":206,"line":244},[204,5066,5067],{},"    \"\"\"根据自然语言查询销售数据。\"\"\"\n",[204,5069,5070],{"class":206,"line":250},[204,5071,745],{},[204,5073,5074],{"class":206,"line":255},[204,5075,5076],{},"    # 从外层 API 请求上下文中，安全提取出当前登录用户的租户 ID\n",[204,5078,5079],{"class":206,"line":261},[204,5080,5081],{},"    tenant_id = config.get(\"configurable\", {}).get(\"tenant_id\")\n",[204,5083,5084],{"class":206,"line":267},[204,5085,5086],{},"    if not tenant_id:\n",[204,5088,5089],{"class":206,"line":272},[204,5090,5091],{},"        raise ValueError(\"严重安全错误：缺失租户上下文！\")\n",[204,5093,5094],{"class":206,"line":278},[204,5095,5096],{},"        \n",[204,5098,5099],{"class":206,"line":284},[204,5100,5101],{},"    print(f\"[底层安全隔离] 正在查询租户 {tenant_id} 的数据...\")\n",[204,5103,5104],{"class":206,"line":290},[204,5105,5106],{},"    # 执行实际的安全 SQL：SELECT * FROM sales WHERE tenant_id = ?\n",[204,5108,5109],{"class":206,"line":296},[204,5110,5111],{},"    return \"销售数据如下...\"\n",[204,5113,5114],{"class":206,"line":301},[204,5115,229],{"emptyLinePlaceholder":228},[204,5117,5118],{"class":206,"line":307},[204,5119,5120],{},"# 后端框架在调用 Agent 时，强制绑定上下文配置，即这里的tenant_id应该是后端查询后强制绑定的\n",[204,5122,5123],{"class":206,"line":313},[204,5124,5125],{},"# agent_executor.invoke(\n",[204,5127,5128],{"class":206,"line":872},[204,5129,5130],{},"#    {\"input\": \"帮我看看上个月的销量\"}, \n",[204,5132,5133],{"class":206,"line":886},[204,5134,5135],{},"#    config={\"configurable\": {\"tenant_id\": \"tenant_1001\"}}，\n",[204,5137,5138],{"class":206,"line":906},[204,5139,5140],{},"# )\n",[324,5142],{},[63,5144,5146],{"id":5145},"_43-工具爆炸与智能路由","4.3 工具爆炸与智能路由",[15,5148,5149],{},"随着业务发展，你的系统可能积累了 100 个甚至上千个工具（查天气、查库存、查 HR 政策、建 Jira 工单...）。",[15,5151,5152,5155],{},[25,5153,5154],{},"痛点","：如果你把 100 个工具的 Schema 全部塞进系统提示词，不仅会立刻耗尽 Token，模型还会因为“注意力分散”而频繁调错工具。",[15,5157,5158],{},[25,5159,5160],{},"核心架构拆解：Tool Map 与 Vector Store 的双轨制",[15,5162,5163,5164],{},"在代码实现前，我们必须搞清楚一个逻辑：",[25,5165,5166],{},"大模型需要的是带有完整 Pydantic Schema 的 Python 工具对象，而向量数据库只能存文本和元数据。",[15,5168,5169],{},"因此，我们的架构必须是“双轨制”：",[19,5171,5172,5182],{},[22,5173,5174,5177,5178,5181],{},[25,5175,5176],{},"Tool Registry (工具字典)","：在内存中维护一个 ",[85,5179,5180],{},"{ \"工具名\": 真实的 Python Tool 对象 }"," 的哈希表。",[22,5183,5184,5187,5188,5190,5191,5194],{},[25,5185,5186],{},"Vector Store (向量索引)","：把工具的 ",[85,5189,672],{},"（描述）作为文本向量化，把工具的 ",[85,5192,5193],{},"name","（名称）存入元数据（Metadata）。",[15,5196,5197],{},[25,5198,5199],{},"运行工作流：",[15,5201,5202],{},"用户提问 ➡️ 向量库匹配 description ➡️ 拿到 Top-K 的工具 name ➡️ 去 Tool Registry 中提取真实的 Tool 对象 ➡️ 动态绑定给 LLM。",[324,5204],{},[15,5206,5207],{},[25,5208,5209],{},"详细代码实操：构建动态工具路由系统",[15,5211,5212,5213,5215,5216,5218],{},"我们将使用 ",[85,5214,1940],{}," 作为本地向量库，",[85,5217,13],{}," 构建核心逻辑。",[19,5220,5221],{},[22,5222,5223],{},[25,5224,5225],{},"准备工具集 (模拟企业环境下的众多工具)",[195,5227,5229],{"className":197,"code":5228,"language":199,"meta":200,"style":200},"from langchain_core.tools import tool\nfrom pydantic import BaseModel, Field\n\n# --- 工具 1：财务类 ---\nclass ExpenseSchema(BaseModel):\n    amount: float = Field(..., description=\"报销金额\")\n    category: str = Field(..., description=\"费用类别，如餐饮、交通\")\n\n@tool(\"create_expense_ticket\", args_schema=ExpenseSchema)\ndef create_expense_ticket(amount: float, category: str) -> str:\n    \"\"\"用于创建员工财务报销单。当用户提到“报销”、“打车费”、“发票”时使用。\"\"\"\n    return f\"报销单已生成：{category} - {amount}元\"\n\n# --- 工具 2：HR 类 ---\nclass LeaveSchema(BaseModel):\n    days: int = Field(..., description=\"请假天数\")\n\n@tool(\"apply_leave\", args_schema=LeaveSchema)\ndef apply_leave(days: int) -> str:\n    \"\"\"用于提交员工请假申请。当用户提到“请假”、“休假”、“生病”时使用。\"\"\"\n    return f\"已提交 {days} 天的请假申请。\"\n\n# --- 工具 3：IT 支持类 ---\n@tool(\"reset_vpn_password\")\ndef reset_vpn_password() -> str:\n    \"\"\"用于重置公司 VPN 或网络密码。当用户连不上网、VPN报错时使用。\"\"\"\n    return \"VPN 密码已重置，新密码已发送至企业微信。\"\n\n# 假设我们有上百个工具，这里放入一个列表\nall_enterprise_tools = [create_expense_ticket, apply_leave, reset_vpn_password]\n",[85,5230,5231,5235,5239,5243,5248,5253,5258,5263,5267,5272,5277,5282,5287,5291,5296,5301,5306,5310,5315,5320,5325,5330,5334,5339,5344,5349,5354,5359,5363,5368],{"__ignoreMap":200},[204,5232,5233],{"class":206,"line":207},[204,5234,5043],{},[204,5236,5237],{"class":206,"line":213},[204,5238,4793],{},[204,5240,5241],{"class":206,"line":219},[204,5242,229],{"emptyLinePlaceholder":228},[204,5244,5245],{"class":206,"line":225},[204,5246,5247],{},"# --- 工具 1：财务类 ---\n",[204,5249,5250],{"class":206,"line":232},[204,5251,5252],{},"class ExpenseSchema(BaseModel):\n",[204,5254,5255],{"class":206,"line":238},[204,5256,5257],{},"    amount: float = Field(..., description=\"报销金额\")\n",[204,5259,5260],{"class":206,"line":244},[204,5261,5262],{},"    category: str = Field(..., description=\"费用类别，如餐饮、交通\")\n",[204,5264,5265],{"class":206,"line":250},[204,5266,229],{"emptyLinePlaceholder":228},[204,5268,5269],{"class":206,"line":255},[204,5270,5271],{},"@tool(\"create_expense_ticket\", args_schema=ExpenseSchema)\n",[204,5273,5274],{"class":206,"line":261},[204,5275,5276],{},"def create_expense_ticket(amount: float, category: str) -> str:\n",[204,5278,5279],{"class":206,"line":267},[204,5280,5281],{},"    \"\"\"用于创建员工财务报销单。当用户提到“报销”、“打车费”、“发票”时使用。\"\"\"\n",[204,5283,5284],{"class":206,"line":272},[204,5285,5286],{},"    return f\"报销单已生成：{category} - {amount}元\"\n",[204,5288,5289],{"class":206,"line":278},[204,5290,229],{"emptyLinePlaceholder":228},[204,5292,5293],{"class":206,"line":284},[204,5294,5295],{},"# --- 工具 2：HR 类 ---\n",[204,5297,5298],{"class":206,"line":290},[204,5299,5300],{},"class LeaveSchema(BaseModel):\n",[204,5302,5303],{"class":206,"line":296},[204,5304,5305],{},"    days: int = Field(..., description=\"请假天数\")\n",[204,5307,5308],{"class":206,"line":301},[204,5309,229],{"emptyLinePlaceholder":228},[204,5311,5312],{"class":206,"line":307},[204,5313,5314],{},"@tool(\"apply_leave\", args_schema=LeaveSchema)\n",[204,5316,5317],{"class":206,"line":313},[204,5318,5319],{},"def apply_leave(days: int) -> str:\n",[204,5321,5322],{"class":206,"line":872},[204,5323,5324],{},"    \"\"\"用于提交员工请假申请。当用户提到“请假”、“休假”、“生病”时使用。\"\"\"\n",[204,5326,5327],{"class":206,"line":886},[204,5328,5329],{},"    return f\"已提交 {days} 天的请假申请。\"\n",[204,5331,5332],{"class":206,"line":906},[204,5333,229],{"emptyLinePlaceholder":228},[204,5335,5336],{"class":206,"line":911},[204,5337,5338],{},"# --- 工具 3：IT 支持类 ---\n",[204,5340,5341],{"class":206,"line":916},[204,5342,5343],{},"@tool(\"reset_vpn_password\")\n",[204,5345,5346],{"class":206,"line":922},[204,5347,5348],{},"def reset_vpn_password() -> str:\n",[204,5350,5351],{"class":206,"line":937},[204,5352,5353],{},"    \"\"\"用于重置公司 VPN 或网络密码。当用户连不上网、VPN报错时使用。\"\"\"\n",[204,5355,5356],{"class":206,"line":947},[204,5357,5358],{},"    return \"VPN 密码已重置，新密码已发送至企业微信。\"\n",[204,5360,5361],{"class":206,"line":1177},[204,5362,229],{"emptyLinePlaceholder":228},[204,5364,5365],{"class":206,"line":1183},[204,5366,5367],{},"# 假设我们有上百个工具，这里放入一个列表\n",[204,5369,5370],{"class":206,"line":3353},[204,5371,5372],{},"all_enterprise_tools = [create_expense_ticket, apply_leave, reset_vpn_password]\n",[19,5374,5375],{"start":213},[22,5376,5377],{},[25,5378,5379],{},"构建核心的双轨路由引擎 (Tool Router)",[15,5381,5382],{},"这是整个架构的心脏。我们需要写一个类来管理向量化和动态提取。",[195,5384,5386],{"className":197,"code":5385,"language":199,"meta":200,"style":200},"from langchain_core.documents import Document\nfrom langchain_community.vectorstores import FAISS\nfrom langchain_openai import OpenAIEmbeddings\n\nclass DynamicToolRouter:\n    def __init__(self, tools_list):\n        # 1. 建立内存里的 Tool Map\n        self.tool_map = {tool.name: tool for tool in tools_list}\n        \n        # 2. 将工具转化为 Document 对象准备向量化\n        # 将 description 作为语义检索的核心内容，name 作为元数据用于后续映射\n        docs = [\n            Document(\n                page_content=tool.description, \n                metadata={\"tool_name\": tool.name}\n            ) for tool in tools_list\n        ]\n        \n        # 3. 建立向量索引 (这里使用 OpenAI 向量模型)\n        self.vectorstore = FAISS.from_documents(docs, OpenAIEmbeddings())\n        \n        # 4. 配置检索器：每次只返回最相关的 2 个工具\n        self.retriever = self.vectorstore.as_retriever(search_kwargs={\"k\": 2})\n\n    def get_relevant_tools(self, query: str):\n        \"\"\"根据用户的自然语言问题，动态返回最相关的工具对象列表\"\"\"\n        # 语义检索\n        retrieved_docs = self.retriever.invoke(query)\n        \n        # 打印日志以便观察路由过程\n        print(f\"\\n[Tool Router] 针对问题 '{query}'，命中的工具为：\")\n        \n        selected_tools = []\n        for doc in retrieved_docs:\n            tool_name = doc.metadata[\"tool_name\"]\n            selected_tools.append(self.tool_map[tool_name])\n            print(f\" -> {tool_name} (匹配理由: {doc.page_content[:20]}...)\")\n            \n        return selected_tools\n",[85,5387,5388,5393,5398,5403,5407,5412,5417,5422,5427,5431,5436,5441,5446,5451,5456,5461,5466,5471,5475,5480,5485,5489,5494,5499,5503,5508,5513,5518,5523,5527,5532,5537,5541,5546,5551,5556,5561,5566,5571],{"__ignoreMap":200},[204,5389,5390],{"class":206,"line":207},[204,5391,5392],{},"from langchain_core.documents import Document\n",[204,5394,5395],{"class":206,"line":213},[204,5396,5397],{},"from langchain_community.vectorstores import FAISS\n",[204,5399,5400],{"class":206,"line":219},[204,5401,5402],{},"from langchain_openai import OpenAIEmbeddings\n",[204,5404,5405],{"class":206,"line":225},[204,5406,229],{"emptyLinePlaceholder":228},[204,5408,5409],{"class":206,"line":232},[204,5410,5411],{},"class DynamicToolRouter:\n",[204,5413,5414],{"class":206,"line":238},[204,5415,5416],{},"    def __init__(self, tools_list):\n",[204,5418,5419],{"class":206,"line":244},[204,5420,5421],{},"        # 1. 建立内存里的 Tool Map\n",[204,5423,5424],{"class":206,"line":250},[204,5425,5426],{},"        self.tool_map = {tool.name: tool for tool in tools_list}\n",[204,5428,5429],{"class":206,"line":255},[204,5430,5096],{},[204,5432,5433],{"class":206,"line":261},[204,5434,5435],{},"        # 2. 将工具转化为 Document 对象准备向量化\n",[204,5437,5438],{"class":206,"line":267},[204,5439,5440],{},"        # 将 description 作为语义检索的核心内容，name 作为元数据用于后续映射\n",[204,5442,5443],{"class":206,"line":272},[204,5444,5445],{},"        docs = [\n",[204,5447,5448],{"class":206,"line":278},[204,5449,5450],{},"            Document(\n",[204,5452,5453],{"class":206,"line":284},[204,5454,5455],{},"                page_content=tool.description, \n",[204,5457,5458],{"class":206,"line":290},[204,5459,5460],{},"                metadata={\"tool_name\": tool.name}\n",[204,5462,5463],{"class":206,"line":296},[204,5464,5465],{},"            ) for tool in tools_list\n",[204,5467,5468],{"class":206,"line":301},[204,5469,5470],{},"        ]\n",[204,5472,5473],{"class":206,"line":307},[204,5474,5096],{},[204,5476,5477],{"class":206,"line":313},[204,5478,5479],{},"        # 3. 建立向量索引 (这里使用 OpenAI 向量模型)\n",[204,5481,5482],{"class":206,"line":872},[204,5483,5484],{},"        self.vectorstore = FAISS.from_documents(docs, OpenAIEmbeddings())\n",[204,5486,5487],{"class":206,"line":886},[204,5488,5096],{},[204,5490,5491],{"class":206,"line":906},[204,5492,5493],{},"        # 4. 配置检索器：每次只返回最相关的 2 个工具\n",[204,5495,5496],{"class":206,"line":911},[204,5497,5498],{},"        self.retriever = self.vectorstore.as_retriever(search_kwargs={\"k\": 2})\n",[204,5500,5501],{"class":206,"line":916},[204,5502,229],{"emptyLinePlaceholder":228},[204,5504,5505],{"class":206,"line":922},[204,5506,5507],{},"    def get_relevant_tools(self, query: str):\n",[204,5509,5510],{"class":206,"line":937},[204,5511,5512],{},"        \"\"\"根据用户的自然语言问题，动态返回最相关的工具对象列表\"\"\"\n",[204,5514,5515],{"class":206,"line":947},[204,5516,5517],{},"        # 语义检索\n",[204,5519,5520],{"class":206,"line":1177},[204,5521,5522],{},"        retrieved_docs = self.retriever.invoke(query)\n",[204,5524,5525],{"class":206,"line":1183},[204,5526,5096],{},[204,5528,5529],{"class":206,"line":3353},[204,5530,5531],{},"        # 打印日志以便观察路由过程\n",[204,5533,5534],{"class":206,"line":3373},[204,5535,5536],{},"        print(f\"\\n[Tool Router] 针对问题 '{query}'，命中的工具为：\")\n",[204,5538,5539],{"class":206,"line":3718},[204,5540,5096],{},[204,5542,5543],{"class":206,"line":3723},[204,5544,5545],{},"        selected_tools = []\n",[204,5547,5548],{"class":206,"line":3729},[204,5549,5550],{},"        for doc in retrieved_docs:\n",[204,5552,5553],{"class":206,"line":3735},[204,5554,5555],{},"            tool_name = doc.metadata[\"tool_name\"]\n",[204,5557,5558],{"class":206,"line":4955},[204,5559,5560],{},"            selected_tools.append(self.tool_map[tool_name])\n",[204,5562,5563],{"class":206,"line":4961},[204,5564,5565],{},"            print(f\" -> {tool_name} (匹配理由: {doc.page_content[:20]}...)\")\n",[204,5567,5569],{"class":206,"line":5568},38,[204,5570,4909],{},[204,5572,5574],{"class":206,"line":5573},39,[204,5575,5576],{},"        return selected_tools\n",[19,5578,5579],{"start":219},[22,5580,5581],{},[25,5582,5583],{},"将路由引擎接入 LCEL 链",[15,5585,5586,5587],{},"现在我们把动态路由逻辑和 LLM 组装在一起。我们要实现的是：",[25,5588,5589,5590,5593],{},"LLM 每次收到的 ",[85,5591,5592],{},"bind_tools"," 都是不一样的。",[195,5595,5597],{"className":197,"code":5596,"language":199,"meta":200,"style":200},"from langchain_openai import ChatOpenAI\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom langchain_core.runnables import RunnablePassthrough\n\n# 1. 初始化模型和路由引擎\nllm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\ntool_router = DynamicToolRouter(all_enterprise_tools)\n\n# 2. 定义一个动态绑定的包装函数\ndef dynamic_llm_call(inputs: dict):\n    user_query = inputs[\"question\"]\n    \n    # 【核心逻辑】：根据用户问题，捞出 Top 2 工具\n    relevant_tools = tool_router.get_relevant_tools(user_query)\n    \n    # 动态将这 2 个工具绑定给大模型\n    llm_with_tools = llm.bind_tools(relevant_tools)\n    \n    # 构造 Prompt 并执行\n    prompt = ChatPromptTemplate.from_messages([\n        (\"system\", \"你是一个企业智能助手。请使用提供的工具帮助用户。\"),\n        (\"human\", \"{question}\")\n    ])\n    \n    # 组装一条临时的小链条并执行\n    chain = prompt | llm_with_tools\n    return chain.invoke({\"question\": user_query})\n\n# 3. 构建主 LCEL 链\nmain_chain = (\n    {\"question\": RunnablePassthrough()}\n    | dynamic_llm_call\n)\n\n# ================= 测试运行 =================\n\n# 场景 1：用户想报销\nprint(\"\\n--- 测试 1 ---\")\nresponse1 = main_chain.invoke(\"我刚才打车花了 50 块钱，帮我处理一下\")\nprint(f\"大模型决定调用的工具: {response1.tool_calls}\")\n\n# 场景 2：用户网络坏了\nprint(\"\\n--- 测试 2 ---\")\nresponse2 = main_chain.invoke(\"我 VPN 突然登不上了，一直报 403 错误\")\nprint(f\"大模型决定调用的工具: {response2.tool_calls}\")\n",[85,5598,5599,5603,5607,5612,5616,5621,5625,5630,5634,5639,5644,5649,5653,5658,5663,5667,5672,5677,5681,5686,5691,5696,5701,5706,5710,5715,5720,5725,5729,5734,5739,5744,5749,5753,5757,5762,5766,5771,5776,5781,5787,5792,5798,5804,5810],{"__ignoreMap":200},[204,5600,5601],{"class":206,"line":207},[204,5602,216],{},[204,5604,5605],{"class":206,"line":213},[204,5606,1050],{},[204,5608,5609],{"class":206,"line":219},[204,5610,5611],{},"from langchain_core.runnables import RunnablePassthrough\n",[204,5613,5614],{"class":206,"line":225},[204,5615,229],{"emptyLinePlaceholder":228},[204,5617,5618],{"class":206,"line":232},[204,5619,5620],{},"# 1. 初始化模型和路由引擎\n",[204,5622,5623],{"class":206,"line":238},[204,5624,1003],{},[204,5626,5627],{"class":206,"line":244},[204,5628,5629],{},"tool_router = DynamicToolRouter(all_enterprise_tools)\n",[204,5631,5632],{"class":206,"line":250},[204,5633,229],{"emptyLinePlaceholder":228},[204,5635,5636],{"class":206,"line":255},[204,5637,5638],{},"# 2. 定义一个动态绑定的包装函数\n",[204,5640,5641],{"class":206,"line":261},[204,5642,5643],{},"def dynamic_llm_call(inputs: dict):\n",[204,5645,5646],{"class":206,"line":267},[204,5647,5648],{},"    user_query = inputs[\"question\"]\n",[204,5650,5651],{"class":206,"line":272},[204,5652,745],{},[204,5654,5655],{"class":206,"line":278},[204,5656,5657],{},"    # 【核心逻辑】：根据用户问题，捞出 Top 2 工具\n",[204,5659,5660],{"class":206,"line":284},[204,5661,5662],{},"    relevant_tools = tool_router.get_relevant_tools(user_query)\n",[204,5664,5665],{"class":206,"line":290},[204,5666,745],{},[204,5668,5669],{"class":206,"line":296},[204,5670,5671],{},"    # 动态将这 2 个工具绑定给大模型\n",[204,5673,5674],{"class":206,"line":301},[204,5675,5676],{},"    llm_with_tools = llm.bind_tools(relevant_tools)\n",[204,5678,5679],{"class":206,"line":307},[204,5680,745],{},[204,5682,5683],{"class":206,"line":313},[204,5684,5685],{},"    # 构造 Prompt 并执行\n",[204,5687,5688],{"class":206,"line":872},[204,5689,5690],{},"    prompt = ChatPromptTemplate.from_messages([\n",[204,5692,5693],{"class":206,"line":886},[204,5694,5695],{},"        (\"system\", \"你是一个企业智能助手。请使用提供的工具帮助用户。\"),\n",[204,5697,5698],{"class":206,"line":906},[204,5699,5700],{},"        (\"human\", \"{question}\")\n",[204,5702,5703],{"class":206,"line":911},[204,5704,5705],{},"    ])\n",[204,5707,5708],{"class":206,"line":916},[204,5709,745],{},[204,5711,5712],{"class":206,"line":922},[204,5713,5714],{},"    # 组装一条临时的小链条并执行\n",[204,5716,5717],{"class":206,"line":937},[204,5718,5719],{},"    chain = prompt | llm_with_tools\n",[204,5721,5722],{"class":206,"line":947},[204,5723,5724],{},"    return chain.invoke({\"question\": user_query})\n",[204,5726,5727],{"class":206,"line":1177},[204,5728,229],{"emptyLinePlaceholder":228},[204,5730,5731],{"class":206,"line":1183},[204,5732,5733],{},"# 3. 构建主 LCEL 链\n",[204,5735,5736],{"class":206,"line":3353},[204,5737,5738],{},"main_chain = (\n",[204,5740,5741],{"class":206,"line":3373},[204,5742,5743],{},"    {\"question\": RunnablePassthrough()}\n",[204,5745,5746],{"class":206,"line":3718},[204,5747,5748],{},"    | dynamic_llm_call\n",[204,5750,5751],{"class":206,"line":3723},[204,5752,520],{},[204,5754,5755],{"class":206,"line":3729},[204,5756,229],{"emptyLinePlaceholder":228},[204,5758,5759],{"class":206,"line":3735},[204,5760,5761],{},"# ================= 测试运行 =================\n",[204,5763,5764],{"class":206,"line":4955},[204,5765,229],{"emptyLinePlaceholder":228},[204,5767,5768],{"class":206,"line":4961},[204,5769,5770],{},"# 场景 1：用户想报销\n",[204,5772,5773],{"class":206,"line":5568},[204,5774,5775],{},"print(\"\\n--- 测试 1 ---\")\n",[204,5777,5778],{"class":206,"line":5573},[204,5779,5780],{},"response1 = main_chain.invoke(\"我刚才打车花了 50 块钱，帮我处理一下\")\n",[204,5782,5784],{"class":206,"line":5783},40,[204,5785,5786],{},"print(f\"大模型决定调用的工具: {response1.tool_calls}\")\n",[204,5788,5790],{"class":206,"line":5789},41,[204,5791,229],{"emptyLinePlaceholder":228},[204,5793,5795],{"class":206,"line":5794},42,[204,5796,5797],{},"# 场景 2：用户网络坏了\n",[204,5799,5801],{"class":206,"line":5800},43,[204,5802,5803],{},"print(\"\\n--- 测试 2 ---\")\n",[204,5805,5807],{"class":206,"line":5806},44,[204,5808,5809],{},"response2 = main_chain.invoke(\"我 VPN 突然登不上了，一直报 403 错误\")\n",[204,5811,5813],{"class":206,"line":5812},45,[204,5814,5815],{},"print(f\"大模型决定调用的工具: {response2.tool_calls}\")\n",[324,5817],{},[15,5819,5820],{},[25,5821,5822],{},"生产环境下的高级优化策略 (资深经验)",[15,5824,5825],{},"在真正的企业级架构中，上面的代码只是骨架，我们通常还会加入以下“护城河”机制：",[19,5827,5828],{},[22,5829,5830],{},[25,5831,5832],{},"Always-On Tools (常驻工具池)",[15,5834,5835,5836,486,5839,486,5842,5845,5846,5849],{},"并不是所有工具都需要经过 RAG 检索。某些底层工具（如 ",[85,5837,5838],{},"search_web",[85,5840,5841],{},"calculator",[85,5843,5844],{},"escalate_to_human","）应该",[25,5847,5848],{},"永远","绑定在 LLM 上，无论用户问什么。",[76,5851,5852],{},[22,5853,5854,955,5856],{},[25,5855,1794],{},[85,5857,5858],{},"llm.bind_tools(relevant_tools + always_on_tools)",[19,5860,5861],{"start":213},[22,5862,5863],{},[25,5864,5865],{},"Hybrid Routing (混合路由机制)",[15,5867,5868,5869,104,5872,5875],{},"如果用户问“帮我提个财务报销单”，向量检索能完美命中。但如果系统庞大，工具名称极其相近（比如 ",[85,5870,5871],{},"get_hr_policy",[85,5873,5874],{},"get_finance_policy","），向量搜索可能会找错。",[76,5877,5878],{},[22,5879,5880,5883,5884,5887],{},[25,5881,5882],{},"工业级解法","：在向量检索之上，加一层轻量级的",[25,5885,5886],{},"大模型意图分类路由 (Semantic Router)","。先让一个便宜、极速的模型（如 Claude-3-Haiku）判断大类（如：HR / IT / 财务），然后再去对应的子工具向量库中进行检索。",[19,5889,5890],{"start":219},[22,5891,5892],{},[25,5893,5894],{},"动态 Prompt 构建 (注入工具指南)",[15,5896,5897],{},"有些极其复杂的工具，光靠 Pydantic Schema 是不够的，还需要在 Prompt 里给出“SOP（标准作业程序）”。",[76,5899,5900],{},[22,5901,5902,5904,5905,5908,5909,5911,5912,88],{},[25,5903,1794],{},"：既然我们在 ",[85,5906,5907],{},"Tool Router"," 中捞出了特定的工具，我们就可以同时把这些特定工具的 SOP 拼接到 ",[85,5910,362],{}," 中。没捞到的工具，其 SOP 就不占用 Token。这被称为",[25,5913,5914],{},"动态系统提示词",[15,5916,5917],{},[25,5918,5919],{},"工程师总结",[15,5921,5922,5923,955],{},"通过 ",[25,5924,5925],{},"RAG for Tools",[19,5927,5928,5934],{},[22,5929,5930,5933],{},[25,5931,5932],{},"你的系统不再有能力上限","：无论你有 10 个工具还是 10,000 个工具，大模型每次处理的 Token 数量和难度是不变的。",[22,5935,5936,5939],{},[25,5937,5938],{},"极高的精准度","：因为每次只给 LLM 极少数最相关的选项，模型产生“幻觉”调用错误工具的概率几乎降为 0。",[15,5941,5942],{},"希望这些代码和架构思考，能为你构建生产级的 Agent 带来实质性的启发！如果有任何一行代码的逻辑需要探讨，随时告诉我。",[324,5944],{},[63,5946,5948],{"id":5947},"_44-生产生存指南工具异常的自愈循环","4.4 生产生存指南：工具异常的“自愈循环”",[15,5950,5951,5952,5955],{},"在真实网络环境中，API 会限流，SQL 会报错。如果工具执行函数直接 ",[85,5953,5954],{},"raise Exception","，整个 Agent 链条会当场崩溃，抛出 500 错误。",[15,5957,5958],{},[25,5959,5960],{},"工程化策略：让 LLM 从错误中学习并重试 (Error Feedback)",[15,5962,5963,5964,5971],{},"不要让错误打断循环，而是",[25,5965,5966,5967,5970],{},"把报错信息当作一种 ",[85,5968,5969],{},"Observation","（观察结果）还给大模型","，让大模型自行修正。",[195,5973,5975],{"className":197,"code":5974,"language":199,"meta":200,"style":200},"from langchain_core.tools import ToolException\n\ndef safe_divide(a: int, b: int) -> float:\n    \"\"\"执行除法运算。\"\"\"\n    if b == 0:\n        # 主动抛出受控的 ToolException\n        raise ToolException(\"数学错误：除数不能为0。请检查你的输入并提供一个非零的除数。\")\n    return a / b\n\n# 在包装工具时，开启 handle_tool_error\ndivide_tool = StructuredTool.from_function(\n    func=safe_divide,\n    # 当捕获到 ToolException 时，将错误信息作为字符串返回给大模型\n    handle_tool_error=True \n)\n",[85,5976,5977,5982,5986,5991,5996,6001,6006,6011,6016,6020,6025,6030,6035,6040,6045],{"__ignoreMap":200},[204,5978,5979],{"class":206,"line":207},[204,5980,5981],{},"from langchain_core.tools import ToolException\n",[204,5983,5984],{"class":206,"line":213},[204,5985,229],{"emptyLinePlaceholder":228},[204,5987,5988],{"class":206,"line":219},[204,5989,5990],{},"def safe_divide(a: int, b: int) -> float:\n",[204,5992,5993],{"class":206,"line":225},[204,5994,5995],{},"    \"\"\"执行除法运算。\"\"\"\n",[204,5997,5998],{"class":206,"line":232},[204,5999,6000],{},"    if b == 0:\n",[204,6002,6003],{"class":206,"line":238},[204,6004,6005],{},"        # 主动抛出受控的 ToolException\n",[204,6007,6008],{"class":206,"line":244},[204,6009,6010],{},"        raise ToolException(\"数学错误：除数不能为0。请检查你的输入并提供一个非零的除数。\")\n",[204,6012,6013],{"class":206,"line":250},[204,6014,6015],{},"    return a / b\n",[204,6017,6018],{"class":206,"line":255},[204,6019,229],{"emptyLinePlaceholder":228},[204,6021,6022],{"class":206,"line":261},[204,6023,6024],{},"# 在包装工具时，开启 handle_tool_error\n",[204,6026,6027],{"class":206,"line":267},[204,6028,6029],{},"divide_tool = StructuredTool.from_function(\n",[204,6031,6032],{"class":206,"line":272},[204,6033,6034],{},"    func=safe_divide,\n",[204,6036,6037],{"class":206,"line":278},[204,6038,6039],{},"    # 当捕获到 ToolException 时，将错误信息作为字符串返回给大模型\n",[204,6041,6042],{"class":206,"line":284},[204,6043,6044],{},"    handle_tool_error=True \n",[204,6046,6047],{"class":206,"line":290},[204,6048,520],{},[15,6050,6051],{},[25,6052,6053],{},"运行时的自愈流程：",[19,6055,6056,6062,6073,6080,6086],{},[22,6057,6058,6059,88],{},"LLM 决定调用 ",[85,6060,6061],{},"safe_divide(a=10, b=0)",[22,6063,6064,6065,6068,6069,6072],{},"工具抛出异常。由于 ",[85,6066,6067],{},"handle_tool_error=True","，LangChain 拦截异常，并将字符串 ",[85,6070,6071],{},"\"数学错误：除数不能为0...\""," 发回给 LLM。",[22,6074,6075,6076,6079],{},"LLM 收到反馈，进行",[25,6077,6078],{},"自我反思 (Self-Reflection)","：“哦，我不能传 0，那我换个数字。”",[22,6081,6082,6083,88],{},"LLM 重新发起调用 ",[85,6084,6085],{},"safe_divide(a=10, b=2)",[22,6087,6088],{},"成功返回。",[324,6090],{},[58,6092,6094],{"id":6093},"_5-状态上下文与长短期记忆-memory-context","5 状态、上下文与长短期记忆 (Memory & Context)",[15,6096,6097],{},[25,6098,6099],{},"⏳ 短期记忆 (Short-term Memory)：",[76,6101,6102,6108],{},[22,6103,6104,6107],{},[25,6105,6106],{},"作用："," 维持当前这轮聊天的连贯性，通常是最近 N 轮的对话记录。",[22,6109,6110,6113,6114,6117],{},[25,6111,6112],{},"持久化："," 因为读写极度频繁，通常存储在 ",[25,6115,6116],{},"Redis"," 等内存型高速缓存数据库中。",[15,6119,6120,6123],{},[25,6121,6122],{},"🗓️ 长期记忆 (Long-term Memory)："," 负责跨越时间周期（比如几个月后）记住信息。它主要分为两类：",[76,6125,6126,6144],{},[22,6127,6128,6131,6132,6135,6136,6139,6140,6143],{},[25,6129,6130],{},"实体记忆 (Entity Memory)："," 存储明确的、结构化的“客观事实”。比如 ",[85,6133,6134],{},"{\"爱好\": \"摄影\", \"职业\": \"架构师\"}","。通常通过后台模型静默抽取，存储在",[25,6137,6138],{},"图数据库","或",[25,6141,6142],{},"关系型数据库 (SQL)"," 中。",[22,6145,6146,6149,6150,6153],{},[25,6147,6148],{},"向量记忆 (Vector Memory)："," 存储模糊的“经验与探讨”。比如你们曾经花半小时讨论过“微服务架构的设计方案”。系统会将这些历史对话切块并转化为向量，存储在",[25,6151,6152],{},"向量数据库 (Vector DB)"," 中，以便未来通过语义相似度找回。",[15,6155,6156,6159,6160,6163],{},[25,6157,6158],{},"🏗️ 整体流转架构："," 当用户输入新消息 -> 系统去 Redis 拉取“短期记忆” -> 去长期数据库检索相关的“实体”和“向量”记忆 -> 将所有筛选出的记忆组装到当前的 ",[25,6161,6162],{},"Context"," 中 -> 发送给大模型生成回复 -> 后台异步更新各个记忆数据库。",[63,6165,6167],{"id":6166},"_51-短期记忆会话上下文管理与-token-压榨","5.1 短期记忆：会话上下文管理与 Token 压榨",[15,6169,6170],{},"我们必须先确立一个架构共识：**大模型本身是绝对“失忆”的（Stateless）。**它没有脑容量来记住你上一秒说的话。所谓的“记忆”，全靠我们在后端把历史聊天记录像“俄罗斯套娃”一样，拼接在最新的 Prompt 里发给它。",[15,6172,6173,6174,88],{},"这带来了一个致命的工程痛点：",[25,6175,6176],{},"Token 爆炸",[15,6178,6179],{},"如果用户聊了 100 轮，你把 100 轮全传过去：",[19,6181,6182,6188,6193],{},[22,6183,6184,6187],{},[25,6185,6186],{},"API 费用呈指数级飙升","（因为每次请求的输入 Token 都在增加）。",[22,6189,6190,88],{},[25,6191,6192],{},"响应延迟（Latency）极高",[22,6194,6195,6198],{},[25,6196,6197],{},"“中间失忆症 (Lost in the Middle)”","：当输入过长时，LLM 会严重忽略中间的文本，只关注开头和结尾。",[15,6200,6201],{},"为了解决这个问题，LangChain 经历了从“古典时代”到“现代 LCEL 时代”的演进。",[15,6203,6204],{},[25,6205,6206,6207,6210],{},"1. 古典陷阱：",[85,6208,6209],{},"ConversationBufferMemory"," 为什么被淘汰？",[15,6212,6213,6214,6216],{},"在很多早期的教程中，你会看到 ",[85,6215,6209],{},"。它的逻辑最简单：把用户和 AI 说过的每一句话，原封不动地存在一个列表里，然后全量传给 LLM。",[15,6218,6219,6222,6223,6226,6227,6230,6231,88],{},[25,6220,6221],{},"资深工程师视角："," 这是玩具代码。在生产环境中，",[25,6224,6225],{},"永远不要使用无上限的 Buffer","。只要用户一直聊，它一定会触发 API 的 ",[85,6228,6229],{},"max_tokens"," 报错，导致整个服务直接崩溃。我们需要的是",[25,6232,6233],{},"可控的上下文截断策略",[19,6235,6236],{"start":213},[22,6237,6238],{},[25,6239,6240,6241],{},"现代策略 A：滑动窗口 (Sliding Window) 与 ",[85,6242,6243],{},"trim_messages",[15,6245,6246,6247,88],{},"这是目前生产环境中最常用、性价比最高的策略。它的核心思想是：",[25,6248,6249],{},"“好汉不提当年勇，只看最近 N 轮”",[15,6251,6252,6253,6256,6257,6261],{},"在最新的 LangChain Core 中，我们不再依赖老旧的 ",[85,6254,6255],{},"Memory"," 类，而是直接在消息流（Message List）上使用 ",[25,6258,6259],{},[85,6260,6243],{}," 工具函数。",[15,6263,6264],{},[25,6265,6266],{},"工程化实操代码：",[195,6268,6270],{"className":197,"code":6269,"language":199,"meta":200,"style":200},"from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, trim_messages\nfrom langchain_openai import ChatOpenAI\n\n# 模拟一段极长的历史对话 (假设从数据库里捞出来的)\nlong_history = [\n    HumanMessage(\"我叫王大锤，是一名前端开发。\"),\n    AIMessage(\"你好王大锤，很高兴认识你！\"),\n    HumanMessage(\"我最近在学 React。\"),\n    AIMessage(\"React 是个很棒的框架，需要帮忙吗？\"),\n    HumanMessage(\"你能帮我写个 Hook 吗？\"),\n    AIMessage(\"没问题，你要什么功能的？\"),\n    HumanMessage(\"我要一个防抖的 Hook。\") # 这是用户最新的问题\n]\n\n# 工业级截断器：严格控制传递给大模型的 Token 数量\ntrimmed_history = trim_messages(\n    long_history,\n    max_tokens=40,            # 设定最大允许的 token 数量\n    strategy=\"last\",          # 策略：保留最新的消息，丢弃最老的\n    token_counter=ChatOpenAI(model=\"gpt-3.5-turbo\"), # 使用具体的模型来精准计算 Token\n    include_system=True,      # 强制：系统提示词绝不能被截断丢弃！\n    allow_partial=False       # 不允许截断半句话，要丢就丢完整的一条 Message\n)\n\nprint(f\"原始长度: {len(long_history)} 条\")\nprint(f\"截断后长度: {len(trimmed_history)} 条\")\nprint(\"实际传给模型的内容:\", trimmed_history)\n",[85,6271,6272,6277,6281,6285,6290,6295,6300,6305,6310,6315,6320,6325,6330,6334,6338,6343,6348,6353,6358,6363,6368,6373,6378,6382,6386,6391,6396],{"__ignoreMap":200},[204,6273,6274],{"class":206,"line":207},[204,6275,6276],{},"from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, trim_messages\n",[204,6278,6279],{"class":206,"line":213},[204,6280,216],{},[204,6282,6283],{"class":206,"line":219},[204,6284,229],{"emptyLinePlaceholder":228},[204,6286,6287],{"class":206,"line":225},[204,6288,6289],{},"# 模拟一段极长的历史对话 (假设从数据库里捞出来的)\n",[204,6291,6292],{"class":206,"line":232},[204,6293,6294],{},"long_history = [\n",[204,6296,6297],{"class":206,"line":238},[204,6298,6299],{},"    HumanMessage(\"我叫王大锤，是一名前端开发。\"),\n",[204,6301,6302],{"class":206,"line":244},[204,6303,6304],{},"    AIMessage(\"你好王大锤，很高兴认识你！\"),\n",[204,6306,6307],{"class":206,"line":250},[204,6308,6309],{},"    HumanMessage(\"我最近在学 React。\"),\n",[204,6311,6312],{"class":206,"line":255},[204,6313,6314],{},"    AIMessage(\"React 是个很棒的框架，需要帮忙吗？\"),\n",[204,6316,6317],{"class":206,"line":261},[204,6318,6319],{},"    HumanMessage(\"你能帮我写个 Hook 吗？\"),\n",[204,6321,6322],{"class":206,"line":267},[204,6323,6324],{},"    AIMessage(\"没问题，你要什么功能的？\"),\n",[204,6326,6327],{"class":206,"line":272},[204,6328,6329],{},"    HumanMessage(\"我要一个防抖的 Hook。\") # 这是用户最新的问题\n",[204,6331,6332],{"class":206,"line":278},[204,6333,4210],{},[204,6335,6336],{"class":206,"line":284},[204,6337,229],{"emptyLinePlaceholder":228},[204,6339,6340],{"class":206,"line":290},[204,6341,6342],{},"# 工业级截断器：严格控制传递给大模型的 Token 数量\n",[204,6344,6345],{"class":206,"line":296},[204,6346,6347],{},"trimmed_history = trim_messages(\n",[204,6349,6350],{"class":206,"line":301},[204,6351,6352],{},"    long_history,\n",[204,6354,6355],{"class":206,"line":307},[204,6356,6357],{},"    max_tokens=40,            # 设定最大允许的 token 数量\n",[204,6359,6360],{"class":206,"line":313},[204,6361,6362],{},"    strategy=\"last\",          # 策略：保留最新的消息，丢弃最老的\n",[204,6364,6365],{"class":206,"line":872},[204,6366,6367],{},"    token_counter=ChatOpenAI(model=\"gpt-3.5-turbo\"), # 使用具体的模型来精准计算 Token\n",[204,6369,6370],{"class":206,"line":886},[204,6371,6372],{},"    include_system=True,      # 强制：系统提示词绝不能被截断丢弃！\n",[204,6374,6375],{"class":206,"line":906},[204,6376,6377],{},"    allow_partial=False       # 不允许截断半句话，要丢就丢完整的一条 Message\n",[204,6379,6380],{"class":206,"line":911},[204,6381,520],{},[204,6383,6384],{"class":206,"line":916},[204,6385,229],{"emptyLinePlaceholder":228},[204,6387,6388],{"class":206,"line":922},[204,6389,6390],{},"print(f\"原始长度: {len(long_history)} 条\")\n",[204,6392,6393],{"class":206,"line":937},[204,6394,6395],{},"print(f\"截断后长度: {len(trimmed_history)} 条\")\n",[204,6397,6398],{"class":206,"line":947},[204,6399,6400],{},"print(\"实际传给模型的内容:\", trimmed_history)\n",[15,6402,6403],{},[25,6404,6405],{},"优缺点分析：",[76,6407,6408,6413],{},[22,6409,6410,6412],{},[25,6411,1713],{},"：Token 消耗极其稳定，永远不会超载崩溃。实现简单，延迟极低。",[22,6414,6415,6417],{},[25,6416,1718],{},"：刚性的物理截断。如果在第 1 轮用户说了他的名字，而在第 10 轮问“我叫什么”，由于窗口只保留最近 5 轮，LLM 会直接回答“不知道”。",[15,6419,6420],{},[25,6421,6422],{},"3. 现代策略 B：动态摘要 (Summary Memory)",[15,6424,6425,6426,88],{},"为了弥补滑动窗口“一刀切”的缺陷，我们需要引入",[25,6427,6428],{},"动态摘要机制",[15,6430,6431,6432],{},"它的核心逻辑是：",[25,6433,6434],{},"“大群聊天太长看不过来，我们单开一个小号帮你做会议纪要”。",[15,6436,6437],{},"我们将保留最近的 3 轮完整对话（用于维持当下的聊天连贯性），而将更早的 100 轮对话，交给一个后台的小模型（如 GPT-3.5-Turbo 或者更便宜的开源模型），让它压缩成一段 200 字的摘要。",[15,6439,6440],{},[25,6441,6442],{},"架构设计流：",[19,6444,6445,6448,6455,6461],{},[22,6446,6447],{},"监听对话轮数。",[22,6449,6450,6451,6454],{},"当 ",[85,6452,6453],{},"len(messages) > 10"," 时，触发异步后台任务 (Celery / Asyncio)。",[22,6456,6457,6458],{},"后台任务 Prompt：",[1456,6459,6460],{},"“请将以下对话总结为第三人称视角的摘要，重点保留用户的个人信息、意图和关键事实。”",[22,6462,6463,6464,6467,88],{},"下一次请求时，拼装的 Prompt 结构变为：",[6465,6466],"br",{},[85,6468,6469],{},"[SystemMessage] + [SummaryMessage(包含过往摘要)] + [最近 3 轮的 Message]",[15,6471,6472],{},[25,6473,6405],{},[76,6475,6476,6481],{},[22,6477,6478,6480],{},[25,6479,1713],{},"：在可控的 Token 范围内，实现了近乎“无限”的记忆视野。",[22,6482,6483,6485],{},[25,6484,1718],{},"：工程复杂度剧增。额外引入了一次 LLM 调用，增加了成本。且摘要过程不可避免地会丢失细节语意（比如原话的情感色彩）。",[324,6487],{},[15,6489,6490,6491,6494],{},"在企业级应用的“短期记忆”管理中，我们并不追求让 AI 记住所有字，而是追求 ",[25,6492,6493],{},"Token 成本、响应速度和语义完整度"," 的黄金三角平衡。",[15,6496,6497,6498,6501,6502,88],{},"一般 To C 的聊天产品（如客服），用 ",[25,6499,6500],{},"滑动窗口（按 Token 截断）"," 足够了；而长周期的陪伴型 AI 或深度的代码助手，必须上 ",[25,6503,6504],{},"混合架构（滑动窗口 + 动态摘要）",[15,6506,6507],{},"收到。我们严格按照大纲，一步一步扎实推进。绝不走马观花。",[63,6509,6511],{"id":6510},"_52-持久化层设计","5.2 持久化层设计",[324,6513],{},[15,6515,6516],{},[25,6517,6518],{},"🗄️ 为什么生产环境不能用内存（RAM）存记忆？",[15,6520,6521,6522,6524,6525,6528],{},"在上一节测试 ",[85,6523,6243],{}," 时，我们的聊天记录 ",[85,6526,6527],{},"long_history"," 是存在一个 Python 列表变量里的。在企业级后端架构（如 FastAPI, Django, SpringBoot）中，这种做法是致命的：",[19,6530,6531,6537],{},[22,6532,6533,6536],{},[25,6534,6535],{},"无状态与负载均衡","：生产环境通常会启动多个后端进程（Worker）甚至跨多台服务器。用户的第一句话可能打到了服务器 A，第二句话打到了服务器 B。如果记忆存在服务器 A 的内存里，服务器 B 是拿不到的，大模型就会“失忆”。",[22,6538,6539,6542],{},[25,6540,6541],{},"数据丢失","：每次重新发布代码或容器重启，内存里的聊天记录就会瞬间清空。",[15,6544,6545,6548,6549,1224,6552,6555],{},[25,6546,6547],{},"资深架构方案","：将 Agent 的大脑（逻辑）和海马体（记忆）物理拆分。我们引入 ",[25,6550,6551],{},"分布式缓存（Redis）",[25,6553,6554],{},"关系型数据库（Postgres/MySQL）"," 来作为全局持久化层。",[15,6557,6558],{},[25,6559,6560],{},"🛡️ 多租户架构与 Session 隔离",[15,6562,6563,6564,6567],{},"在 2C 的产品中，可能有上万个用户同时在和你的 Agent 聊天。我们必须做到绝对的",[25,6565,6566],{},"数据隔离","，确保张三绝对不能通过系统漏洞看到李四的聊天记录。",[15,6569,6570,6571,6576,6577,88],{},"在 LangChain 中，实现这种隔离的核心机制是 ",[25,6572,6573],{},[85,6574,6575],{},"RunnableWithMessageHistory"," 配合 ",[25,6578,6579],{},[85,6580,6581],{},"session_id",[15,6583,6584],{},[25,6585,6586],{},"工程化代码实操：基于 Redis 的分布式会话隔离",[195,6588,6590],{"className":197,"code":6589,"language":199,"meta":200,"style":200},"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\nfrom langchain_core.runnables.history import RunnableWithMessageHistory\nfrom langchain_community.chat_message_histories import RedisChatMessageHistory\nfrom langchain_openai import ChatOpenAI\n\n# 1. 构建基础链 (注意 MessagesPlaceholder 的占位)\nprompt = ChatPromptTemplate.from_messages([\n    (\"system\", \"你是一个专业的后端工程师助手。\"),\n    MessagesPlaceholder(variable_name=\"chat_history\"), \n    (\"human\", \"{question}\")\n])\nchain = prompt | ChatOpenAI(model=\"gpt-3.5-turbo\")\n\n# 2. 核心工厂函数：动态获取指定 Session 的持久化记忆\ndef get_redis_history(session_id: str):\n    \"\"\"\n    当请求到来时，LangChain 会自动调用这个函数。\n    session_id 是实现多租户隔离的唯一标识 (例如: user_1001_chat_001)\n    \"\"\"\n    return RedisChatMessageHistory(\n        session_id=session_id,\n        url=\"redis://localhost:6379/0\" \n    )\n\n# 3. 包装器：将基础链升级为带分布式记忆的链\n# 它会自动拦截输入，去 Redis 拿历史记录；执行完毕后，再自动把新对话存回 Redis。\nconversational_chain = RunnableWithMessageHistory(\n    runnable=chain,\n    get_session_history=get_redis_history,\n    input_messages_key=\"question\",\n    history_messages_key=\"chat_history\", \n)\n\n# ================= 生产环境运行模拟 =================\n\n# 租户 A (张三) 的请求打过来了\nresponse_A = conversational_chain.invoke(\n    {\"question\": \"我的项目是用 FastAPI 写的。\"},\n    # 这里的 config 是通过后端的 JWT Token 解析出来的当前用户身份\n    config={\"configurable\": {\"session_id\": \"tenant_zhangsan_01\"}}\n)\n\n# 租户 B (李四) 的请求同时打过来了\nresponse_B = conversational_chain.invoke(\n    {\"question\": \"你知道我刚才说我用了什么框架吗？\"},\n    config={\"configurable\": {\"session_id\": \"tenant_lisi_99\"}}\n)\n# AI 会回答李四：不知道。因为李四的 session_id 在 Redis 里是空的。\n",[85,6591,6592,6597,6602,6607,6611,6615,6620,6625,6630,6635,6640,6644,6649,6653,6658,6663,6668,6673,6678,6682,6687,6692,6697,6701,6705,6710,6715,6720,6725,6730,6735,6740,6744,6748,6753,6757,6762,6767,6772,6777,6782,6786,6790,6795,6800,6805,6811,6816],{"__ignoreMap":200},[204,6593,6594],{"class":206,"line":207},[204,6595,6596],{},"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",[204,6598,6599],{"class":206,"line":213},[204,6600,6601],{},"from langchain_core.runnables.history import RunnableWithMessageHistory\n",[204,6603,6604],{"class":206,"line":219},[204,6605,6606],{},"from langchain_community.chat_message_histories import RedisChatMessageHistory\n",[204,6608,6609],{"class":206,"line":225},[204,6610,216],{},[204,6612,6613],{"class":206,"line":232},[204,6614,229],{"emptyLinePlaceholder":228},[204,6616,6617],{"class":206,"line":238},[204,6618,6619],{},"# 1. 构建基础链 (注意 MessagesPlaceholder 的占位)\n",[204,6621,6622],{"class":206,"line":244},[204,6623,6624],{},"prompt = ChatPromptTemplate.from_messages([\n",[204,6626,6627],{"class":206,"line":250},[204,6628,6629],{},"    (\"system\", \"你是一个专业的后端工程师助手。\"),\n",[204,6631,6632],{"class":206,"line":255},[204,6633,6634],{},"    MessagesPlaceholder(variable_name=\"chat_history\"), \n",[204,6636,6637],{"class":206,"line":261},[204,6638,6639],{},"    (\"human\", \"{question}\")\n",[204,6641,6642],{"class":206,"line":267},[204,6643,525],{},[204,6645,6646],{"class":206,"line":272},[204,6647,6648],{},"chain = prompt | ChatOpenAI(model=\"gpt-3.5-turbo\")\n",[204,6650,6651],{"class":206,"line":278},[204,6652,229],{"emptyLinePlaceholder":228},[204,6654,6655],{"class":206,"line":284},[204,6656,6657],{},"# 2. 核心工厂函数：动态获取指定 Session 的持久化记忆\n",[204,6659,6660],{"class":206,"line":290},[204,6661,6662],{},"def get_redis_history(session_id: str):\n",[204,6664,6665],{"class":206,"line":296},[204,6666,6667],{},"    \"\"\"\n",[204,6669,6670],{"class":206,"line":301},[204,6671,6672],{},"    当请求到来时，LangChain 会自动调用这个函数。\n",[204,6674,6675],{"class":206,"line":307},[204,6676,6677],{},"    session_id 是实现多租户隔离的唯一标识 (例如: user_1001_chat_001)\n",[204,6679,6680],{"class":206,"line":313},[204,6681,6667],{},[204,6683,6684],{"class":206,"line":872},[204,6685,6686],{},"    return RedisChatMessageHistory(\n",[204,6688,6689],{"class":206,"line":886},[204,6690,6691],{},"        session_id=session_id,\n",[204,6693,6694],{"class":206,"line":906},[204,6695,6696],{},"        url=\"redis://localhost:6379/0\" \n",[204,6698,6699],{"class":206,"line":911},[204,6700,824],{},[204,6702,6703],{"class":206,"line":916},[204,6704,229],{"emptyLinePlaceholder":228},[204,6706,6707],{"class":206,"line":922},[204,6708,6709],{},"# 3. 包装器：将基础链升级为带分布式记忆的链\n",[204,6711,6712],{"class":206,"line":937},[204,6713,6714],{},"# 它会自动拦截输入，去 Redis 拿历史记录；执行完毕后，再自动把新对话存回 Redis。\n",[204,6716,6717],{"class":206,"line":947},[204,6718,6719],{},"conversational_chain = RunnableWithMessageHistory(\n",[204,6721,6722],{"class":206,"line":1177},[204,6723,6724],{},"    runnable=chain,\n",[204,6726,6727],{"class":206,"line":1183},[204,6728,6729],{},"    get_session_history=get_redis_history,\n",[204,6731,6732],{"class":206,"line":3353},[204,6733,6734],{},"    input_messages_key=\"question\",\n",[204,6736,6737],{"class":206,"line":3373},[204,6738,6739],{},"    history_messages_key=\"chat_history\", \n",[204,6741,6742],{"class":206,"line":3718},[204,6743,520],{},[204,6745,6746],{"class":206,"line":3723},[204,6747,229],{"emptyLinePlaceholder":228},[204,6749,6750],{"class":206,"line":3729},[204,6751,6752],{},"# ================= 生产环境运行模拟 =================\n",[204,6754,6755],{"class":206,"line":3735},[204,6756,229],{"emptyLinePlaceholder":228},[204,6758,6759],{"class":206,"line":4955},[204,6760,6761],{},"# 租户 A (张三) 的请求打过来了\n",[204,6763,6764],{"class":206,"line":4961},[204,6765,6766],{},"response_A = conversational_chain.invoke(\n",[204,6768,6769],{"class":206,"line":5568},[204,6770,6771],{},"    {\"question\": \"我的项目是用 FastAPI 写的。\"},\n",[204,6773,6774],{"class":206,"line":5573},[204,6775,6776],{},"    # 这里的 config 是通过后端的 JWT Token 解析出来的当前用户身份\n",[204,6778,6779],{"class":206,"line":5783},[204,6780,6781],{},"    config={\"configurable\": {\"session_id\": \"tenant_zhangsan_01\"}}\n",[204,6783,6784],{"class":206,"line":5789},[204,6785,520],{},[204,6787,6788],{"class":206,"line":5794},[204,6789,229],{"emptyLinePlaceholder":228},[204,6791,6792],{"class":206,"line":5800},[204,6793,6794],{},"# 租户 B (李四) 的请求同时打过来了\n",[204,6796,6797],{"class":206,"line":5806},[204,6798,6799],{},"response_B = conversational_chain.invoke(\n",[204,6801,6802],{"class":206,"line":5812},[204,6803,6804],{},"    {\"question\": \"你知道我刚才说我用了什么框架吗？\"},\n",[204,6806,6808],{"class":206,"line":6807},46,[204,6809,6810],{},"    config={\"configurable\": {\"session_id\": \"tenant_lisi_99\"}}\n",[204,6812,6814],{"class":206,"line":6813},47,[204,6815,520],{},[204,6817,6819],{"class":206,"line":6818},48,[204,6820,6821],{},"# AI 会回答李四：不知道。因为李四的 session_id 在 Redis 里是空的。\n",[15,6823,6824],{},[25,6825,6826],{},"⚖️ 选型权衡：Redis vs Postgres",[15,6828,6829],{},"在实际工程中，你会面临选哪种数据库存记忆的问题：",[76,6831,6832,6851],{},[22,6833,6834,2424,6837],{},[25,6835,6836],{},"Redis (内存型)",[76,6838,6839,6845],{},[22,6840,6841,6844],{},[1456,6842,6843],{},"优势","：极速的读写性能（毫秒级），非常适合高并发的实时聊天场景。",[22,6846,6847,6850],{},[1456,6848,6849],{},"劣势","：内存昂贵，不适合永久存储用户几年前的废话。",[22,6852,6853,2424,6856],{},[25,6854,6855],{},"Postgres / MySQL (关系型)",[76,6857,6858,6863],{},[22,6859,6860,6862],{},[1456,6861,6843],{},"：持久化安全，方便做复杂的数据分析（比如写 SQL 统计“哪个用户本月聊得最多”）。",[22,6864,6865,6867],{},[1456,6866,6849],{},"：高并发下磁盘 I/O 容易成为系统瓶颈。",[15,6869,6870,6873,6874,6877],{},[25,6871,6872],{},"生产级混合架构","：通常我们会用 Redis 存储",[25,6875,6876],{},"最近 24 小时","的活跃会话（热数据），然后通过后台定时任务（如 Celery），将不活跃的对话异步落盘迁移到 Postgres 中（冷数据归档）。",[324,6879],{},[15,6881,6882],{},"这就完成了记忆的物理层设计。大模型不仅拥有了记忆，而且这些记忆能在分布式集群中安全流转了。",[15,6884,6885],{},"针对我们刚才讨论的 Redis 方案，思考一个生产环境中的隐患：",[15,6887,6888],{},"如果你有 100 万个活跃用户，每天产生海量的对话，如果都无脑堆积在 Redis 里，极其昂贵的服务器内存很快就会被撑爆（OOM）。在初始化 Redis 存储机制或者设计这个系统时，我们通常需要利用 Redis 的什么特性来防御这种内存溢出危机？",[63,6890,6892],{"id":6891},"_53-长期长期记忆知识与经验的检索","5.3 长期长期记忆：知识与经验的检索",[15,6894,6895,6896,6899,6900,6903],{},"“死狗问题”（昨天说有狗，今天说狗死了，系统怎么处理）切中了长期记忆架构中最核心的痛点：",[25,6897,6898],{},"状态突变与知识冲突","。简单的 RAG 只是“追加（Append-Only）”数据的垃圾桶，而真正的企业级长期记忆，必须是一个支持 ",[25,6901,6902],{},"CRUD（增删改查）"," 的事务型数据库。",[15,6905,6906,6907,104,6910,6913],{},"在设计大型智能体系统时，我们通常将长期记忆在逻辑上划分为两层：",[25,6908,6909],{},"情景记忆（Episodic Memory）",[25,6911,6912],{},"语义/实体记忆（Semantic/Entity Memory）","。它们的选型、存储和更新机制截然不同。",[15,6915,6916],{},[25,6917,6918],{},"一、 架构选型与存储引擎",[19,6920,6921],{},[22,6922,6923],{},[25,6924,6925],{},"情景记忆 (Episodic Memory)：存储“经历与经验”",[15,6927,6928],{},"这部分记忆用于回答：“我们上个月是怎么讨论那个架构方案的？”",[76,6930,6931,6937,6943],{},[22,6932,6933,6936],{},[25,6934,6935],{},"数据特征："," 非结构化、长文本、重语境、无明显冲突（经历过的事情就是经历过了）。",[22,6938,6939,6942],{},[25,6940,6941],{},"核心痛点："," 召回率（不能漏掉关键经验）和 Token 成本。",[22,6944,6945,6948],{},[25,6946,6947],{},"存储选型指南：",[76,6949,6950,6961,6983],{},[22,6951,6952,155,6955,486,6958,6960],{},[25,6953,6954],{},"入门级/中小体量：",[85,6956,6957],{},"Chroma",[85,6959,1940],{},"。适合单机部署，无需复杂运维。",[22,6962,6963,155,6966,486,6969,486,6972,6975,6976,104,6979,6982],{},[25,6964,6965],{},"企业级/云原生：",[85,6967,6968],{},"Milvus",[85,6970,6971],{},"Pinecone",[85,6973,6974],{},"Qdrant","。支持百亿级向量、多租户隔离（Namespace）、极其重要的 **Metadata Filtering（元数据过滤）**能力（如按 ",[85,6977,6978],{},"user_id",[85,6980,6981],{},"timestamp"," 过滤后再进行向量检索）。",[22,6984,6985,155,6988,1450,6995],{},[25,6986,6987],{},"首选（务实路线）：",[25,6989,6990,6991,6994],{},"PostgreSQL + ",[85,6992,6993],{},"pgvector"," 插件",[76,6996,6997],{},[22,6998,6999,7002],{},[1456,7000,7001],{},"为什么推荐？"," 在绝大多数 ToB 场景中，引入一个独立的向量数据库会带来严重的“脑裂”问题（关系型数据和向量数据的一致性难以维护）。使用 Postgres，你可以用一条 SQL 语句同时完成用户鉴权、时间过滤和向量相似度计算。",[19,7004,7005],{"start":213},[22,7006,7007],{},[25,7008,7009],{},"实体/语义记忆 (Entity Memory)：存储“客观事实”",[15,7011,7012],{},"这部分用于回答：“这个用户的架构栈是什么？”、“他的狗还活着吗？”",[76,7014,7015,7020,7029],{},[22,7016,7017,7019],{},[25,7018,6935],{}," 结构化、强逻辑、存在状态覆盖（State Overrides）。",[22,7021,7022,7024,7025,7028],{},[25,7023,6941],{}," 冲突消解（Conflict Resolution）。",[25,7026,7027],{},"向量检索在这里会彻底失效","，因为“我喜欢猫”和“我讨厌猫”在向量空间里的余弦相似度极高。",[22,7030,7031,7033],{},[25,7032,6947],{},[76,7034,7035,7045],{},[22,7036,7037,7040,7041,7044],{},[25,7038,7039],{},"关系型数据库 (PostgreSQL/MySQL)："," 适合属性扁平的用户画像（User Profile）。通过定义明确的表结构（如 ",[85,7042,7043],{},"user_preferences"," 表）来存储。",[22,7046,7047,7050,7051,7054],{},[25,7048,7049],{},"图数据库 (Neo4j / Memgraph)："," 当实体间的关系极度复杂时（例如：Agent 需要记住“A 是 B 的上司，B 负责 C 项目，C 项目的截止日期是明天”），图数据库是唯一的解。LangChain 提供了 ",[85,7052,7053],{},"GraphCypherQAChain"," 可以直接将自然语言转化为 Cypher 查询语句。",[324,7056],{},[15,7058,7059],{},[25,7060,7061],{},"二、 核心痛点攻坚：冲突消解与事实覆写",[15,7063,7064],{},"回到那个经典的并发工程问题：如何处理“昨天有狗，今天狗死了”的冲突？",[15,7066,7067],{},"如果我们只是无脑把用户的聊天记录向量化存入 RAG 库，检索时会同时把“我有狗”和“狗死了”都召回，大模型大概率会精神分裂。",[15,7069,7070],{},[25,7071,7072],{},"资深架构方案：将大模型降级为“事件溯源生成器 (Event Sourcing Generator)”",[15,7074,7075,7076,7079,7080,7083],{},"我们不让模型直接改数据库，而是让后台的信息抽取模型输出",[25,7077,7078],{},"标准化的数据库操作指令","。这就要求我们在 Prompt 和 Pydantic Schema 的设计上引入 ",[25,7081,7082],{},"操作符 (Operators)"," 的概念。",[15,7085,7086],{},[25,7087,7088],{},"核心代码设计架构：",[195,7090,7092],{"className":197,"code":7091,"language":199,"meta":200,"style":200},"from pydantic import BaseModel, Field\nfrom typing import Literal, List\n\n# 1. 定义带有“操作语义”的契约\nclass FactOperation(BaseModel):\n    # 强制模型明确它是在新增、更新还是删除事实\n    operation: Literal[\"INSERT\", \"UPDATE\", \"DELETE\"] = Field(\n        description=\"操作类型。如果事实发生改变（如原本喜欢变讨厌，活的变死的），使用 UPDATE；如果事实不再存在，使用 DELETE。\"\n    )\n    subject: str = Field(description=\"事实主体，例如 '宠物', '职位'\")\n    fact: str = Field(description=\"事实的具体内容\")\n    confidence: float = Field(description=\"你认为这是用户随口一说，还是确凿事实？0.0 - 1.0\")\n\nclass MemoryUpdateList(BaseModel):\n    updates: List[FactOperation]\n\n# 2. 抽取链的 Prompt 设计（极其关键）\nextraction_prompt = \"\"\"\n你是一个底层知识图谱的维护引擎。\n请分析用户最新的发言，并对比目前的已知事实，输出更新指令。\n\n当前已知事实：\n{current_facts}\n\n用户最新发言：\n\"{user_input}\"\n\n规则：\n1. 如果用户发言与已知事实冲突，且代表了最新的状态（例如：辞职、宠物离世、搬家），你必须输出 UPDATE 或 DELETE 指令。\n2. 忽略临时的情绪发泄，只记录客观事实。\n\"\"\"\n",[85,7093,7094,7098,7103,7107,7112,7117,7122,7127,7132,7136,7141,7146,7151,7155,7160,7165,7169,7174,7179,7184,7189,7193,7198,7203,7207,7212,7217,7221,7226,7231,7236],{"__ignoreMap":200},[204,7095,7096],{"class":206,"line":207},[204,7097,4793],{},[204,7099,7100],{"class":206,"line":213},[204,7101,7102],{},"from typing import Literal, List\n",[204,7104,7105],{"class":206,"line":219},[204,7106,229],{"emptyLinePlaceholder":228},[204,7108,7109],{"class":206,"line":225},[204,7110,7111],{},"# 1. 定义带有“操作语义”的契约\n",[204,7113,7114],{"class":206,"line":232},[204,7115,7116],{},"class FactOperation(BaseModel):\n",[204,7118,7119],{"class":206,"line":238},[204,7120,7121],{},"    # 强制模型明确它是在新增、更新还是删除事实\n",[204,7123,7124],{"class":206,"line":244},[204,7125,7126],{},"    operation: Literal[\"INSERT\", \"UPDATE\", \"DELETE\"] = Field(\n",[204,7128,7129],{"class":206,"line":250},[204,7130,7131],{},"        description=\"操作类型。如果事实发生改变（如原本喜欢变讨厌，活的变死的），使用 UPDATE；如果事实不再存在，使用 DELETE。\"\n",[204,7133,7134],{"class":206,"line":255},[204,7135,824],{},[204,7137,7138],{"class":206,"line":261},[204,7139,7140],{},"    subject: str = Field(description=\"事实主体，例如 '宠物', '职位'\")\n",[204,7142,7143],{"class":206,"line":267},[204,7144,7145],{},"    fact: str = Field(description=\"事实的具体内容\")\n",[204,7147,7148],{"class":206,"line":272},[204,7149,7150],{},"    confidence: float = Field(description=\"你认为这是用户随口一说，还是确凿事实？0.0 - 1.0\")\n",[204,7152,7153],{"class":206,"line":278},[204,7154,229],{"emptyLinePlaceholder":228},[204,7156,7157],{"class":206,"line":284},[204,7158,7159],{},"class MemoryUpdateList(BaseModel):\n",[204,7161,7162],{"class":206,"line":290},[204,7163,7164],{},"    updates: List[FactOperation]\n",[204,7166,7167],{"class":206,"line":296},[204,7168,229],{"emptyLinePlaceholder":228},[204,7170,7171],{"class":206,"line":301},[204,7172,7173],{},"# 2. 抽取链的 Prompt 设计（极其关键）\n",[204,7175,7176],{"class":206,"line":307},[204,7177,7178],{},"extraction_prompt = \"\"\"\n",[204,7180,7181],{"class":206,"line":313},[204,7182,7183],{},"你是一个底层知识图谱的维护引擎。\n",[204,7185,7186],{"class":206,"line":872},[204,7187,7188],{},"请分析用户最新的发言，并对比目前的已知事实，输出更新指令。\n",[204,7190,7191],{"class":206,"line":886},[204,7192,229],{"emptyLinePlaceholder":228},[204,7194,7195],{"class":206,"line":906},[204,7196,7197],{},"当前已知事实：\n",[204,7199,7200],{"class":206,"line":911},[204,7201,7202],{},"{current_facts}\n",[204,7204,7205],{"class":206,"line":916},[204,7206,229],{"emptyLinePlaceholder":228},[204,7208,7209],{"class":206,"line":922},[204,7210,7211],{},"用户最新发言：\n",[204,7213,7214],{"class":206,"line":937},[204,7215,7216],{},"\"{user_input}\"\n",[204,7218,7219],{"class":206,"line":947},[204,7220,229],{"emptyLinePlaceholder":228},[204,7222,7223],{"class":206,"line":1177},[204,7224,7225],{},"规则：\n",[204,7227,7228],{"class":206,"line":1183},[204,7229,7230],{},"1. 如果用户发言与已知事实冲突，且代表了最新的状态（例如：辞职、宠物离世、搬家），你必须输出 UPDATE 或 DELETE 指令。\n",[204,7232,7233],{"class":206,"line":3353},[204,7234,7235],{},"2. 忽略临时的情绪发泄，只记录客观事实。\n",[204,7237,7238],{"class":206,"line":3373},[204,7239,3654],{},[15,7241,7242],{},[25,7243,7244],{},"运行流转：",[19,7246,7247,7254,7260,7263,7269],{},[22,7248,7249,7250,7253],{},"昨天：用户说“我养了一只柴犬”。大模型输出 ",[85,7251,7252],{},"operation: \"INSERT\", subject: \"宠物\", fact: \"养了一只柴犬\"","。存入 DB。",[22,7255,7256,7257,88],{},"今天：提取 DB，告诉抽取模型当前事实：",[85,7258,7259],{},"{\"宠物\": \"养了一只柴犬\"}",[22,7261,7262],{},"今天用户说：“我的柴犬上周因病去世了，我好难过”。",[22,7264,7265,7266,88],{},"抽取模型比对后，输出：",[85,7267,7268],{},"operation: \"UPDATE\", subject: \"宠物\", fact: \"曾经养过一只柴犬，已离世\"",[22,7270,7271],{},"后端接收到 JSON，执行真正的 SQL 或 Cypher 更新语句。",[15,7273,7274,7275,88],{},"这种架构下，记忆库里永远只有",[25,7276,7277],{},"一份最新的 Truth（真相）",[324,7279],{},[15,7281,7282],{},[25,7283,7284],{},"三、 现代记忆网关 (Memory Gateways)",[15,7286,7287,7288,88],{},"在真实的微服务架构中，我们很少自己在业务代码里手搓上述的一大套逻辑。行业内目前演进出了独立的 ",[25,7289,7290],{},"Memory Service（记忆微服务）",[76,7292,7293,7299],{},[22,7294,7295,7298],{},[25,7296,7297],{},"Zep (开源/商业化)："," 这是一个专为 LLM 打造的记忆网关。你只需要把聊天记录丢给它的 API，它在底层自动用小模型帮你做摘要归档、实体提取、甚至帮你计算哪些记忆因为太久远已经“衰减”而不需要召回了。",[22,7300,7301,7304],{},[25,7302,7303],{},"Mem0："," 另一个新兴的记忆层抽象，主打跨 Application 的记忆共享。",[15,7306,7307,7310,7311,7314],{},[25,7308,7309],{},"架构师建议："," 在项目早期，使用 ",[85,7312,7313],{},"PostgreSQL + pgvector"," 手写简单的抽取逻辑；当你的 Agent 面对几十万 DAU（日活）时，将记忆模块独立剥离，部署一套专用的 Zep 或类似网关。",[324,7316],{},[15,7318,7319],{},"通过区分情景与实体，并引入类似数据库事务的抽取机制，我们的 Agent 终于有了一个既能回忆往昔，又不会混淆现实的稳定大脑。",[15,7321,7322],{},"现在我们理解了如何用图数据库和事件溯源来管理长期记忆。那么，考虑这样一个场景：",[15,7324,7325,7326,7329,7330,7333],{},"如果你的 Agent 正在与用户进行长期的投资咨询。由于图数据库（实体记忆）只保留了",[25,7327,7328],{},"最新","的真相（例如：用户当前只有“低风险承受能力”），但为了分析用户的投资行为，系统其实需要知道用户过去两年风险偏好的",[25,7331,7332],{},"变化轨迹","。在不破坏当前实体记忆“唯一真相”原则的前提下，我们应该如何设计存储模型，既能让大模型快速读取当前状态，又能保留历史演变的过程？",[58,7335,7337],{"id":7336},"_6-agent","6 Agent",[15,7339,7340,7341,7344],{},"在处理“查一下今天天气”这类简单请求时，大模型可以直接调用工具。但面对诸如“深度调研竞品，生成对比图表并发送分析邮件”这种",[25,7342,7343],{},"长程任务 (Long-horizon Task)","，简单的调用往往会导致模型陷入死循环或耗尽 Token。",[15,7346,7347],{},"为了解决这个问题，工业界演进出了三种核心的底层架构模式：",[15,7349,7350,7351],{},"1️⃣ ",[25,7352,7353],{},"ReAct (Reason + Act)：边想边做模式",[76,7355,7356,7366],{},[22,7357,7358,7361,7362,7365],{},[25,7359,7360],{},"机制："," 这是一个 ",[85,7363,7364],{},"思考 -> 行动 -> 观察"," 的不断循环。模型在每一步都会基于当前的观察结果来决定下一步做什么。",[22,7367,7368,7370,7371,7374],{},[25,7369,615],{}," 像一个没有全局规划的执行者。如果任务步骤超过 5 步，它极易在某一步报错时卡死（一直尝试调用同一个错误的工具），最终触发 ",[85,7372,7373],{},"max_iterations"," 导致整个程序崩溃。",[15,7376,7377,7378],{},"2️⃣ ",[25,7379,7380],{},"Plan-and-Execute (规划与执行)：包工头模式",[76,7382,7383,7396],{},[22,7384,7385,7387,7388,7391,7392,7395],{},[25,7386,7360],{}," 将系统拆分为独立的大脑。首先，",[25,7389,7390],{},"Planner（规划器）"," 纵观全局，输出一个严格的 JSON 任务队列（如：1. 搜集数据 2. 数据清洗 3. 绘图）。然后，",[25,7393,7394],{},"Executor（执行器）"," 逐一执行这些原子任务。",[22,7397,7398,7401],{},[25,7399,7400],{},"工程优势："," 完美解决了大模型面对复杂任务时“注意力涣散”的问题，极大地提高了长程任务的成功率。",[15,7403,7404,7405],{},"3️⃣ ",[25,7406,7407],{},"Self-Reflection (自我反思)：审查员模式",[76,7409,7410,7415],{},[22,7411,7412,7414],{},[25,7413,7360],{}," 强制在工作流中引入“对抗”。生成节点 (Generator) 给出初步结果后，将其传递给批评节点 (Critic)。Critic 负责挑错并给出修改建议，将任务打回重做，直到结果通过校验。",[22,7416,7417,7420],{},[25,7418,7419],{},"适用场景："," 对准确率要求极高的代码生成、财务核对或合同撰写。",[63,7422,7424],{"id":7423},"_62-中间件","6.2 中间件",[15,7426,7427,7428,7431,7432,88],{},"在最新的 LangChain 架构中，",[85,7429,7430],{},"create_agent"," 被赋予了全新的能力。仅仅靠 LCEL 的 Callbacks 无法优雅地处理“改变 Agent 内部状态”的需求（比如动态修改 Prompt、截断上下文、或者强行叫停 Agent）。与 FastAPI 拦截外部 HTTP 请求的中间件不同，LangChain 的 Agent Middleware 是",[25,7433,7434],{},"拦截 Agent 内部的思考循环 (Agent Loop)",[15,7436,7437,7438,88],{},"一个标准的 Agent 循环是：",[85,7439,7440],{},"调用模型 -> 选择工具 -> 执行工具 -> 再次调用模型",[15,7442,7443],{},"Middleware 为你提供了钩子（Hooks），可以在这个循环的每一个缝隙插入逻辑：",[76,7445,7446,7456,7469],{},[22,7447,7448,7451,7452,7455],{},[85,7449,7450],{},"before_agent"," / ",[85,7453,7454],{},"after_agent","：整个 Agent 任务开始和结束时。",[22,7457,7458,7451,7461,7464,7465,7468],{},[85,7459,7460],{},"before_model",[85,7462,7463],{},"after_model","：模型每次思考前后（极度适合用来",[25,7466,7467],{},"拦截溢出的 Token"," 或动态注入 Prompt）。",[22,7470,7471,7474,7475,7478],{},[85,7472,7473],{},"wrap_tool_call","：拦截工具调用（适合用来做",[25,7476,7477],{},"重试机制","、权限校验）。",[15,7480,7481],{},[25,7482,7483],{},"代码级注入方式极其简洁：",[195,7485,7487],{"className":197,"code":7486,"language":199,"meta":200,"style":200},"from langchain.agents import create_agent\nfrom langchain.agents.middleware import SummarizationMiddleware, HumanInTheLoopMiddleware\n\n# 通过 middleware 参数将拦截器组成洋葱模型\nagent = create_agent(\n    model=\"gpt-4o\", \n    tools=[...], \n    middleware=[ \n        SummarizationMiddleware(...), # 负责拦截和压缩上下文\n        HumanInTheLoopMiddleware(...) # 负责拦截高危操作\n    ]\n)\n",[85,7488,7489,7494,7499,7503,7508,7513,7518,7523,7528,7533,7538,7543],{"__ignoreMap":200},[204,7490,7491],{"class":206,"line":207},[204,7492,7493],{},"from langchain.agents import create_agent\n",[204,7495,7496],{"class":206,"line":213},[204,7497,7498],{},"from langchain.agents.middleware import SummarizationMiddleware, HumanInTheLoopMiddleware\n",[204,7500,7501],{"class":206,"line":219},[204,7502,229],{"emptyLinePlaceholder":228},[204,7504,7505],{"class":206,"line":225},[204,7506,7507],{},"# 通过 middleware 参数将拦截器组成洋葱模型\n",[204,7509,7510],{"class":206,"line":232},[204,7511,7512],{},"agent = create_agent(\n",[204,7514,7515],{"class":206,"line":238},[204,7516,7517],{},"    model=\"gpt-4o\", \n",[204,7519,7520],{"class":206,"line":244},[204,7521,7522],{},"    tools=[...], \n",[204,7524,7525],{"class":206,"line":250},[204,7526,7527],{},"    middleware=[ \n",[204,7529,7530],{"class":206,"line":255},[204,7531,7532],{},"        SummarizationMiddleware(...), # 负责拦截和压缩上下文\n",[204,7534,7535],{"class":206,"line":261},[204,7536,7537],{},"        HumanInTheLoopMiddleware(...) # 负责拦截高危操作\n",[204,7539,7540],{"class":206,"line":267},[204,7541,7542],{},"    ]\n",[204,7544,7545],{"class":206,"line":272},[204,7546,520],{},[15,7548,7549],{},[25,7550,7551],{},"常用的中间件：",[19,7553,7554,7586,7605],{},[22,7555,7556,7562],{},[25,7557,7558,7561],{},[85,7559,7560],{},"SummarizationMiddleware"," (防爆掉的守护者)",[76,7563,7564,7573],{},[22,7565,7566,7568,7569,7572],{},[25,7567,5154],{},"：多轮对话后，历史记录 + 工具返回的海量 JSON 会瞬间撑爆 ",[85,7570,7571],{},"gpt-4o"," 的 128k 窗口，触发 API 报错。",[22,7574,7575,7578,7579,7581,7582,7585],{},[25,7576,7577],{},"作用","：这个中间件会在 ",[85,7580,7460],{}," 钩子处静默计算 Token。一旦发现即将超载，它会",[25,7583,7584],{},"自动","调用一个小模型，把前 10 轮的历史记录压缩成一段摘要，从而永远保持上下文的安全。",[22,7587,7588,7594],{},[25,7589,7590,7593],{},[85,7591,7592],{},"TodoListMiddleware"," (防迷失的规划师)",[76,7595,7596],{},[22,7597,7598,7600,7601,7604],{},[25,7599,7577],{},"：它会自动给 Agent 注入一个隐藏的 ",[85,7602,7603],{},"write_todos"," 工具，并修改 System Prompt。当 Agent 面对复杂任务时，这个中间件会逼迫 Agent 先建立一个 Todo List，做完一步划掉一步（pending -> in_progress -> completed），极大降低了长程任务的出错率。",[22,7606,7607],{},[25,7608,7609],{},"HumanInTheLoopMiddleware (HITL)",[15,7611,7612],{},"大模型绝对不能在没有人类授权的情况下执行敏感操作（如：发送外部邮件、执行 DELETE SQL、进行真实转账）。",[15,7614,7615],{},[25,7616,7617],{},"底层逻辑：如何让 Python 停下来？",[15,7619,7620,7621,7624,7625,7628,7629,88],{},"如果是在普通的 Python 脚本里，你可能会用 ",[85,7622,7623],{},"input()"," 来等待人类。但在高并发的后端服务器中，这是灾难，会导致线程全部阻塞。",[85,7626,7627],{},"HumanInTheLoopMiddleware"," 完美解决了这个问题，它的核心依赖是 ",[25,7630,7631],{},"Checkpointer（检查点/持久化）",[195,7633,7635],{"className":197,"code":7634,"language":199,"meta":200,"style":200},"from langchain.agents import create_agent\nfrom langchain.agents.middleware import HumanInTheLoopMiddleware\nfrom langgraph.checkpoint.memory import InMemorySaver # 生产中用 PostgresSaver\nfrom langchain_core.tools import tool\n\n# 1. 定义一个高危工具\n@tool\ndef transfer_money(amount: float, target_account: str):\n    \"\"\"转账给目标账户。这是一个敏感操作。\"\"\"\n    return f\"已成功向 {target_account} 转账 {amount} 元。\"\n\n# 2. 初始化持久化引擎 (Agent 挂起时，状态存在这里)\ncheckpointer = InMemorySaver()\n\n# 3. 初始化 HITL 中间件\n# 配置策略：只有在调用 transfer_money 这个特定工具前，才强制拦截！\nhitl_middleware = HumanInTheLoopMiddleware(\n    interrupt_on_tools=[\"transfer_money\"] \n)\n\n# 4. 创建受中间件保护的 Agent\nagent = create_agent(\n    model=llm,\n    tools=[transfer_money, search_weather], # 天气工具不会被拦截\n    middleware=[hitl_middleware],\n    checkpointer=checkpointer # 必须传入 checkpointer\n)\n\n# ================= 运行流转演示 =================\n\n# 指定一个线程 ID，实现多租户与会话隔离\nconfig = {\"configurable\": {\"thread_id\": \"user_101_tx_001\"}}\n\n# 第一阶段：Agent 开始思考，发现需要转账，触发 HITL 中间件！\nprint(\"--- 第一阶段：Agent 发起请求 ---\")\nresponse = agent.invoke({\"input\": \"给张三转账 500 块\"}, config=config)\n\n# 此时！Agent 进程彻底中止，释放服务器资源。\n# 中间件抛出中断状态，你可以在 response 中拿到挂起的请求信息。\nprint(f\"当前 Agent 状态: 已挂起等待审批...\")\n\n# 第二阶段：人类在前端点击了“批准”\nprint(\"\\n--- 第二阶段：人类介入并批准 ---\")\n# 我们拿着同样的 thread_id，告诉 Agent：\"人类同意了，请继续\"\nresponse = agent.invoke(\n    {\"human_approval\": \"approved\"}, # 注入人类决策\n    config=config\n)\n\nprint(f\"Agent 最终执行结果: {response['output']}\")\n",[85,7636,7637,7641,7646,7651,7655,7659,7664,7668,7673,7678,7683,7687,7692,7697,7701,7706,7711,7716,7721,7725,7729,7734,7738,7743,7748,7753,7758,7762,7766,7771,7775,7780,7785,7789,7794,7799,7804,7808,7813,7818,7823,7827,7832,7837,7842,7847,7852,7857,7861,7866],{"__ignoreMap":200},[204,7638,7639],{"class":206,"line":207},[204,7640,7493],{},[204,7642,7643],{"class":206,"line":213},[204,7644,7645],{},"from langchain.agents.middleware import HumanInTheLoopMiddleware\n",[204,7647,7648],{"class":206,"line":219},[204,7649,7650],{},"from langgraph.checkpoint.memory import InMemorySaver # 生产中用 PostgresSaver\n",[204,7652,7653],{"class":206,"line":225},[204,7654,5043],{},[204,7656,7657],{"class":206,"line":232},[204,7658,229],{"emptyLinePlaceholder":228},[204,7660,7661],{"class":206,"line":238},[204,7662,7663],{},"# 1. 定义一个高危工具\n",[204,7665,7666],{"class":206,"line":244},[204,7667,5057],{},[204,7669,7670],{"class":206,"line":250},[204,7671,7672],{},"def transfer_money(amount: float, target_account: str):\n",[204,7674,7675],{"class":206,"line":255},[204,7676,7677],{},"    \"\"\"转账给目标账户。这是一个敏感操作。\"\"\"\n",[204,7679,7680],{"class":206,"line":261},[204,7681,7682],{},"    return f\"已成功向 {target_account} 转账 {amount} 元。\"\n",[204,7684,7685],{"class":206,"line":267},[204,7686,229],{"emptyLinePlaceholder":228},[204,7688,7689],{"class":206,"line":272},[204,7690,7691],{},"# 2. 初始化持久化引擎 (Agent 挂起时，状态存在这里)\n",[204,7693,7694],{"class":206,"line":278},[204,7695,7696],{},"checkpointer = InMemorySaver()\n",[204,7698,7699],{"class":206,"line":284},[204,7700,229],{"emptyLinePlaceholder":228},[204,7702,7703],{"class":206,"line":290},[204,7704,7705],{},"# 3. 初始化 HITL 中间件\n",[204,7707,7708],{"class":206,"line":296},[204,7709,7710],{},"# 配置策略：只有在调用 transfer_money 这个特定工具前，才强制拦截！\n",[204,7712,7713],{"class":206,"line":301},[204,7714,7715],{},"hitl_middleware = HumanInTheLoopMiddleware(\n",[204,7717,7718],{"class":206,"line":307},[204,7719,7720],{},"    interrupt_on_tools=[\"transfer_money\"] \n",[204,7722,7723],{"class":206,"line":313},[204,7724,520],{},[204,7726,7727],{"class":206,"line":872},[204,7728,229],{"emptyLinePlaceholder":228},[204,7730,7731],{"class":206,"line":886},[204,7732,7733],{},"# 4. 创建受中间件保护的 Agent\n",[204,7735,7736],{"class":206,"line":906},[204,7737,7512],{},[204,7739,7740],{"class":206,"line":911},[204,7741,7742],{},"    model=llm,\n",[204,7744,7745],{"class":206,"line":916},[204,7746,7747],{},"    tools=[transfer_money, search_weather], # 天气工具不会被拦截\n",[204,7749,7750],{"class":206,"line":922},[204,7751,7752],{},"    middleware=[hitl_middleware],\n",[204,7754,7755],{"class":206,"line":937},[204,7756,7757],{},"    checkpointer=checkpointer # 必须传入 checkpointer\n",[204,7759,7760],{"class":206,"line":947},[204,7761,520],{},[204,7763,7764],{"class":206,"line":1177},[204,7765,229],{"emptyLinePlaceholder":228},[204,7767,7768],{"class":206,"line":1183},[204,7769,7770],{},"# ================= 运行流转演示 =================\n",[204,7772,7773],{"class":206,"line":3353},[204,7774,229],{"emptyLinePlaceholder":228},[204,7776,7777],{"class":206,"line":3373},[204,7778,7779],{},"# 指定一个线程 ID，实现多租户与会话隔离\n",[204,7781,7782],{"class":206,"line":3718},[204,7783,7784],{},"config = {\"configurable\": {\"thread_id\": \"user_101_tx_001\"}}\n",[204,7786,7787],{"class":206,"line":3723},[204,7788,229],{"emptyLinePlaceholder":228},[204,7790,7791],{"class":206,"line":3729},[204,7792,7793],{},"# 第一阶段：Agent 开始思考，发现需要转账，触发 HITL 中间件！\n",[204,7795,7796],{"class":206,"line":3735},[204,7797,7798],{},"print(\"--- 第一阶段：Agent 发起请求 ---\")\n",[204,7800,7801],{"class":206,"line":4955},[204,7802,7803],{},"response = agent.invoke({\"input\": \"给张三转账 500 块\"}, config=config)\n",[204,7805,7806],{"class":206,"line":4961},[204,7807,229],{"emptyLinePlaceholder":228},[204,7809,7810],{"class":206,"line":5568},[204,7811,7812],{},"# 此时！Agent 进程彻底中止，释放服务器资源。\n",[204,7814,7815],{"class":206,"line":5573},[204,7816,7817],{},"# 中间件抛出中断状态，你可以在 response 中拿到挂起的请求信息。\n",[204,7819,7820],{"class":206,"line":5783},[204,7821,7822],{},"print(f\"当前 Agent 状态: 已挂起等待审批...\")\n",[204,7824,7825],{"class":206,"line":5789},[204,7826,229],{"emptyLinePlaceholder":228},[204,7828,7829],{"class":206,"line":5794},[204,7830,7831],{},"# 第二阶段：人类在前端点击了“批准”\n",[204,7833,7834],{"class":206,"line":5800},[204,7835,7836],{},"print(\"\\n--- 第二阶段：人类介入并批准 ---\")\n",[204,7838,7839],{"class":206,"line":5806},[204,7840,7841],{},"# 我们拿着同样的 thread_id，告诉 Agent：\"人类同意了，请继续\"\n",[204,7843,7844],{"class":206,"line":5812},[204,7845,7846],{},"response = agent.invoke(\n",[204,7848,7849],{"class":206,"line":6807},[204,7850,7851],{},"    {\"human_approval\": \"approved\"}, # 注入人类决策\n",[204,7853,7854],{"class":206,"line":6813},[204,7855,7856],{},"    config=config\n",[204,7858,7859],{"class":206,"line":6818},[204,7860,520],{},[204,7862,7864],{"class":206,"line":7863},49,[204,7865,229],{"emptyLinePlaceholder":228},[204,7867,7869],{"class":206,"line":7868},50,[204,7870,7871],{},"print(f\"Agent 最终执行结果: {response['output']}\")\n",[15,7873,7874,7877],{},[25,7875,7876],{},"HITL 中间件的工程价值："," 彻底解耦了“AI 的思考速度”和“人类的审批速度”。哪怕人类第二天早上才来点击批准，只要 Postgres 数据库还在，Agent 就能从昨天被冻结的那一秒钟瞬间“复活”并继续执行。",[324,7879],{},[7881,7882,7883],"style",{},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .s4XuR, html code.shiki .s4XuR{--shiki-default:#E36209;--shiki-dark:#FFAB70}",{"title":200,"searchDepth":219,"depth":225,"links":7885},[7886,7891,7897,7903,7910,7915],{"id":60,"depth":213,"text":61,"children":7887},[7888,7889,7890],{"id":65,"depth":219,"text":66},{"id":328,"depth":219,"text":329},{"id":596,"depth":219,"text":597},{"id":1345,"depth":213,"text":1346,"children":7892},[7893,7894,7895,7896],{"id":1395,"depth":219,"text":1396},{"id":1883,"depth":219,"text":1884},{"id":2382,"depth":219,"text":2383},{"id":2710,"depth":219,"text":2711},{"id":3015,"depth":213,"text":3016,"children":7898},[7899,7900,7901,7902],{"id":3402,"depth":219,"text":3403},{"id":3768,"depth":219,"text":3769},{"id":4123,"depth":219,"text":4124},{"id":4388,"depth":219,"text":4389},{"id":4508,"depth":213,"text":4509,"children":7904},[7905,7907,7908,7909],{"id":4521,"depth":219,"text":7906},"4.1 核心基座选型：为什么企业级应用必须用 BaseTool？",{"id":4969,"depth":219,"text":4970},{"id":5145,"depth":219,"text":5146},{"id":5947,"depth":219,"text":5948},{"id":6093,"depth":213,"text":6094,"children":7911},[7912,7913,7914],{"id":6166,"depth":219,"text":6167},{"id":6510,"depth":219,"text":6511},{"id":6891,"depth":219,"text":6892},{"id":7336,"depth":213,"text":7337,"children":7916},[7917],{"id":7423,"depth":219,"text":7424},"agent工程化的第一步","md",{"date":7921,"image":7922,"alt":5,"tags":7923,"published":228,"trending":228},"2026/2/20","/blogs-img/Langchain.png",[7924],"agent","/docs/langchain",{"title":5,"description":7918},"docs/LangChain","B3L25_dvUhe3KEzPT7X5qmNja37YNgdakeqFSvpL9eI",1778575225199]