本教程将帮助你熟悉 LangChain 的文档加载器、嵌入和向量存储抽象。这些抽象旨在支持从(向量)数据库和其他来源检索数据,以便与大语言模型(LLM)工作流集成。对于需要在模型推理过程中获取数据并据此进行推理的应用,它们非常重要,例如检索增强生成(RAG)的场景(参见 RAG 教程)。在这里,我们将基于一个 PDF 文档构建搜索引擎,从而检索出 PDF 中与输入查询相似的段落。本指南还包括在该搜索引擎之上构建的最小 RAG 实现。
from langchain_core.documents import Document

# Two hand-written sample documents. Each pairs a text body (`page_content`)
# with a metadata dict; both use the same "source" value.
documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
]
然而,LangChain 生态系统实现了与数百种常见来源集成的文档加载器。这使得将这些来源的数据纳入你的 AI 应用程序变得容易。
Table of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 20549FORM 10-K(Mark One)☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934FO

{'source': '../example_data/nke-10k-2023.pdf', 'page': 0}
import getpass
import os

# Prompt for the key only when OPENAI_API_KEY is unset or empty.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

# Embedding model used by the rest of the tutorial under the name `embeddings`.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
pip install -U "langchain-openai"
import getpass
import os

# Prompt for the key only when AZURE_OPENAI_API_KEY is unset or empty.
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass("Enter API key for Azure: ")

from langchain_openai import AzureOpenAIEmbeddings

# The endpoint, deployment name and API version are read with os.environ[...],
# so each of these variables must already be set or a KeyError is raised here.
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)
pip install -qU langchain-google-genai
import getpass
import os

# Prompt for the key only when GOOGLE_API_KEY is unset or empty.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
pip install -qU langchain-google-vertexai
from langchain_google_vertexai import VertexAIEmbeddings

# No API-key prompt in this snippet; credentials are not configured here.
embeddings = VertexAIEmbeddings(model="text-embedding-005")
pip install -qU langchain-aws
from langchain_aws import BedrockEmbeddings

# No API-key prompt in this snippet; credentials are not configured here.
embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v2:0")
pip install -qU langchain-huggingface
from langchain_huggingface import HuggingFaceEmbeddings

# `encode_kwargs` is passed with normalize_embeddings=True, requesting
# normalized vectors from the encoder.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    encode_kwargs={"normalize_embeddings": True},
)
pip install -qU langchain-ollama
from langchain_ollama import OllamaEmbeddings

# No API-key prompt in this snippet.
embeddings = OllamaEmbeddings(model="llama3")
pip install -qU langchain-cohere
import getpass
import os

# Prompt for the key only when COHERE_API_KEY is unset or empty.
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("Enter API key for Cohere: ")

from langchain_cohere import CohereEmbeddings

embeddings = CohereEmbeddings(model="embed-english-v3.0")
pip install -qU langchain-mistralai
import getpass
import os

# NOTE(review): langchain_mistralai is documented to read its key from
# MISTRAL_API_KEY rather than MISTRALAI_API_KEY — confirm against the
# installed version. A key already exported under the old name is reused so
# existing setups keep working; otherwise the user is prompted.
if not os.environ.get("MISTRAL_API_KEY"):
    legacy_key = os.environ.get("MISTRALAI_API_KEY")
    os.environ["MISTRAL_API_KEY"] = legacy_key or getpass.getpass(
        "Enter API key for MistralAI: "
    )

from langchain_mistralai import MistralAIEmbeddings

embeddings = MistralAIEmbeddings(model="mistral-embed")
pip install -qU langchain-nomic
import getpass
import os

# Prompt for the key only when NOMIC_API_KEY is unset or empty.
if not os.environ.get("NOMIC_API_KEY"):
    os.environ["NOMIC_API_KEY"] = getpass.getpass("Enter API key for Nomic: ")

from langchain_nomic import NomicEmbeddings

embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
pip install -qU langchain-nvidia-ai-endpoints
import getpass
import os

# Prompt for the key only when NVIDIA_API_KEY is unset or empty.
if not os.environ.get("NVIDIA_API_KEY"):
    os.environ["NVIDIA_API_KEY"] = getpass.getpass("Enter API key for NVIDIA: ")

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

embeddings = NVIDIAEmbeddings(model="NV-Embed-QA")
pip install -qU langchain-voyageai
import getpass
import os

# Prompt for the key only when VOYAGE_API_KEY is unset or empty.
if not os.environ.get("VOYAGE_API_KEY"):
    os.environ["VOYAGE_API_KEY"] = getpass.getpass("Enter API key for Voyage AI: ")

# The pip distribution is named "langchain-voyageai", but a Python module name
# cannot contain a hyphen; the importable module is `langchain_voyageai`.
from langchain_voyageai import VoyageAIEmbeddings

embeddings = VoyageAIEmbeddings(model="voyage-3")
pip install -qU langchain-ibm
import getpass
import os

# Prompt for the key only when WATSONX_APIKEY is unset or empty.
if not os.environ.get("WATSONX_APIKEY"):
    os.environ["WATSONX_APIKEY"] = getpass.getpass("Enter API key for IBM watsonx: ")

from langchain_ibm import WatsonxEmbeddings

# "<WATSONX PROJECT_ID>" is a placeholder; replace it with a real project id.
embeddings = WatsonxEmbeddings(
    model_id="ibm/slate-125m-english-rtrvr",
    url="https://us-south.ml.cloud.ibm.com",
    project_id="<WATSONX PROJECT_ID>",
)
pip install -qU langchain-core
from langchain_core.embeddings import DeterministicFakeEmbedding

# Fake embedding model constructed with size=4096; no API key is involved.
embeddings = DeterministicFakeEmbedding(size=4096)
pip install -qU langchain-isaacus
import getpass
import os

# Prompt for the key only when ISAACUS_API_KEY is unset or empty.
if not os.environ.get("ISAACUS_API_KEY"):
    os.environ["ISAACUS_API_KEY"] = getpass.getpass("Enter API key for Isaacus: ")

from langchain_isaacus import IsaacusEmbeddings

embeddings = IsaacusEmbeddings(model="kanon-2-embedder")
from langchain_chroma import Chroma

# Chroma vector store that embeds text with the `embeddings` object chosen
# in one of the provider snippets.
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)
# Query the vector store with a plain-text question and show the first hit.
results = vector_store.similarity_search(
    "How many distribution centers does Nike have in the US?"
)

print(results[0])
page_content='direct to consumer operations sell products through the following number of retail stores in the United States:U.S. RETAIL STORES NUMBERNIKE Brand factory stores 213NIKE Brand in-line stores (including employee-only stores) 74Converse stores (including factory stores) 82TOTAL 369In the United States, NIKE has eight significant distribution centers. Refer to Item 2. Properties for further information.2023 FORM 10-K 2' metadata={'page': 4, 'source': '../example_data/nke-10k-2023.pdf', 'start_index': 3125}
异步查询:
results = await vector_store.asimilarity_search("When was Nike incorporated?")print(results[0])
page_content='Table of ContentsPART IITEM 1. BUSINESSGENERALNIKE, Inc. was incorporated in 1967 under the laws of the State of Oregon. As used in this Annual Report on Form 10-K (this "Annual Report"), the terms "we," "us," "our,""NIKE" and the "Company" refer to NIKE, Inc. and its predecessors, subsidiaries and affiliates, collectively, unless the context indicates otherwise.Our principal business activity is the design, development and worldwide marketing and selling of athletic footwear, apparel, equipment, accessories and services. NIKE isthe largest seller of athletic footwear and apparel in the world. We sell our products through NIKE Direct operations, which are comprised of both NIKE-owned retail storesand sales through our digital platforms (also referred to as "NIKE Brand Digital"), to retail accounts and to a mix of independent distributors, licensees and sales' metadata={'page': 3, 'source': '../example_data/nke-10k-2023.pdf', 'start_index': 0}
返回分数:
# Note that providers implement different scores; the score here
# is a distance metric that varies inversely with similarity.
results = vector_store.similarity_search_with_score("What was Nike's revenue in 2023?")

# Each result is unpacked as a (document, score) pair.
doc, score = results[0]
print(f"Score: {score}\n")
print(doc)
Score: 0.23699893057346344

page_content='Table of ContentsFISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTSThe following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and major product line:FISCAL 2023 COMPARED TO FISCAL 2022•NIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported and currency-neutral basis, respectively.The increase was due to higher revenues in North America, Europe, Middle East & Africa ("EMEA"), APLA and Greater China, which contributed approximately 7, 6,2 and 1 percentage points to NIKE, Inc. Revenues, respectively.•NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues, increased 10% and 16% on a reported and currency-neutral basis, respectively. Thisincrease was primarily due to higher revenues in Men's, the Jordan Brand, Women's and Kids' which grew 17%, 35%,11% and 10%, respectively, on a wholesaleequivalent basis.' metadata={'page': 35, 'source': '../example_data/nke-10k-2023.pdf', 'start_index': 0}
基于与嵌入查询的相似度返回文档:
# Embed the query text first, then search the store with the resulting vector
# instead of the raw string.
embedding = embeddings.embed_query("How were Nike's margins impacted in 2023?")

results = vector_store.similarity_search_by_vector(embedding)
print(results[0])
page_content='Table of ContentsGROSS MARGINFISCAL 2023 COMPARED TO FISCAL 2022For fiscal 2023, our consolidated gross profit increased 4% to $22,292 million compared to $21,479 million for fiscal 2022. Gross margin decreased 250 basis points to43.5% for fiscal 2023 compared to 46.0% for fiscal 2022 due to the following:*Wholesale equivalentThe decrease in gross margin for fiscal 2023 was primarily due to:•Higher NIKE Brand product costs, on a wholesale equivalent basis, primarily due to higher input costs and elevated inbound freight and logistics costs as well asproduct mix;•Lower margin in our NIKE Direct business, driven by higher promotional activity to liquidate inventory in the current period compared to lower promotional activity inthe prior period resulting from lower available inventory supply;•Unfavorable changes in net foreign currency exchange rates, including hedges; and•Lower off-price margin, on a wholesale equivalent basis.This was partially offset by:' metadata={'page': 36, 'source': '../example_data/nke-10k-2023.pdf', 'start_index': 0}
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    """Return the single best-matching document for *query* (k=1)."""
    return vector_store.similarity_search(query, k=1)


# The decorated function is invoked through `.batch` with a list of queries,
# one result list per query.
retriever.batch(
    [
        "How many distribution centers does Nike have in the US?",
        "When was Nike incorporated?",
    ],
)
[[Document(metadata={'page': 4, 'source': '../example_data/nke-10k-2023.pdf', 'start_index': 3125}, page_content='direct to consumer operations sell products through the following number of retail stores in the United States:\nU.S. RETAIL STORES NUMBER\nNIKE Brand factory stores 213 \nNIKE Brand in-line stores (including employee-only stores) 74 \nConverse stores (including factory stores) 82 \nTOTAL 369 \nIn the United States, NIKE has eight significant distribution centers. Refer to Item 2. Properties for further information.\n2023 FORM 10-K 2')], [Document(metadata={'page': 3, 'source': '../example_data/nke-10k-2023.pdf', 'start_index': 0}, page_content='Table of Contents\nPART I\nITEM 1. BUSINESS\nGENERAL\nNIKE, Inc. was incorporated in 1967 under the laws of the State of Oregon. As used in this Annual Report on Form 10-K (this "Annual Report"), the terms "we," "us," "our,"\n"NIKE" and the "Company" refer to NIKE, Inc. and its predecessors, subsidiaries and affiliates, collectively, unless the context indicates otherwise.\nOur principal business activity is the design, development and worldwide marketing and selling of athletic footwear, apparel, equipment, accessories and services. NIKE is\nthe largest seller of athletic footwear and apparel in the world. We sell our products through NIKE Direct operations, which are comprised of both NIKE-owned retail stores\nand sales through our digital platforms (also referred to as "NIKE Brand Digital"), to retail accounts and to a mix of independent distributors, licensees and sales')]]
# Build a retriever directly from the vector store: similarity search,
# returning one document per query (k=1).
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

# Run both queries in one batch call.
retriever.batch(
    [
        "How many distribution centers does Nike have in the US?",
        "When was Nike incorporated?",
    ],
)
[[Document(metadata={'page': 4, 'source': '../example_data/nke-10k-2023.pdf', 'start_index': 3125}, page_content='direct to consumer operations sell products through the following number of retail stores in the United States:\nU.S. RETAIL STORES NUMBER\nNIKE Brand factory stores 213 \nNIKE Brand in-line stores (including employee-only stores) 74 \nConverse stores (including factory stores) 82 \nTOTAL 369 \nIn the United States, NIKE has eight significant distribution centers. Refer to Item 2. Properties for further information.\n2023 FORM 10-K 2')], [Document(metadata={'page': 3, 'source': '../example_data/nke-10k-2023.pdf', 'start_index': 0}, page_content='Table of Contents\nPART I\nITEM 1. BUSINESS\nGENERAL\nNIKE, Inc. was incorporated in 1967 under the laws of the State of Oregon. As used in this Annual Report on Form 10-K (this "Annual Report"), the terms "we," "us," "our,"\n"NIKE" and the "Company" refer to NIKE, Inc. and its predecessors, subsidiaries and affiliates, collectively, unless the context indicates otherwise.\nOur principal business activity is the design, development and worldwide marketing and selling of athletic footwear, apparel, equipment, accessories and services. NIKE is\nthe largest seller of athletic footwear and apparel in the world. We sell our products through NIKE Direct operations, which are comprised of both NIKE-owned retail stores\nand sales through our digital platforms (also referred to as "NIKE Brand Digital"), to retail accounts and to a mix of independent distributors, licensees and sales')]]