Search code examples
python langchain large-language-model pdfium

How to use a PDF document in an agent using LangChain


My code uses the "wikipedia" tool to search for relevant content. Below is the code:

Load tools

# Only the Wikipedia tool is requested; `llm` must already be defined.
tool_names = ["wikipedia"]
tools = load_tools(tool_names, llm=llm)

# Agent settings are gathered in one place and unpacked into the call.
agent_options = {
    "agent": AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    "handle_parsing_errors": True,
    "verbose": False,
}
agent = initialize_agent(tools, llm, **agent_options)

# `var_1` and `var_2` come from earlier code and are interpolated into the prompt.
question = f"Does {var_1} cause {var_2} or the other way around?."
out = agent(question)

Instead of "wikipedia", I want to use my own PDF document that is stored locally. Can anyone help me do this?

I have tried the code below:

from langchain.document_loaders import PyPDFium2Loader
# Point the loader at the PDF file in the current working directory.
loader = PyPDFium2Loader("hunter-350-dual-channel.pdf")
# Read the PDF; presumably returns a list of LangChain Document objects
# (confirm against the installed LangChain version).
data = loader.load()

but I am not sure how to include this in the agent.


Solution

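  • You can wrap a RetrievalQA chain in a Tool and add it to the agent's tool list.

    For example (this example loads a text file; for a PDF, load the document with PyPDFium2Loader instead of TextLoader and keep the rest the same):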

    import os

    from langchain.agents import AgentType, Tool, initialize_agent
    from langchain.agents import load_tools
    from langchain.chains import RetrievalQA
    from langchain.document_loaders import TextLoader
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.llms import OpenAI
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.vectorstores import Chroma
    
    # Language model shared by the retrieval chain and the agent.
    # NOTE(review): constructed with defaults, so it presumably picks up its
    # credentials from the environment -- confirm for your setup.
    llm = OpenAI()

    # Load the source document and split it into chunks of up to 1000
    # characters with no overlap.
    # To index a PDF instead of a text file, swap the loader, e.g.:
    #   from langchain.document_loaders import PyPDFium2Loader
    #   loader = PyPDFium2Loader("hunter-350-dual-channel.pdf")
    # Everything after `loader.load()` stays the same.
    loader = TextLoader("union.txt", encoding="utf-8")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Read the API key from the environment instead of hard-coding it in the
    # source; a missing OPENAI_API_KEY raises KeyError right here.
    embeddings = OpenAIEmbeddings(
        api_key=os.environ["OPENAI_API_KEY"],
        base_url="https://api.gptapi.us/v1",
    )
    # Embed the chunks into a Chroma collection used for retrieval.
    docsearch = Chroma.from_documents(
        texts, embeddings, collection_name="state-of-union"
    )

    # Question-answering chain that answers from the chunks the retriever returns.
    state_of_union = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=docsearch.as_retriever()
    )

    # Keep the Wikipedia tool and add the document QA chain as a second tool.
    # The description is the text the agent sees when choosing a tool, so adapt
    # the name and description when you index a different document.
    tools = load_tools(["wikipedia"], llm=llm)
    tools += [
        Tool(
            name="State of Union QA System",
            func=state_of_union.run,
            description="useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question.",
        ),
    ]

    agent = initialize_agent(
        tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
    )
    agent.run(
        "What did biden say about ketanji brown jackson in the state of the union address?"
    )