python, openai-api, langchain, llama-index

Chatbot that will generate a document draft with Python, LangChain, and OpenAI


I'm attempting to pass draft documents to my chatbot and have it generate a template, using the prompt "create a non disclosure agreement draft for California between mike llc and fantasty world". With my code below, the response I'm getting is: "I'm sorry, but I cannot generate a non-disclosure agreement draft for you. However, you can use the provided context information as a template to create a non-disclosure agreement between Mike LLC and fantasty world. Just replace the placeholders in the template with the appropriate names and information for your specific agreement."

Here is my setup:

import sys
import os
import openai
import constants
import gradio as gr
from langchain.chat_models import ChatOpenAI

from llama_index import SimpleDirectoryReader, GPTListIndex, GPTVectorStoreIndex, LLMPredictor, PromptHelper, load_index_from_storage

# Disable SSL certificate verification (for debugging purposes)
os.environ['REQUESTS_CA_BUNDLE'] = ''  # Set it to an empty string

os.environ["OPENAI_API_KEY"] = constants.APIKEY
openai.api_key = os.getenv("OPENAI_API_KEY")
print(os.getenv("OPENAI_API_KEY"))

def createVecorIndex(path):
    max_input = 4096
    tokens = 512
    chunk_size = 600
    max_chunk_overlap = 0.1

    prompt_helper = PromptHelper(max_input, tokens, max_chunk_overlap, chunk_size_limit=chunk_size)

    #define llm
    llmPredictor = LLMPredictor(llm=ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo', max_tokens=tokens))

    #load data
    docs = SimpleDirectoryReader(path).load_data()

    #create vector index
    vectorIndex = GPTVectorStoreIndex(docs, llm_predictor=llmPredictor, prompt_helper=prompt_helper)
    vectorIndex.storage_context.persist(persist_dir='vectorIndex.json')

    return vectorIndex

vectorIndex = createVecorIndex('docs')

In my docs directory, I have a few examples of non-disclosure agreements to create the vector index.
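
Incidentally, load_index_from_storage is imported above but never used; here is a minimal sketch (assuming the same llama_index version) of reloading the persisted index on later runs instead of re-embedding the docs every time:

from llama_index import StorageContext, load_index_from_storage

# Rebuild the storage context from the persisted directory, then reload the index.
# Note 'vectorIndex.json' is treated as a directory name, matching the persist call above.
storage_context = StorageContext.from_defaults(persist_dir='vectorIndex.json')
vectorIndex = load_index_from_storage(storage_context)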

This was my first attempt at the query:

def chatbot(input_index):
    query_engine = vectorIndex.as_query_engine()
    response = query_engine.query(input_index)
    return response.response

gr.Interface(fn=chatbot, inputs="text", outputs="text", title="Super Awesome Chatbot").launch()

I can't seem to get it to generate the draft; it keeps giving me the "I cannot generate a draft" response.
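
One likely cause: llama_index's default question-answering prompt tells the model to answer strictly from the retrieved context, which makes it refuse open-ended generation tasks like drafting. A hedged sketch of overriding that template (the Prompt class and the text_qa_template kwarg are from the 0.6-era llama_index API; the template wording is my own):

from llama_index import Prompt

# Custom QA template that frames the task as drafting rather than answering
DRAFT_TEMPLATE = Prompt(
    "Context information from example agreements is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Using the context only as a style and structure guide, "
    "write the full document requested here: {query_str}\n"
)

query_engine = vectorIndex.as_query_engine(text_qa_template=DRAFT_TEMPLATE)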

I also tried to create a clause for the word "draft", but the setup below essentially uses the trained model instead of my vector index.

def chatbot(input_index):
    query_engine = vectorIndex.as_query_engine()

    # If the "draft" clause is active:
    if "draft" in input_index.lower():
        # Query the vectorIndex for relevant information/context
        vector_response = query_engine.query(input_index).response
        print(vector_response)
        # Use vector_response as context to query the OpenAI API for a draft
        prompt = f"Based on the information: '{vector_response}', generate a draft for the input: {input_index}"
        
        response = openai.Completion.create(
            engine="text-davinci-002",
            prompt=prompt,
            max_tokens=512,
            temperature=0.2
        )
        
        openai_response = response.choices[0].text.strip()
        
        return openai_response

    # If "draft" clause isn't active, use just the vectorIndex response
    else:
        print('else clause')
        return query_engine.query(input_index).response
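
If the fallback route is kept, the same idea can be sketched against the chat completions endpoint (still the pre-1.0 openai library imported above); an explicit system message framing the model as a drafter tends to avoid the refusal. The message wording and model choice here are assumptions:

def draft_with_chat(vector_response, user_prompt):
    # A system message frames the task so the model is less likely to refuse
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system",
             "content": "You are a document-drafting assistant. Produce complete drafts on request."},
            {"role": "user",
             "content": f"Context:\n{vector_response}\n\nTask: {user_prompt}"},
        ],
        max_tokens=512,
        temperature=0.2,
    )
    return response["choices"][0]["message"]["content"].strip()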

Solution

  • Here is how I solved it:

    import os
    import openai
    import constants
    import gradio as gr
    from langchain.chat_models import ChatOpenAI
    
    from llama_index import SimpleDirectoryReader, GPTListIndex, GPTVectorStoreIndex, LLMPredictor, PromptHelper, load_index_from_storage
    
    # Disable SSL certificate verification (for debugging purposes)
    os.environ['REQUESTS_CA_BUNDLE'] = ''  # Set it to an empty string
    
    os.environ["OPENAI_API_KEY"] = constants.APIKEY
    openai.api_key = os.getenv("OPENAI_API_KEY")
    
    def createVecorIndex(path):
        max_input = 4096
        tokens = 512
        chunk_size = 600
        max_chunk_overlap = 0.1
    
        prompt_helper = PromptHelper(max_input, tokens, max_chunk_overlap, chunk_size_limit=chunk_size)
    
        #define llm
        llmPredictor = LLMPredictor(llm=ChatOpenAI(temperature=0.3, model_name='gpt-3.5-turbo', max_tokens=tokens))
    
        #load data
        docs = SimpleDirectoryReader(path).load_data()
    
        #create vector index
        vectorIndex = GPTVectorStoreIndex(docs, llm_predictor=llmPredictor, prompt_helper=prompt_helper)
        vectorIndex.storage_context.persist(persist_dir='vectorIndex.json')
    
        return vectorIndex
    
    vectorIndex = createVecorIndex('docs')
    
    def chatbot(input_index):
        # query_engine = vectorIndex.as_query_engine() simple query engine only
        chat_engine = vectorIndex.as_chat_engine()
        response = chat_engine.chat(input_index)
        return response.response
    
    gr.Interface(fn=chatbot, inputs="text", outputs="text", title="Super Awesome Chatbot").launch()
    

    I updated the engine to as_chat_engine() instead of as_query_engine(), and now I'm getting more complex responses.
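
    For reference, a hedged sketch of steering the chat engine a bit further; chat engines expose chat() rather than query(), and chat_mode="condense_question" (one of the mode names in llama_index of that era) rewrites each message into a standalone query against the index:

        # Assumes the vectorIndex built above; verbose=True prints the condensed query
        chat_engine = vectorIndex.as_chat_engine(chat_mode="condense_question", verbose=True)
        response = chat_engine.chat("create a non disclosure agreement draft for California between mike llc and fantasty world")
        print(response.response)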