Search code examples
vector, openai-api, weaviate

ValueError: Document prompt requires documents to have metadata variables: ['source']. Received document with missing metadata: ['source']


I am using LangChain with a vector database (Weaviate or FAISS) and the RetrievalQAWithSourcesChain chain.



from langchain.vectorstores.weaviate import Weaviate
from langchain.llms import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
import weaviate
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings


# The API key needs to be supplied here (placeholder value shown).
OPEN_API_KEY = "sk-xxxxx"

# Connect to the Weaviate instance at the given URL; the OpenAI key is
# forwarded with every request through the "X-OpenAI-Api-Key" header.
client = weaviate.Client(
    url="https://xxxxx.weaviate.network",
    additional_headers={"X-OpenAI-Api-Key": OPEN_API_KEY},
)


# Vector store over the "Products" class, with "description" passed as the
# third argument.
# NOTE(review): no `attributes` argument is given here, so presumably no extra
# properties (such as "source") end up in the retrieved documents' metadata --
# likely the cause of the ValueError; confirm.
vectorstore = Weaviate(client, "Products", "description")

# FAISS alternative, kept for comparison:
# vectorstore = FAISS.load_local(
#     "./working_fas",
#     OpenAIEmbeddings(openai_api_key=OPEN_API_KEY),
# )

# Completion model used by the chain: deterministic (temperature 0) and
# capped at 200 output tokens.
llm = OpenAI(
    model_name="text-davinci-003",
    temperature=0,
    max_tokens=200,
    openai_api_key=OPEN_API_KEY,
)

# Prompt text with a single placeholder, {summaries}.
template = """
Return product and price information 
--------------------
{summaries}
"""

# Wrap the text in a PromptTemplate that declares its one input variable.
prompt = PromptTemplate(template=template, input_variables=["summaries"])

# Build the question-answering chain on top of the vector store's retriever,
# overriding the default prompt and not returning the source documents.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=False,
    chain_type_kwargs={"prompt": prompt},
)

# Run a single query and show only the chain's outputs.
result = chain("suggest me an watch", return_only_outputs=True)
print(result)

With FAISS, I get a result.
With Weaviate, I get the ValueError shown above.

Here is my schema and the code I use to insert data into the class:

# Define class and property definitions for products


# Schema definition for the "Products" class: one entry per product property,
# each with its name, Weaviate data type and a short description. Objects of
# this class are vectorized with the "text2vec-openai" module.
class_def = {
    "class": "Products",
    "description": "Products",
    "properties": [
        {
            "dataType": ["text"],
            "description": "product category",
            "name": "category",
        },
        {
            "name": "sku",
            "description": "product sku",
            "dataType": ["text"],
        },
        {
            "dataType": ["text"],
            "name": "product",
            "description": "product name",
        },
        {
            "dataType": ["text"],
            "name": "description",
            "description": "product description",
        },
        {
            "name": "price",
            "dataType": ["number"],
            "description": "product price",
        },
        {
            "name": "breadcrumb",
            "dataType": ["text"],
            "description": "product breadcrumb",
        },
        {
            # The product URL is stored under the property name "source".
            "name": "source",
            "dataType": ["text"],
            "description": "product url",
        },
        {
            "name": "money_back",
            "dataType": ["boolean"],
            "description": "money_back / refund available for the product",
        },
        {
            "name": "rating",
            "dataType": ["number"],
            "description": "product rating",
        },
        {
            "name": "total_reviews",
            "dataType": ["int"],
            "description": "product total_reviews",
        },
        {
            "name": "tags",
            "dataType": ["text"],
            "description": "product tags",
        },
        {
            "name": "type",
            "dataType": ["text"],
            "description": "product type",
        },
    ],
    "vectorizer": "text2vec-openai",
}

# Create the "Products" class in the Weaviate schema from the class_def dictionary.


client.schema.create_class(class_def)

# Insert data into the class

import pandas as pd
import time

# Load the product rows to import and report how many there are.
df = pd.read_csv("testing.csv")

print(len(df))

# CSV columns copied verbatim into each Weaviate object, in this order.
property_names = (
    "category",
    "sku",
    "product",
    "description",
    "price",
    "breadcrumb",
    "source",
    "money_back",
    "rating",
    "total_reviews",
    "tags",
    "type",
)

# Insert one "Products" object per CSV row, pausing one second before and
# one second after every insert.
for index, row in df.iterrows():
    time.sleep(1)
    properties = {name: row[name] for name in property_names}
    print(properties)
    client.data_object.create(properties, "Products")
    time.sleep(1)

I tried adjusting the prompt, but had no luck.


Solution

  • I found the answer with some help from the LangChain community. To add the metadata, you must initialise the Weaviate vectorstore with the attributes=["source"] parameter:

    # Passing attributes=["source"] adds the "source" property to each
    # document's metadata, which is the metadata variable the error message
    # reported as missing.
    vectorstore = Weaviate(client, "Products", "description", attributes=["source"])