python, rag, lm-studio

Trying to create a RAG with everything local; I'm stuck on using the embeddings


I'm trying to create a RAG. I start by breaking the document into chunks, sending them to a locally hosted embedding model, and getting the vectors back, and then I get stuck at the FAISS part.

My problem is that everything I find seems to want to connect to Hugging Face or something, and the LangChain examples seem geared toward that, so I would either get my embeddings from an external source or spend days re-running code until a model finally downloads fully without telling me the hash doesn't match because it only managed 98.4% of it. Why do that when I have a perfectly good server?

Here's my proof-of-concept code:

import requests
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import retrieval_qa, RetrievalQA
from langchain_community.vectorstores.faiss import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

lm_studio_endpoint = "http://127.0.0.1:1234"

def setup_qa_system(file_path):
    # Load and split PDF documents
    try:
        loader = PyPDFLoader(file_path)
        docs = loader.load_and_split()
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

    # Split documents into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    chunks = text_splitter.split_documents(docs)

    # Function to get embeddings
    def get_embeddings(texts):
        print(f"Getting embeddings for {len(texts)} texts.")  # Debug output
        try:
            response = requests.post(f"{lm_studio_endpoint}/v1/embeddings", json={
                "input": texts,
                'model': 'text-embedding-granite-embedding-278m-multilingual'
            })
            response.raise_for_status()
            embeddings_data = response.json().get('data')

            # Extract embeddings from the response
            embeddings = [item['embedding'] for item in embeddings_data]

            print(f"Received {len(embeddings)} embeddings.")  # Debug output
            return embeddings
        except requests.exceptions.RequestException as e:
            print(f"Error getting embeddings: {e}")
            return []

    texts = [chunk.page_content for chunk in chunks]  # Extract texts from chunks
    print(f"Number of chunks: {len(chunks)}")
    print(f"Number of texts being sent: {len(texts)}")

    embeddings = get_embeddings(texts)  # Get embeddings

    # Check if embeddings were retrieved successfully
    if not embeddings or len(embeddings) != len(chunks):
        print(f"Error: Number of embeddings ({len(embeddings)}) does not match number of chunks ({len(chunks)}). Exiting setup.")
        return None

    # Create a list of (text, embedding) tuples
    text_embeddings = list(zip(texts, embeddings))

    # Create the FAISS vector store using the list of tuples
    vector_store = FAISS.from_embeddings(text_embeddings, embeddings)  # <<<------ THIS IS WHERE I'M GETTING STUCK

    retriever = vector_store.as_retriever()

    def query_local_model(question, context):
        try:
            response = requests.post(f"{lm_studio_endpoint}/v1/completions", json={
                "prompt": f"Question: {question}\nContext: {context}\nAnswer:",
                "max_tokens": 150
            })
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error querying LM Studio: {e}")
            return {}

    # Adjust the QA chain to use the local model query function
    qa_chain = RetrievalQA.from_chain_type(
        llm=query_local_model,
        retriever=retriever
    )
    return qa_chain

if __name__ == '__main__':
    qa_chain = setup_qa_system('Documents/OfMiceAndMen.pdf')

    if qa_chain:
        # Example query
        query = "What is the main theme of 'Of Mice and Men'?"
        relevant_chunks = qa_chain.retriever.retrieve(query)

        context = " ".join(chunk.text for chunk in relevant_chunks)  # Prepare context from retrieved chunks
        result = qa_chain.llm(query, context)  # Call the local model
        print(result)

The output

Number of chunks: 214
Number of texts being sent: 214
Getting embeddings for 214 texts.
Received 214 embeddings.
Traceback (most recent call last):
  File "MyProjectDir\RagTest\main.py", line 83, in <module>
    qa_chain = setup_qa_system('Documents/OfMiceAndMen.pdf')
  File "MyProjectDir\RagTest\main.py", line 59, in setup_qa_system
    vector_store = FAISS.from_texts(text_embeddings)
TypeError: FAISS.from_texts() missing 1 required positional argument: 'embedding'

And a sample of the server log

2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47  [INFO] Returning embeddings (not shown in logs)

Solution

  • The second parameter of FAISS.from_embeddings is an Embeddings object that is responsible for computing the embeddings. If you want to use your own function, you can wrap it in a class that inherits from the Embeddings abstract class (cf. this page and the linked source code).
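
For example, here is a minimal sketch of such a wrapper, assuming a LangChain version where the Embeddings base class is importable from langchain_core.embeddings, and reusing the endpoint and model name from the question (the LMStudioEmbeddings class name and the sample texts are just illustrative):

import requests
from langchain_core.embeddings import Embeddings
from langchain_community.vectorstores.faiss import FAISS

lm_studio_endpoint = "http://127.0.0.1:1234"

class LMStudioEmbeddings(Embeddings):
    """Embeddings wrapper that calls a locally hosted LM Studio server."""

    def __init__(self, model="text-embedding-granite-embedding-278m-multilingual"):
        self.model = model

    def embed_documents(self, texts):
        # Batch-embed document chunks via the OpenAI-compatible /v1/embeddings endpoint.
        response = requests.post(f"{lm_studio_endpoint}/v1/embeddings", json={
            "input": texts,
            "model": self.model,
        })
        response.raise_for_status()
        return [item["embedding"] for item in response.json()["data"]]

    def embed_query(self, text):
        # Queries go through the same endpoint, one string at a time.
        return self.embed_documents([text])[0]

# Stand-ins for the chunk texts produced by the splitter in the question.
texts = ["first chunk of the document", "second chunk of the document"]

embedding_model = LMStudioEmbeddings()

# Let LangChain call embed_documents for you...
vector_store = FAISS.from_texts(texts, embedding_model)

# ...or reuse embeddings you already computed, keeping the wrapper around
# so queries can still be embedded at retrieval time:
# vector_store = FAISS.from_embeddings(list(zip(texts, embeddings)), embedding_model)

retriever = vector_store.as_retriever()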