I'm trying to build a RAG pipeline. I start by breaking the document into chunks, send them to a locally hosted embedding model, get the vectors back, and then I get stuck at the FAISS step.
My problem is that everything I find seems to want to connect to Hugging Face or something similar, and the LangChain tooling seems geared towards that too, so I'd either be getting my embeddings from an external source or spending days re-running code until a model downloads fully without telling me the hash doesn't match because it only managed 98.4% of the download. Why do that when I have a perfectly good server?
Here's my proof-of-concept code:
import requests
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_community.vectorstores.faiss import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

lm_studio_endpoint = "http://127.0.0.1:1234"

def setup_qa_system(file_path):
    # Load and split PDF documents
    try:
        loader = PyPDFLoader(file_path)
        docs = loader.load_and_split()
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

    # Split documents into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    chunks = text_splitter.split_documents(docs)

    # Function to get embeddings
    def get_embeddings(texts):
        print(f"Getting embeddings for {len(texts)} texts.")  # Debug output
        try:
            response = requests.post(f"{lm_studio_endpoint}/v1/embeddings", json={
                "input": texts,
                "model": "text-embedding-granite-embedding-278m-multilingual"
            })
            response.raise_for_status()
            embeddings_data = response.json().get('data')
            # Extract embeddings from the response
            embeddings = [item['embedding'] for item in embeddings_data]
            print(f"Received {len(embeddings)} embeddings.")  # Debug output
            return embeddings
        except requests.exceptions.RequestException as e:
            print(f"Error getting embeddings: {e}")
            return []

    texts = [chunk.page_content for chunk in chunks]  # Extract texts from chunks
    print(f"Number of chunks: {len(chunks)}")
    print(f"Number of texts being sent: {len(texts)}")
    embeddings = get_embeddings(texts)  # Get embeddings

    # Check if embeddings were retrieved successfully
    if not embeddings or len(embeddings) != len(chunks):
        print(f"Error: Number of embeddings ({len(embeddings)}) does not match number of chunks ({len(chunks)}). Exiting setup.")
        return None

    # Create a list of (text, embedding) tuples
    text_embeddings = list(zip(texts, embeddings))
    # Create the FAISS vector store using the list of tuples
    vector_store = FAISS.from_embeddings(text_embeddings, embeddings)  # <<<------ THIS IS WHERE I'M GETTING STUCK
    retriever = vector_store.as_retriever()

    def query_local_model(question, context):
        try:
            response = requests.post(f"{lm_studio_endpoint}/v1/completions", json={
                "prompt": f"Question: {question}\nContext: {context}\nAnswer:",
                "max_tokens": 150
            })
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error querying LM Studio: {e}")
            return {}

    # Adjust the QA chain to use the local model query function
    qa_chain = RetrievalQA.from_chain_type(
        llm=query_local_model,
        retriever=retriever
    )
    return qa_chain

if __name__ == '__main__':
    qa_chain = setup_qa_system('Documents/OfMiceAndMen.pdf')
    if qa_chain:
        # Example query
        query = "What is the main theme of 'Of Mice and Men'?"
        relevant_chunks = qa_chain.retriever.retrieve(query)
        context = " ".join(chunk.text for chunk in relevant_chunks)  # Prepare context from retrieved chunks
        result = qa_chain.llm(query, context)  # Call the local model
        print(result)
The output:
Number of chunks: 214
Number of texts being sent: 214
Getting embeddings for 214 texts.
Received 214 embeddings.
Traceback (most recent call last):
  File "MyProjectDir\RagTest\main.py", line 83, in <module>
    qa_chain = setup_qa_system('Documents/OfMiceAndMen.pdf')
  File "MyProjectDir\RagTest\main.py", line 59, in setup_qa_system
    vector_store = FAISS.from_texts(text_embeddings)
TypeError: FAISS.from_texts() missing 1 required positional argument: 'embedding'
And a sample of the server log:
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [DEBUG] [INFO] [LlamaEmbeddingEngine] All parsed chunks succesfully embedded!
2024-12-27 13:13:47 [INFO] Returning embeddings (not shown in logs)
The second parameter of FAISS.from_embeddings is an Embeddings object that is in charge of the embedding (it's what the vector store will call later on, e.g. to embed queries at retrieval time). If you want to use your own function, you can wrap it in a class inheriting from the Embeddings abstract class (cf. this page and the linked source code).
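For example, something along these lines. This is an untested sketch: LMStudioEmbeddings is just a name I made up, and depending on your LangChain version the abstract class is importable from langchain_core.embeddings (older releases expose it as langchain.embeddings.base).

import requests
from langchain_core.embeddings import Embeddings

class LMStudioEmbeddings(Embeddings):
    """Wraps a locally hosted OpenAI-compatible /v1/embeddings endpoint."""

    def __init__(self, endpoint, model):
        self.endpoint = endpoint
        self.model = model

    def embed_documents(self, texts):
        # POST the whole batch of texts to the local embeddings route
        response = requests.post(f"{self.endpoint}/v1/embeddings", json={
            "input": texts,
            "model": self.model,
        })
        response.raise_for_status()
        return [item["embedding"] for item in response.json()["data"]]

    def embed_query(self, text):
        # A query is just a batch of one
        return self.embed_documents([text])[0]

With that in place you can keep your precomputed vectors and pass an instance as the second argument, or drop your manual get_embeddings step entirely and let the vector store drive the embedding:

embedder = LMStudioEmbeddings(lm_studio_endpoint, "text-embedding-granite-embedding-278m-multilingual")
vector_store = FAISS.from_embeddings(text_embeddings, embedder)
# or simply:
vector_store = FAISS.from_texts(texts, embedder)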