I'm trying to set up a simple example in DB-GPT which uses Elasticsearch as the vector store backend. This is part of the knowledge base initialization process where BM25Assembler is used for document retrieval and ranking.
I am running DB-GPT with Ollama, and both Ollama and Elasticsearch are deployed using Docker. Both services are up and appear to be working fine.
I'm encountering a ConnectionError while setting up a knowledge base in DB-GPT using BM25Assembler. The error occurs during the initialization of the assembler, after running:
python examples/rag/bm25_retriever_example.py
Traceback (most recent call last):
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/connectionpool.py", line 789, in urlopen
response = self._make_request(
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/connectionpool.py", line 536, in _make_request
response = conn.getresponse()
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/connection.py", line 507, in getresponse
httplib_response = super().getresponse()
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/http/client.py", line 1375, in getresponse
response.begin()
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/http/client.py", line 318, in begin
version, status, reason = self._read_status()
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/http/client.py", line 287, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/elastic_transport/_node/_http_urllib3.py", line 167, in perform_request
response = self.pool.urlopen(
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/connectionpool.py", line 843, in urlopen
retries = retries.increment(
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/util/retry.py", line 449, in increment
raise reraise(type(error), error, _stacktrace)
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/util/util.py", line 38, in reraise
raise value.with_traceback(tb)
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/connectionpool.py", line 789, in urlopen
response = self._make_request(
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/connectionpool.py", line 536, in _make_request
response = conn.getresponse()
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/urllib3/connection.py", line 507, in getresponse
httplib_response = super().getresponse()
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/http/client.py", line 1375, in getresponse
response.begin()
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/http/client.py", line 318, in begin
version, status, reason = self._read_status()
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/http/client.py", line 287, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/media/manhdt4/sda1/db-gpt/DB-GPT/examples/rag/bm25_retriever_example.py", line 50, in <module>
asyncio.run(main())
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
return future.result()
File "/media/manhdt4/sda1/db-gpt/DB-GPT/examples/rag/bm25_retriever_example.py", line 37, in main
assembler = BM25Assembler.load_from_knowledge(
File "/media/manhdt4/sda1/db-gpt/DB-GPT/dbgpt/rag/assembler/bm25.py", line 144, in load_from_knowledge
return cls(
File "/media/manhdt4/sda1/db-gpt/DB-GPT/dbgpt/rag/assembler/bm25.py", line 110, in __init__
if not self._es_client.indices.exists(index=self._index_name):
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/elasticsearch/_sync/client/utils.py", line 446, in wrapped
return api(*args, **kwargs)
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/elasticsearch/_sync/client/indices.py", line 1227, in exists
return self.perform_request( # type: ignore[return-value]
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/elasticsearch/_sync/client/_base.py", line 423, in perform_request
return self._client.perform_request(
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/elasticsearch/_sync/client/_base.py", line 271, in perform_request
response = self._perform_request(
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/elasticsearch/_sync/client/_base.py", line 316, in _perform_request
meta, resp_body = self.transport.perform_request(
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/elastic_transport/_transport.py", line 342, in perform_request
resp = node.perform_request(
File "/media/manhdt4/sda1/miniconda3/envs/dbgpt/lib/python3.10/site-packages/elastic_transport/_node/_http_urllib3.py", line 202, in perform_request
raise err from e
elastic_transport.ConnectionError: Connection error caused by: ConnectionError(Connection error caused by: ProtocolError(('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))))
Here is code in simple example related to error:
# Create the BM25 assembler. Per the traceback above, load_from_knowledge
# constructs the assembler, and its __init__ calls indices.exists() on the
# Elasticsearch client -- that request is where the ConnectionError is raised.
assembler = BM25Assembler.load_from_knowledge(
knowledge=knowledge,
es_config=es_config,
chunk_parameters=chunk_parameters,
)
Config:
def _create_es_config():
    """Build the Elasticsearch connection settings used by the BM25 example.

    Returns:
        ElasticsearchVectorConfig: settings for the ``bm25_es_dbgpt`` index on
        ``localhost:9200``, authenticating as user ``elastic``.
    """
    # Collect the settings first so the constructor call stays a one-liner.
    es_settings = {
        "name": "bm25_es_dbgpt",
        "uri": "localhost",
        "port": "9200",
        "user": "elastic",
        "password": "changeme",
    }
    return ElasticsearchVectorConfig(**es_settings)
What I've Tried
from elasticsearch import Elasticsearch
# Connect directly to the local node over plain HTTP with basic authentication,
# bypassing DB-GPT entirely.
es = Elasticsearch(['http://localhost:9200'], basic_auth=('elastic', 'changeme'))
What's causing this connection error in the DB-GPT context? I only want to run a simple example.
Sorry, I couldn't tag this question with db-gpt because that tag doesn't exist.
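The cause is that I built the ELK stack with Docker and enabled SSL/TLS, so Elasticsearch only accepts HTTPS on port 9200, while the example connects over plain HTTP and the server drops the connection: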
environment:
- node.name=es01
- cluster.name=${CLUSTER_NAME}
- discovery.type=single-node
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
- bootstrap.memory_lock=true
- xpack.security.enabled=true
- xpack.security.http.ssl.enabled=true
- xpack.security.http.ssl.key=certs/es01/es01.key
- xpack.security.http.ssl.certificate=certs/es01/es01.crt
- xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
- xpack.security.transport.ssl.enabled=true
- xpack.security.transport.ssl.key=certs/es01/es01.key
- xpack.security.transport.ssl.certificate=certs/es01/es01.crt
- xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
- xpack.security.transport.ssl.verification_mode=certificate
- xpack.license.self_generated.type=${LICENSE}
However, DB-GPT's BM25Assembler does not support connecting to Elasticsearch when SSL is enabled. Here are the changes I made to connect successfully:
def _create_es_config():
    """Build the Elasticsearch connection settings for a TLS-enabled cluster.

    Returns:
        ElasticsearchVectorConfig: settings for the ``bm25_es_dbgpt`` index on
        ``127.0.0.1:9200``, authenticating as user ``elastic`` and carrying
        the path of the CA certificate for the cluster.
    """
    return ElasticsearchVectorConfig(
        name="bm25_es_dbgpt",
        # BM25Assembler.__init__ reads the host from ``es_config.uri``, so the
        # keyword must be ``uri`` (not ``url``) for this address to be used.
        uri="127.0.0.1",
        port="9200",
        user="elastic",
        password="changeme",
        # Read by the modified __init__ as ``es_config.ca_certs`` and passed
        # to the Elasticsearch client together with ``verify_certs=True``.
        ca_certs="/path/to/cert/ca.crt",
    )
Below is the __init__ method of class BM25Assembler(BaseAssembler). The code was extracted from the library; the lines marked with comments are my modifications.
...
self._es_config = es_config
self._es_url = es_config.uri
self._es_port = es_config.port
self._es_username = es_config.user
self._es_password = es_config.password
self._index_name = es_config.name
self._k1 = k1
self._b = b
self._ca_certs = es_config.ca_certs # my changes
if self._es_username and self._es_password and self._ca_certs: # my changes
self._es_client = Elasticsearch( # my changes
hosts=f"https://{self._es_url}:{self._es_port}", # my changes
basic_auth=(self._es_username, self._es_password), # my changes
verify_certs=True, # my changes
ca_certs=self._ca_certs # my changes
)
elif self._es_username and self._es_password and not self._ca_certs: # my changes
self._es_client = Elasticsearch(
hosts=[f"http://{self._es_url}:{self._es_port}"],
basic_auth=(self._es_username, self._es_password),
)
else:
self._es_client = Elasticsearch(
hosts=[f"http://{self._es_url}:{self._es_port}"],
)
...
...