Search code examples
python, python-3.x, elasticsearch, ssl, haystack

Haystack ElasticsearchDocumentStore unable to connect to Elasticsearch


I am new to both Elasticsearch and Haystack. I followed the tutorial "Utilizing Existing FAQs for Question Answering" and initialized the DocumentStore with the following code:

import os
import time

from haystack.document_stores import ElasticsearchDocumentStore

# Wait 30 seconds only to be sure Elasticsearch is ready before continuing
time.sleep(30)

# Get the host where Elasticsearch is running, default to localhost
host = os.environ.get("ELASTICSEARCH_HOST", "localhost")

document_store = ElasticsearchDocumentStore(
    host=host,
    username="",
    password="",
    index="document",
    embedding_field="question_emb",
    embedding_dim=384,
    excluded_meta_data=["question_emb"],
    similarity="cosine",
)

I also tried a simpler version:

from haystack.document_stores import ElasticsearchDocumentStore
doc_store = ElasticsearchDocumentStore(
    scheme="https",
    host='localhost',
    username='', password='',
    index='test' 
)

But both failed with the following error:

WARNING:elasticsearch:GET https://localhost:9200/ [status:N/A request:0.037s]
Traceback (most recent call last):
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 670, in urlopen
    httplib_response = self._make_request(
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 381, in _make_request
    self._validate_conn(conn)
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 978, in _validate_conn
    conn.connect()
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connection.py", line 362, in connect
    self.sock = ssl_wrap_socket(
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\util\ssl_.py", line 386, in ssl_wrap_socket
    return context.wrap_socket(sock, server_hostname=server_hostname)
  File "C:\Users\dell\anaconda3\lib\ssl.py", line 500, in wrap_socket
    return self.sslsocket_class._create(
  File "C:\Users\dell\anaconda3\lib\ssl.py", line 1040, in _create
    self.do_handshake()
  File "C:\Users\dell\anaconda3\lib\ssl.py", line 1309, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1123)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\dell\anaconda3\lib\site-packages\elasticsearch\connection\http_urllib3.py", line 255, in perform_request
    response = self.pool.urlopen(
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 726, in urlopen
    retries = retries.increment(
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\util\retry.py", line 386, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\packages\six.py", line 734, in reraise
    raise value.with_traceback(tb)
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 670, in urlopen
    httplib_response = self._make_request(
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 381, in _make_request
    self._validate_conn(conn)
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 978, in _validate_conn
    conn.connect()
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\connection.py", line 362, in connect
    self.sock = ssl_wrap_socket(
  File "C:\Users\dell\anaconda3\lib\site-packages\urllib3\util\ssl_.py", line 386, in ssl_wrap_socket
    return context.wrap_socket(sock, server_hostname=server_hostname)
  File "C:\Users\dell\anaconda3\lib\ssl.py", line 500, in wrap_socket
    return self.sslsocket_class._create(
  File "C:\Users\dell\anaconda3\lib\ssl.py", line 1040, in _create
    self.do_handshake()
  File "C:\Users\dell\anaconda3\lib\ssl.py", line 1309, in do_handshake
    self._sslobj.do_handshake()
urllib3.exceptions.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1123)
---------------------------------------------------------------------------
ConnectionError                           Traceback (most recent call last)
~\anaconda3\lib\site-packages\haystack\document_stores\elasticsearch.py in _init_elastic_client(cls, host, port, username, password, api_key_id, api_key, aws4auth, scheme, ca_certs, verify_certs, timeout, use_system_proxy)
    270                 if not status:
--> 271                     raise ConnectionError(
    272                         f"Initial connection to Elasticsearch failed. Make sure you run an Elasticsearch instance "

ConnectionError: Initial connection to Elasticsearch failed. Make sure you run an Elasticsearch instance at `[{'host': 'localhost', 'port': 9200}]` and that it has finished the initial ramp up (can take > 30s).

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
<ipython-input-30-40f9994332f1> in <module>
----> 1 doc_store = ElasticsearchDocumentStore(
      2     scheme="https",
      3     host='localhost',
      4     username='elastic', password='do4-uzbyc4ZN2yUKBhtJ',
      5     index='CS1302'

~\anaconda3\lib\site-packages\haystack\nodes\base.py in wrapper_exportable_to_yaml(self, *args, **kwargs)
     46 
     47         # Call the actuall __init__ function with all the arguments
---> 48         init_func(self, *args, **kwargs)
     49 
     50     return wrapper_exportable_to_yaml

~\anaconda3\lib\site-packages\haystack\document_stores\elasticsearch.py in __init__(self, host, port, username, password, api_key_id, api_key, aws4auth, index, label_index, search_fields, content_field, name_field, embedding_field, embedding_dim, custom_mapping, excluded_meta_data, analyzer, scheme, ca_certs, verify_certs, recreate_index, create_index, refresh_type, similarity, timeout, return_embedding, duplicate_documents, index_type, scroll, skip_missing_embeddings, synonyms, synonym_type, use_system_proxy)
    141 
    142         # Base constructor might need the client to be ready, create it first
--> 143         client = self._init_elastic_client(
    144             host=host,
    145             port=port,

~\anaconda3\lib\site-packages\haystack\document_stores\elasticsearch.py in _init_elastic_client(cls, host, port, username, password, api_key_id, api_key, aws4auth, scheme, ca_certs, verify_certs, timeout, use_system_proxy)
    274                     )
    275         except Exception:
--> 276             raise ConnectionError(
    277                 f"Initial connection to Elasticsearch failed. Make sure you run an Elasticsearch instance at `{hosts}` and that it has finished the initial ramp up (can take > 30s)."
    278             )

ConnectionError: Initial connection to Elasticsearch failed. Make sure you run an Elasticsearch instance at `[{'host': 'localhost', 'port': 9200}]` and that it has finished the initial ramp up (can take > 30s).

I use Windows 10, the Elasticsearch version is 8.6 (the latest), and I don't use Docker. Any help is appreciated! (P.S. I have seen the post "Haystack's ElasticsearchDocumentStore() cannot connect running ElasticSearch container", but I still don't have a clue what to do.)


Solution

  • As of January 2023, Haystack does not yet support Elasticsearch version 8. I'd suggest that you use version 7 instead. You can keep an eye on the following GitHub issue, where support for version 8 is tracked: https://github.com/deepset-ai/haystack/issues/2810