Search code examples
elasticsearchelasticsearch-py

Elasticsearch: scroll between specified time frame


I have some data in Elasticsearch, as shown in the image below.

enter image description here

I used the example at the link below to do the scrolling:

https://gist.github.com/drorata/146ce50807d16fd4a6aa

# Open a scroll over INDEX_NAME with a match_all query, asking for up to 1000
# hits per page and keeping the scroll alive for one minute.
page = es.search(
    index = INDEX_NAME,
    scroll = '1m',
    size = 1000,
    body={"query": {"match_all": {}}})
    # NOTE(review): the indentation from this line on is as pasted; it looks
    # like this snippet was cut out of an enclosing function.
    sid = page['_scroll_id']
    scroll_size = page['hits']['total']

    # Start scrolling

    print( "Scrolling...")
    # NOTE(review): scroll_size is never reassigned inside the loop as shown,
    # so this condition cannot become false once it is true.
    while (scroll_size > 0):


        # NOTE(review): `count` is not defined anywhere in this snippet.
        print("Page: ",count)
        # NOTE(review): the hits carried by the initial es.search() response
        # are replaced here before the loop body reads them.
        page = es.scroll(scroll_id = sid, scroll = '10m')
        # Update the scroll ID
        sid = page['_scroll_id']

        # Handle each document returned in the current page.
        for hit in page['hits']['hits']:
            #some code processing here

My current requirement is to scroll through the data while restricting it to a time frame, i.e. specifying a start timestamp and an end timestamp. I need help with how to do this using scroll.


Solution

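  • Example code is below. The time range should be part of the Elasticsearch query itself. Also, you should process the result of the first query (the initial search already returns the first page of hits).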

    # Range query that restricts the scroll to documents whose "timestamp"
    # field falls between the two bounds (gte / lte, so both are inclusive).
    # The lower bound must be a real calendar date: day-of-month "00" is not
    # valid, so the range starts on the first of the month.
    es_query_dict = {"query": {"range": {"timestamp": {
        "gte": "2018-08-01T00:00:00Z", "lte": "2018-08-17T00:00:00Z"}}}}
    
    
    def get_es_logs():
        """Scroll through every document matching ``es_query_dict``.

        Runs the initial search with a scroll window, handles the hits of that
        first response, then keeps requesting further pages until a page comes
        back empty. The per-page document count and the final total are
        printed. The scroll context is cleared on the way out so it does not
        linger on the cluster until its keep-alive expires.

        Relies on ``source_es_ip``, ``scroll_time``, ``scroll_size`` and
        ``es_query_dict`` being defined in the enclosing scope.
        """
        es_client = Elasticsearch([source_es_ip], port=9200, timeout=300)

        total_docs = 0
        sid = None
        try:
            page = es_client.search(scroll=scroll_time,
                                    size=scroll_size,
                                    body=json.dumps(es_query_dict))
            while True:
                # Each response carries the scroll id to use for the next page.
                sid = page['_scroll_id']
                details = page["hits"]["hits"]
                if not details:
                    # An empty page means the scroll is exhausted.
                    break
                doc_count = len(details)
                total_docs += doc_count
                print("scroll size: " + str(doc_count))
                print("start bulk index docs")
                # index_bulk(details)
                print("end success")
                page = es_client.scroll(scroll_id=sid, scroll=scroll_time)
        finally:
            # Release the server-side scroll context, even if a request failed.
            if sid is not None:
                es_client.clear_scroll(scroll_id=sid)

        print("total docs: " + str(total_docs))