python, elasticsearch, kibana

Uploading a JSON file to Elasticsearch/Kibana


This code converts an access.log file to JSON format. I'm trying to upload the result to Elasticsearch in such a way that the index is created as well. How can I do that?

import json
import re
import logging
import ecs_logging
import time
import sys, requests, os
from datetime import datetime
from elasticsearch import Elasticsearch
from pprint import pprint


res = requests.get('http://localhost:9200')
print(res.content)

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])


i = 0
result = {}

with open('access.log') as f:
    lines = f.readlines()

regex = r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>.*)\] \"(?P<httpstatus>(GET|POST) .+ HTTP\/1\.1)\" (?P<returnstatus>\d{3} \d+) (\".*\")(?P<browserinfo>.*)\"'

for line in lines:

    r = re.match(regex, line)

    if r is not None:
        result[i] = {'IP address': r.group('ipaddress'), 'Time Stamp': r.group('dateandtime'), 
                     'HTTP status': r.group('httpstatus'), 'Return status': 
                     r.group('returnstatus'), 'Browser Info': r.group('browserinfo')}
        i += 1
    
print(result)

with open('data.json', 'w') as fp:
    json.dump(result, fp)

Solution

  • I've modified your code; hope it helps. If you have authentication enabled, you need to pass the credentials as shown below, and, as you asked, it will also create an index named "my-index".

    import json
    import re
    import logging
    import ecs_logging
    import time
    import sys, requests, os
    from datetime import datetime
    from elasticsearch import Elasticsearch, helpers
    from pprint import pprint
    from requests.auth import HTTPBasicAuth
    
    # authentication enabled
    client = Elasticsearch(["localhost:9200"], http_auth=('elastic', '<your-es-pwd>'))
    # no authentication
    #client = Elasticsearch(["localhost:9200"])
    
    i = 0
    result = {}
    
    with open('access.log') as f:
        lines = f.readlines()
    
    regex = r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>.*)\] \"(?P<httpstatus>(GET|POST) .+ HTTP\/1\.1)\" (?P<returnstatus>\d{3} \d+) (\".*\")(?P<browserinfo>.*)\"'
    
    for line in lines:
    
        r = re.match(regex, line)

        if r is not None:
            result[i] = {'IP address': r.group('ipaddress'), 'Time Stamp': r.group('dateandtime'), 
                         'HTTP status': r.group('httpstatus'), 'Return status': 
                         r.group('returnstatus'), 'Browser Info': r.group('browserinfo')}
            i += 1
    
    
    with open('data.json', 'w') as fp:
        json.dump(result, fp)
        
    directory = '.'
    
    
    # Build bulk actions from every *data.json file found in the directory
    def load_json(directory):
        data = []
        for filename in os.listdir(directory):
            if filename.endswith('data.json'):
                with open(os.path.join(directory, filename), 'r') as open_file:
                    json_data = json.load(open_file)
                    for i, k in enumerate(json_data.keys()):
                        data.append({
                            "_index": "my-index",
                            "_type": "my-type",   # _type is deprecated in Elasticsearch 7.x
                            "_id": i,
                            "_source": json_data[k]
                        })
        return data
    
    # bulk-index the parsed log lines; the "my-index" index is created automatically
    helpers.bulk(client, load_json(directory))
    
    res = requests.get('http://localhost:9200', auth=HTTPBasicAuth('elastic', '<your-es-pwd>'))
    #no authentication enabled
    #res = requests.get('http://localhost:9200')
    print(res.content)
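
  • To verify the upload worked, you can refresh the index and query it with the same client. This is only a minimal sketch, assuming the "my-index" name and the `client` created above (not part of the original answer):

    # refresh so the freshly bulk-indexed documents become searchable
    client.indices.refresh(index="my-index")

    # how many log lines were indexed?
    print(client.count(index="my-index"))

    # fetch one document back to eyeball the fields
    pprint(client.search(index="my-index", body={"query": {"match_all": {}}, "size": 1}))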