Search code examples
pythonjsonlistip

Find a list of unique IP addresses from a JSON log file in Python


How can I write a Python program that finds the list of unique IP addresses in a JSON file?

I am new to Python and I have the following data in JSON format. I want to find the unique values of the "remoteIp" key.

{
    "jsonPayload": {
      "enforcedSecurityPolicy": {
        "configuredAction": "DENY",
        "preconfiguredExprIds": [
          "owasp-crs-v030001-id942220-sqli"
        ],
        "priority": 2000,
        "outcome": "DENY"
      }
    },
    "httpRequest": {
      "requestMethod": "POST",
      "requestUrl": "https://wwwwwww.google.com///n",
      "requestSize": "3004",
      "status": 403,
      "responseSize": "274",
      "userAgent": "okhttp/3.12.2",
      "remoteIp": "182.2.169.59",
      "serverIp": "10.114.44.4"
    }
}

The solution I have written so far fetches every remoteIp value, but the resulting output still contains duplicates.

import json

# Key paths stripped from every log record, listed in the exact order in
# which they are deleted; the last element of each path is the key removed.
# (Abandoned de-duplication attempt: unique_ip = {})
DROPPED_PATHS = (
    ('resource',),
    ('timestamp',),
    ('severity',),
    ('logName',),
    ('trace',),
    ('spanId',),
    ('receiveTimestamp',),
    ('jsonPayload', 'statusDetails'),
    ('jsonPayload', '@type'),
    ('insertId',),
    ('jsonPayload', 'enforcedSecurityPolicy', 'name'),
    ('httpRequest', 'latency'),
)

with open("automation.json") as file:
    data = json.load(file)
    for record in data:
        for *parents, leaf in DROPPED_PATHS:
            # Walk down to the dict that owns the key, then drop the key.
            holder = record
            for key in parents:
                holder = holder[key]
            del holder[leaf]

        # using record['insertId'] above for uniquely identifying a record

# Write the trimmed records back out, pretty-printed.
with open('automation_new.json', 'w') as file:
    json.dump(data, file, indent=2)

for d2 in data:
    request = d2['httpRequest']
    url = request['requestUrl']
    method = request['requestMethod']
    s3 = request['remoteIp']
    status_text = str(request['status'])
    agent = request['userAgent']
    # Every address is printed as encountered, so repeats show up again.
    print(s3)

# The with-block above has already closed this handle; closing again is a no-op.
file.close()

Solution

  • Use a set(): a set keeps only one copy of each value, so duplicates are dropped automatically.

    # A set keeps one copy of each element, so the repeated 'a' collapses.
    a = ['a', 'b', 'a']
    b = {item for item in a}
    b
    # shows {'a', 'b'} in an interactive session (element order may vary)
    

    Please also print the type of s3, e.g. print(type(s3)), so we can see what kind of value is being added to the set.

    # Gather each distinct remoteIp from the loaded records. The set must be
    # created before the loop; adding a value it already holds is a no-op,
    # which is what removes the duplicates.
    unique_ip = set()
    for d2 in data:
        #...
        s3 = d2['httpRequest']['remoteIp']
        #...
        # remoteIp is a plain string in the sample data, so it is added as-is.
        unique_ip.add(s3)
        # Report the size after the add so the count includes this record.
        print("length of unique ip set is " + str(len(unique_ip)))

    print(unique_ip)