Search code examples
python duplicates readfile

avoid same data when reading from a file


# Addresses whose usage records should be reported.
strStr = ["192.168.42.12", "192.168.42.2"]

with open(datausage) as usage_file:
    for record in usage_file.readlines():
        # A record is printed once for every watched address it contains.
        for wanted in strStr:
            if wanted not in record:
                continue
            # Address: the slice from index 5 up to the last 50 characters,
            # with the padding spaces removed.
            address = record[5:-50].replace(" ", "")
            # Usage: the last 8 characters, minus the 'KB'/'B' suffix and
            # the trailing newline.
            amount = record[-8:].replace('KB', '').replace('B', '').replace('\n', '')
            # Single-argument print(...) emits the same text under Python 2
            # and Python 3.
            print(address + '\t\t\t' + str(amount))

Result of the above code (columns: IP address, usage):

192.168.42.12             151
192.168.42.12            4.95
192.168.42.12            3.25
192.168.42.2             3.73
192.168.42.2             3.73
192.168.42.12            5.36
192.168.42.12              705
192.168.42.12              282
192.168.42.12              225
192.168.42.2                81
192.168.42.2                40

Desired/expected output :

I need to display each of the two IP addresses only once, together with the sum of its usage, like this:

192.168.42.12      1025(sample)
192.168.42.2       540(sample)

Any help would be appreciated. Thanks in advance!


Solution

  • Use a dictionary to store the cumulative sum for the corresponding ips:

    You can read the IPs from a file, use them as dictionary keys, and add each usage value to the matching key:

    # Running usage total for every IP address listed in ipfile.
    result_count = {}

    with open(ipfile) as f:
        for line in f:
            # Drop all whitespace, including the trailing newline.
            ip = ''.join(line.split())
            # Skip blank lines: an empty key would corrupt the totals.
            if ip:
                result_count[ip] = 0.0

    with open(datausage) as f:
        for line in f:
            # The address is read from the fixed slice [5:-50]; this assumes
            # fixed-width records -- confirm against the data file.
            result_ip = line[5:-50].replace(' ', '')
            # Exact dictionary lookup instead of a substring test: a record
            # for 192.168.42.21 must not be counted for 192.168.42.2, and an
            # unknown address is skipped rather than raising KeyError.
            if result_ip not in result_count:
                continue
            # The usage figure is read from the last 8 characters of the line.
            usage = line[-8:]
            usage = usage.replace('KB', '').replace('B', '')
            usage = usage.replace('\n', '').replace(' ', '')
            # NOTE(review): values suffixed 'B' and 'KB' are added together
            # without unit conversion -- confirm this is intended.
            result_count[result_ip] += float(usage)

    # One line per address with its total. The single-argument print(...)
    # form prints the same text under Python 2 and Python 3.
    for key, value in result_count.items():
        print(key + '\t' + str(value))