Search code examples
pythonunique

python Unique Results


I have 2 files with ip-Addresses and I am trying to see which addresses from file 1 are not in file 2. I just cant get it to work, what am I doing wrong? I got the following code:

access = []   
with open("C:\\users\\joey\\desktop\\access.log",'r') as bestand:
    for line in bestand:
        try:
            splittedline = line.split('sftp-session')[1].split("[")[1].split("]")[0]
        except Exception:
            continue
        access.append(splittedline)


nodes = []
with open("C:\\users\\joey\\desktop\\exit_nodes.csv",'r') as bestand1:
    for line in bestand1:
        nodes.append(line)


setA = set(access)
setB = set(nodes)
listC = list(setB - setA)

print listC

output: (just a small part)

59.231\n', '78.41.115.145\n', '62.210.76.96\n', '84.53.203.38\n', '185.82.216.119\n', '176.10.99.205\n', '107.150.53.178\n', '37.157.192.208\n', '91.238.60.100\n', '110.93.23.170\n', '162.247.72.213\n', '18.239.0.140\n', '84.115.35.248\n', '106.187.37.158\n', '213.61.149.125\n', '86.178.119.84\n', '50.76.159.218\n', '46.72.101.220\n', '78.46.51.124\n', '178.162.193.213\n', '207.201.223.196\n', '101.99.64.150\n', '5.199.142.93\n', '5.165.42.171\n', '185.17.144.138\n', '81.219.51.206\n', '65.181.113.136\n', '185.13.37.158\n', '104.232.3.33\n', '77.109.141.140\n', '77.170.1.2\n', '93.126.101.223\n', '188.246.75.178\n', '193.107.85.61\n', '188.138.1.229\n', '108.26.225.148\n', '108.61.212.102\n', '128.79.53.244\n', '81.89.0.195\n', '94.23.30.53\n', '104.237.156.214\n', '68.233.235.217\n', '188.166.49.82\n', '192.3.177.167\n', '173.208.196.215\n', '77.109.138.44\n', '106.187.45.156\n', '78.142.175.70\n', '71.230.253.68\n', '66.146.193.31\n', '90.231.152.159\n', '122.19.43.24\n', '79.98.107.90\n', '178.9.251.184\n', '176.108.160.253\n', '93.95.228.116\n', '106.185.29.93\n', '109.169.23.202\n', '94.242.57.26\n', '79.165.223.209\n', '192.241.199.208\n', '162.220.56.186\n', '212.71.238.203\n', '178.79.161.152\n', '78.21.6.161\n', '85.159.113.228\n', '37.139.3.171\n', '104.167.102.244\n', '62.49.92.150\n', '66.220.3.179\n', '185.61.148.183\n', '104.167.113.138\n', '66.85.131.72\n', '37.59.123.142\n', '121.54.175.50\n', '94.242.251.112\n', '185.13.38.185\n', '24.175.166.20\n', '54.65.198.84\n', '176.123.6.101\n', '176.10.99.202\n', '176.106.54.54\n

Solution

  • Try stripping out newlines for each line you read in before adding them to your list. I think the newlines in your second list is interfering with the comparison.

    >>> a = "one two three\n"
    >>> a
    'one two three\n'
    >>> a.rstrip("\n")
    'one two three'
    >>> a
    'one two three\n'