Search code examples
python, non-ascii-characters

Excluding non-ASCII characters in Python


I have a script that uses a dictionary (a word list) to decrypt an encrypted message. The problem is that the decryption process produces a lot of junk (i.e. non-ASCII) characters. Here is my code:

from Crypto.Cipher import AES
import base64
import os

# Candidate keys are padded with PADDING up to BLOCK_SIZE characters
# before they are handed to the cipher.
BLOCK_SIZE = 32
PADDING = '{'

# Base64-encoded ciphertext to decrypt
encrypted = "WI4wBGwWWNcxEovAe3p+GrpK1GRRQcwckVXypYlvdHs="


def DecodeAES(c, e):
    """Base64-decode *e*, decrypt it with cipher *c*, strip trailing PADDING."""
    raw = base64.b64decode(e)
    plain = c.decrypt(raw)
    return plain.rstrip(PADDING)

# Try every word in words.txt (one candidate key per line) against the
# ciphertext and print whatever each key decrypts it to.
# The file is opened in a ``with`` block so it is closed even if the
# decryption raises, and it is read lazily line by line.
with open('words.txt') as adib:
    for line in adib:
        # rstrip('\n') removes every trailing newline, so the candidate can
        # never end in "\n"; no separate check for that is needed.
        secret = line.rstrip('\n')

        if len(secret) >= BLOCK_SIZE:
            # The padding below only produces a BLOCK_SIZE-long key for
            # candidates strictly shorter than BLOCK_SIZE.
            print("Error, string too long. Must be less than 32 characters.")
            continue

        # create a cipher object using the secret, padded to BLOCK_SIZE
        cipher = AES.new(secret + (BLOCK_SIZE - len(secret) % BLOCK_SIZE) * PADDING)

        # decode the encoded string
        decoded = DecodeAES(cipher, encrypted)
        print(decoded + "\n")

What I have thought of so far is converting the decoded string to ASCII and then excluding the non-ASCII characters, but it didn't work.


Solution

  • This version will work:

    #!/usr/bin/env python
    # -*- coding: UTF-8 -*-
    
    def evaluate_string_is_ascii(mystring):
        """Return True if every character of *mystring* is strict ASCII.

        A character is accepted when its code point lies in 1..127.
        NUL (code point 0), extended codes (128..255) and anything above
        255 are rejected.  An empty string yields True.

        If an item of *mystring* is not a single character (``ord`` raises
        ``TypeError``), the result is False instead of an exception.
        """
        for c in mystring:
            try:
                code = ord(c)
            except TypeError:
                # Not a single character: treat as "not ASCII".  Only the
                # error ord() can raise is caught, so unrelated exceptions
                # (e.g. KeyboardInterrupt) are no longer swallowed.
                return False
            if not 0 < code <= 127:
                # First offending character decides; no need to scan further.
                return False
        return True
    
    
    # Sample input: plain lines mixed with lines containing non-ASCII junk.
    my_text_content="""azertwxcv
    123456789
    456dqsdq13
    o@��nS��?t#�
    lkjal�
    kfldjkjl&é"""
    
    # Echo only the lines made up entirely of strict ASCII characters.
    for candidate in my_text_content.split('\n'):
        if not evaluate_string_is_ascii(candidate):
            # Skip lines that contain at least one non-ASCII character.
            continue
        print(candidate)