Search code examples
applescriptapplescript-objc

Is there a way to decode a Quoted-Printable encoding using ApplescriptObjc?


I am looking for a way to decode HTML Source code from a Mail message that is encoded in Quoted-Printable encoding, and wondering if that is something that can be done in AppleScriptObjC?

I know there is a built-in module in Python 3 called quopri that will do the job, so I have a fallback solution. I am just hoping I can do this without needing to install Python 3 on the user's machine. Since the script is already written in AppleScript, I thought AppleScriptObjC would be the best place to look for an answer.


Solution

  • The quoted-printable encoding standard is defined as part of RFC 2045.

    To summarise briefly and in a very general sense: non-printable ASCII characters are encoded as a three-character sequence beginning with "=" followed immediately by a two-digit hexadecimal number, which evaluates to an ASCII character code; "=" is, itself, encoded in the same way; lines can be up to, but not greater than, 76 characters long, for which "=" acts as a continuation (or soft-break) character if it appears at the end of a line.

    I took a superset of these rules to ensure the sufficient conditions would be met, and wrote the following script around them. It is presented "as is", without any special attempt at optimisation (for which there is still scope):

    #!/usr/bin/env osascript
    --------------------------------------------------------------------------------
    use framework "Foundation"
    use scripting additions
    --------------------------------------------------------------------------------
    # Reference to Foundation's NSString class, used by the handlers below.
    prop NSString: ref current application's NSString
    
    # Numeric Foundation constants, spelled out because they are passed as
    # plain integers to NSString methods:
    #   rgex - passed as the `options:` argument of the search/replace calls;
    #          1024 is the value of NSRegularExpressionSearch.
    #   utf8 - passed as the `encoding:` argument when reading a file;
    #          4 is the value of NSUTF8StringEncoding.
    prop rgex: 1024
    prop utf8: 4
    --------------------------------------------------------------------------------
    on qp_decode(input)
            # Decode quoted-printable text.
            #
            # input: either the encoded text itself, or a value that coerces to a
            #        file reference, in which case the file is read as UTF-8 and
            #        its contents are decoded instead.
            # Returns the decoded text (the value produced by the final map()).
            # Each "=HH" escape becomes the single character whose id is HH.
            local input, source, qp
    
            try # test for existence as a file path
                    set fileRef to input as «class fsrf»
                    set source to NSString's stringWithContentsOfURL:fileRef ¬
                            encoding:utf8 |error|:(missing value)
            on error
                    # Not a file path: treat the argument as the encoded text.
                    set source to input
            end try
    
            # Wrap the value in a reference so the helpers can update it in place.
            set qp to __s(source)
    
            replace(qp, "(?m)\\h*$", "")      # Removing trailing whitespace
            replace(qp, "(?sm)=(\\R|$)", "")  # Join lines split with a soft-break
    
            # Decode escape sequences, i.e. "=HH"
            return map(qp, "(?i)=[A-F0-9]{2}", char)
    end qp_decode
    --------------------------------------------------------------------------------
    # "PRIVATE" HANDLERS:
    on __s(init)
            # Box `init` in a one-item list and hand back a reference to that
            # item, giving callers a value whose `contents` can be reassigned.
            return a reference to item 1 of {init}
    end __s
    
    to _nsstring_ from _ref given mutability:m as boolean : false
            # Build an NSString from the value behind `_ref`; when `mutability`
            # is true, a mutable copy is produced instead. The new string is
            # also written back through `_ref` when that is possible.
            # Returns the (possibly mutable) string object.
            local _ref, m, str
     
            if m then
                    set str to (NSString's stringWithString:_ref)'s mutableCopy()
            else
                    set str to NSString's stringWithString:_ref
            end if
    
            try
                    # Best effort: fails silently if `_ref` is not assignable.
                    set _ref's contents to str
            end try
    
            return str
    end _nsstring_
    
    to replace(_ref, a, b)
            # Regex search-and-replace over the whole string behind `_ref`:
            # every match of pattern `a` is replaced with template `b`.
            # The outcome is written back through `_ref` (when assignable) and
            # also returned as AppleScript text.
            local _ref, a, buffer, span, outcome
     
            set buffer to _nsstring_ from _ref with mutability
            set span to {0, buffer's |length|()}
    
            buffer's replaceOccurrencesOfString:a withString:b ¬
                    options:rgex |range|:span
    
            set outcome to buffer as text
            try
                    # Best effort: fails silently if `_ref` is not assignable.
                    set _ref's contents to outcome
            end try
    
            return outcome
    end replace
    
    to map(_ref, a, func as handler)
            # Replace every match of regex `a` in the string behind `_ref` with
            # the text returned by `func`, which is called with the matched text
            # minus its first character (for "=3D" it receives "3D").
            #
            # The string is processed in one left-to-right pass: only the matched
            # range is replaced, and scanning resumes immediately AFTER the
            # replacement. Replacement text is therefore never re-scanned, so
            # "=3D100" yields "=100" rather than having "=10" decoded a second
            # time. The pass is a loop, so its depth does not grow with the
            # number of matches.
            #
            # The outcome is written back through `_ref` (when assignable) and
            # returned as AppleScript text.
            local _ref, a
    
            script
                    # Kept in a property so it can be invoked as fn(...).
                    prop fn : func
                    prop original : _nsstring_ from _ref
                    # Working copy that is edited in place.
                    prop s : original's mutableCopy()
                    # Offset in `s` of the first character not yet scanned.
                    prop cursor : 0
    
                    on next()
                            repeat
                                    set total to s's |length|()
                                    s's rangeOfString:a options:rgex ¬
                                            |range|:[cursor, total - cursor]
                                    set [j, n] to the result's [location, |length|]
                                    # A zero-length range means nothing was found.
                                    if n = 0 then exit repeat
    
                                    set u to (s's substringWithRange:[j, n]) as text
                                    set c to fn(text 2 thru -1 of u)
                                    s's replaceCharactersInRange:[j, n] withString:c
    
                                    # New length - old length + n is the length of
                                    # the inserted text; skip past it.
                                    set cursor to j + n + (s's |length|()) - total
                            end repeat
    
                            return yield()
                    end next
    
                    on yield()
                            try
                                    # Best effort: fails silently if `_ref` is
                                    # not assignable.
                                    set _ref's contents to s as text
                            end try
                            s as text
                    end yield
            end script
            
            result's next()
    end map
    
    to base_10 from base_16
            # Convert text made of hexadecimal digits (e.g. "3D") to its integer
            # value (61).
            #
            # The digit table deliberately omits "0": `offset` returns 0 for a
            # character that is not found, so "0" maps to 0 while "1".."F" map
            # to 1..15 by position. Any other character also contributes 0, so
            # callers are expected to pass validated hex text.
            local base_16
    
            set p to 1      # place value of the current digit: 1, 16, 256, ...
            set |ξ| to 0    # running total
    
            script hexadecimal
                    prop digits: "123456789ABCDEF"
                    # Least-significant digit first.
                    prop coefficients: reverse of base_16's characters
            end script
    
            # Make the digit lookup case-insensitive regardless of any
            # `considering case` block the caller may be running under.
            ignoring case
                    repeat with x in hexadecimal's coefficients
                            set |ξ| to |ξ| + (offset of x in (hexadecimal's digits)) * p
                            set p to p * 16
                    end repeat
            end ignoring
    
            |ξ|
    end base_10
    
    to char(k)
            # Return the character whose id is `k`. `k` may be an integer code,
            # or text holding hexadecimal digits, which is converted first.
            if class of k is text then
                    set code to base_10 from k
            else
                    set code to k
            end if
            return character id code
    end char
    ---------------------------------------------------------------------------❮END❯
    

    The main handler you'll be calling is qp_decode(), which takes a single argument that can be either a literal string or a file path. If it is a file path, the contents of the file are read as a UTF-8 encoded string and then processed exactly as a literal string argument would be.

    I note from the comment thread that you use Script Debugger. If the script fails to compile or execute, I'd recommend running the script in Script Editor (however, you'll need to copy/paste again from the original source here, as Script Debugger manipulates the contents of the clipboard prior to pasting). I actually used vim and osascript in authoring and debugging this script, so it's virgin text.