Search code examples
python regex zapier

Extract words between the word "for" and the opening parenthesis "(" in an email subject line. Email subject line is the input


The client's name comes after the word "for" and before the opening parenthesis "(" that starts the proposal number. I need to extract the client name so I can use it to look up the deal in a later step. What would be the easiest way to set this up: Zapier's Extract Pattern, or Zapier Code in Python?

I have tried this and it did not work. It seemed promising though.

input_data

client = Reminder: Leruths has sent you a proposal for Business Name (#642931)

import regex
rgx = regex.compile(r'(?si)(?|{0}(.*?){1}|{1}(.*?)
{0})'.format('for', '('))
s1 = 'client'
for s in [s1]:
m = rgx.findall
for x in m:
print x.strip()

I have also tried this and it did not work.

start = mystring.find( 'for' )
end = mystring.find( '(' )
if start != -1 and end != -1:
result = mystring[start+1:end]

I am looking for Business Name to be returned in my example.


Solution

  • Fastest way:

    # Anchor on the standalone word " for " so that "for" inside another word
    # (e.g. "Stanford", "Information") is not mistaken for the marker.
    marker = ' for '
    start = client.find(marker)
    # Only look for the parenthesis after the marker; a parenthesis earlier in
    # the subject must not end the name.
    end = client.find('(', start + len(marker)) if start != -1 else -1
    # Fall back to an empty result when either marker is missing instead of
    # slicing with the -1 that str.find returns on failure.
    result = client[start + len(marker):end].strip() if end != -1 else ''
    print(result)
    

    With regex:

    import re
    # The greedy (.*) backtracks from the end of the line, so the capture
    # stops at the last " (" and a name containing parentheses is kept whole.
    result = re.search(r' for (.*) [(]', client)
    # re.search returns None when the subject has no " for ... (" part.
    print(result.group(1) if result else '')
    

    There is probably a cleaner way to do this, but here is another solution without regex

    client = "Reminder: Leruths has sent you a proposal for Business Name (#642931)"

    cs = client.split(" ")
    name_words = []
    append = False
    for word in cs:
        if append:
            # The first word that opens a parenthesis starts the proposal
            # number, so the name is complete; stop instead of resuming on a
            # later "for".
            if word.startswith("("):
                break
            # Once collecting, keep every word -- including a "for" that is
            # part of the name itself (e.g. "Food for Thought").
            name_words.append(word)
        elif word == "for":
            append = True
    name = " ".join(name_words).strip()
    print(name)
    

    Another method:

    client = "Reminder: Leruths has sent you a proposal for Business Name (#642931)"

    # Split on single spaces and locate the marker word; list.index raises
    # ValueError if the subject has no standalone "for".
    cs = client.split(" ")
    forindex = cs.index("for")

    # Gather the words after "for" up to the first one opening a parenthesis.
    collected = []
    for word in cs[forindex + 1:]:
        if word.startswith("("):
            break
        collected.append(word)
    name = " ".join(collected).strip()

    print(name)
    

    Running the code below gives:

    Regex method took 2.3912417888641357 seconds
    Search word by word method took 4.78193998336792 seconds
    Search with list index method took 3.1756017208099365 seconds
    String indexing method took 0.8496286869049072 seconds
    

    Code to check which method is fastest at getting the name over a million tries:

    import re
    import time
    
    client = "Reminder: Leruths has sent you a proposal for Business Name (#642931)"
    
    def withRegex(client):
        """Return the client name found between " for " and the last " (".

        Args:
            client: Email subject line, e.g.
                "Reminder: X has sent you a proposal for Business Name (#642931)".

        Returns:
            The text captured between " for " and " (", or an empty string
            when the subject does not contain that pattern.
        """
        # The greedy (.*) backtracks from the end of the line, so the capture
        # stops at the last " (" rather than the first.
        result = re.search(r' for (.*) [(]', client)
        # re.search returns None on no match; calling .group on it would
        # raise AttributeError.
        if result is None:
            return ""
        return result.group(1)
    
    def searchWordbyWord(client):
        """Return the words between the first "for" and the next "(..." word.

        The subject is split on single spaces and scanned once. Collection
        starts after the first word equal to "for" and stops at the first
        following word that begins with "(".

        Args:
            client: Email subject line to scan.

        Returns:
            The collected words joined by single spaces and stripped, or an
            empty string when the subject has no standalone "for".
        """
        name_words = []
        append = False
        for word in client.split(" "):
            if append:
                # The opening parenthesis ends the name; stop here so a later
                # "for" in the subject cannot restart collection.
                if word.startswith("("):
                    break
                # Keep every word once collecting, including a "for" that
                # belongs to the name (e.g. "Food for Thought").
                name_words.append(word)
            elif word == "for":
                append = True
        return " ".join(name_words).strip()
    
    def searchWithListIndex(client):
        """Return the words after the first "for" up to the first "(..." word.

        Args:
            client: Email subject line; it is split on single spaces.

        Returns:
            The selected words joined by single spaces, with surrounding
            whitespace stripped.

        Raises:
            ValueError: If no word in the subject is exactly "for".
        """
        words = client.split(" ")
        # list.index raises ValueError when there is no standalone "for".
        first_after_for = words.index("for") + 1

        picked = []
        for word in words[first_after_for:]:
            if word.startswith("("):
                break
            picked.append(word)

        return " ".join(picked).strip()
    
    def stringIndexing(client):
        """Return the text between the first " for " and the next "(".

        Args:
            client: Email subject line to search.

        Returns:
            The text between the standalone word "for" and the first "(" that
            follows it, with surrounding whitespace stripped. An empty string
            is returned when either marker is missing.
        """
        # Match " for " with its surrounding spaces so that "for" embedded in
        # another word (e.g. "Stanford") is not taken as the marker.
        marker = ' for '
        start = client.find(marker)
        if start == -1:
            return ''
        start += len(marker)
        # Search from after the marker so an earlier parenthesis is ignored.
        end = client.find('(', start)
        if end == -1:
            return ''
        return client[start:end].strip()
    
    # Time each extraction method over the same number of calls on the same
    # subject line and report the elapsed seconds per method.
    tries = 1000000  # exactly one million calls per method
    methods = (
        ("Regex method", withRegex),
        ("Search word by word method", searchWordbyWord),
        ("Search with list index method", searchWithListIndex),
        ("String indexing method", stringIndexing),
    )
    for label, method in methods:
        # perf_counter is the clock meant for measuring short intervals;
        # time.time() follows the wall clock, which can be adjusted mid-run.
        started = time.perf_counter()
        for _ in range(tries):
            method(client)
        elapsed = time.perf_counter() - started
        print(label + " took " + str(elapsed) + " seconds")