Is there a way of programmatically extracting terms (length of contract) from free text?

I want to extract contract length from text to term in months. Free text fields range from:

"2 x 5 year terms",
"3 further  x 4 years",
"two(2) further terms of five(5) years each",
"Two (2) Years + Two (2) Years + Two (2) Years",
"1 years + 1 years + 1 years" ,
"2 x 3 years",
"1 year and 6 months"

I'd like the output to be:

120 months,
144 months,
120 months,
72 months, 
36 months
72 months
18 months

import re

# One token per match: a number, a unit, or an arithmetic keyword.  The
# letter look-arounds stop keywords from matching inside longer words
# (e.g. the "x" in "extension" or the "and" in "thousand") while still
# allowing forms such as "2x5" or "5years".
_DURATION_TOKEN = re.compile(
    r'(?P<num>\d+)'
    r'|(?P<year>(?<![a-z])years?(?![a-z]))'
    r'|(?P<month>(?<![a-z])months?(?![a-z]))'
    r'|(?P<times>(?<![a-z])x(?![a-z])|\*)'
    r'|(?P<of>(?<![a-z])of(?![a-z]))'
    r'|(?P<add>\+|(?<![a-z])and(?![a-z]))'
)


def calculate_duration(term):
    """Return the total contract length described by *term*, in months.

    The text is scanned once for digits, the units ``year(s)`` /
    ``month(s)`` and the keywords ``x``, ``*``, ``of``, ``+`` and ``and``.
    Every other word is ignored, so spelled-out numbers only count when a
    digit accompanies them (``"Two (2) Years"``).  The result is a sum of
    products: ``x``/``*``/``of`` multiply the neighbouring numbers, anything
    else starts a new term that is added to the total.

    Args:
        term: Free-text description such as ``"2 x 5 year terms"`` or
            ``"1 year and 6 months"``.  Matching is case-insensitive.

    Returns:
        The duration in months as an ``int``, or ``None`` when the text is
        empty, contains no number, or contains a number that is never given
        a unit (so its meaning cannot be determined).
    """
    if not term:
        return None

    total = 0              # months contributed by already-completed terms
    product = None         # value of the term currently being built
    has_unit = False       # whether the current term has seen year/month
    multiply_next = False  # whether the next number multiplies the term

    for match in _DURATION_TOKEN.finditer(term.lower()):
        kind = match.lastgroup
        if kind == 'num':
            value = int(match.group())
            if multiply_next and product is not None:
                product *= value
            else:
                if product is not None:
                    if not has_unit:
                        # A bare number followed by another number with no
                        # unit in between is ambiguous.
                        return None
                    total += product
                product, has_unit = value, False
            multiply_next = False
        elif kind == 'times':
            multiply_next = True
        elif kind == 'of':
            # "2 terms of 5 years" multiplies, but once a term already has
            # its unit ("3 years with option of 2 years") the next number
            # starts a new term instead.
            multiply_next = not has_unit
        elif kind == 'add':
            multiply_next = False
        elif product is not None and not has_unit:
            # Unit token: convert years to months; months are already the
            # target unit.  Only the first unit of a term is applied.
            if kind == 'year':
                product *= 12
            has_unit = True

    if product is None or not has_unit:
        return None
    return total + product

# Demo: run the parser over a handful of sample contract-term strings.
terms = [
    "2 x 5 year terms",
    "3 further x 4 YEAR terms",
    "Two (2) Years + Two (2) Years + Two (2) Years",
    "1 years + 1 years + 1 years",
    "2 x 3 years",
]

# Pair each sample with its computed duration and print one line per sample.
for term, duration in zip(terms, map(calculate_duration, terms)):
    print(f"{term}: {duration} months")

Solution

"... I want to extract contract length from text to term in months. ..."

Utilize the eval built-in function.

Traverse the text, appending the corresponding values (numbers and operators).
When a "year" value is encountered, adjust the previous value accordingly, i.e. multiply it by 12.

From here, produce a mathematical expression, by concatenating the values.

Here is an example.

import re

def parse(s: str) -> list:
    """Tokenise a free-text contract term into numbers and operators.

    The text is split on whitespace and each word is classified:

    * a word containing digits contributes the integer formed by those
      digits (``"(2)"`` -> ``2``); if the same word also contains ``year``
      (``"5-year"``) the value is converted to months straight away;
    * a word containing ``year`` multiplies the preceding number by 12;
    * ``x`` / ``of`` become ``'*'`` and ``+`` / ``and`` become ``'+'``
      (case-insensitive);
    * every other word is ignored, so months stay as plain numbers.

    Args:
        s: Description such as ``"2 x 5 year terms"``.

    Returns:
        A list of ``int`` values and ``'*'`` / ``'+'`` strings which, joined
        with spaces, form an arithmetic expression in months.
    """
    e = []
    for word in s.split():
        lowered = word.lower()
        digits = re.sub(r'\D', '', word)
        if digits:
            value = int(digits)
            if 'year' in lowered:
                value *= 12
            e.append(value)
        elif 'year' in lowered:
            # Only scale an actual number: with an empty list this used to
            # raise IndexError, and with a trailing operator it repeated the
            # operator string twelve times.
            if e and isinstance(e[-1], int):
                e[-1] *= 12
        elif lowered in ('x', 'of'):
            e.append('*')
        elif lowered in ('+', 'and'):
            e.append('+')
    return e

# Sample inputs taken from the question.
text = [
    '2 x 5 year terms',
    '3 further x 4 years',
    'two(2) further terms of five(5) years each',
    'Two (2) Years + Two (2) Years + Two (2) Years',
    '1 years + 1 years + 1 years',
    '2 x 3 years',
    '1 year and 6 months',
]

for string in text:
    # Render the token list as a space-separated arithmetic expression.
    exp = ' '.join(str(token) for token in parse(string))
    # The evaluated expression is built solely from parse()'s output; do not
    # feed eval() text that has not gone through that filtering.
    print(f'{exp} = {eval(exp)}')

Output

2 * 60 = 120
3 * 48 = 144
2 * 60 = 120
24 + 24 + 24 = 72
12 + 12 + 12 = 36
2 * 36 = 72
12 + 6 = 18

Edit

A similar approach would be to first remove every token that does not contribute to the expression, i.e., keeping only "years", "of", "and", digits, and the characters 'x' and '+'.

import re

# Matches every word that does NOT start with "year(s)", "of", "and", "x",
# "+" or a digit, as well as any parenthesis, so they can be stripped.
a = re.compile(r'(?i)\b(?!(?:years?)|of|and|\d|x|\+)\w+|[()]')
# Matches whitespace that starts the string or follows other whitespace, and
# trailing spaces, i.e. the gaps left behind once words have been stripped.
b = re.compile(r'(?<!\S)[ \t]+| +$')


def parse(s: str) -> str:
    """Convert a free-text contract term into an arithmetic expression.

    Irrelevant words and parentheses are stripped with ``a``, leftover
    whitespace is collapsed with ``b``, and the remaining words are mapped
    to tokens: digits become integers, ``year``/``years`` multiplies the
    preceding number by 12, ``x``/``of`` become ``*`` and ``+``/``and``
    become ``+``.  Anything else that survived the stripping is ignored.

    Args:
        s: Description such as ``"two(2) further terms of five(5) years each"``.

    Returns:
        A space-separated expression in months, e.g. ``"2 * 60"``.  The
        string is empty when no token was recognised.
    """
    cleaned = b.sub('', a.sub('', s).lower())
    e = []
    for x in cleaned.split():
        if x.isdigit():
            e.append(int(x))
        elif x in ('year', 'years'):
            # Match the plural explicitly rather than deleting every "s",
            # and only scale a real number: an empty list used to raise
            # IndexError and a trailing operator was repeated twelve times.
            if e and isinstance(e[-1], int):
                e[-1] *= 12
        elif x in ('x', 'of'):
            e.append('*')
        elif x in ('+', 'and'):
            e.append('+')
    return ' '.join(map(str, e))

# Sample inputs taken from the question.
text = [
    '2 x 5 year terms',
    '3 further x 4 years',
    'two(2) further terms of five(5) years each',
    'Two (2) Years + Two (2) Years + Two (2) Years',
    '1 years + 1 years + 1 years',
    '2 x 3 years',
    '1 year and 6 months',
]

for string in text:
    # parse() already returns the finished expression string.
    exp = parse(string)
    # The evaluated expression is built solely from parse()'s output; do not
    # feed eval() text that has not gone through that filtering.
    print(f'{exp} = {eval(exp)}')