import re
input_text = "((PERS) Tomás), ((PERS) Kyara Gomez) y ((PERS) Camila) fueron a ((VERB) caminar) y ((VERB) saltar) ((PL_ADVB) en la montaña)(2023_-_02_-_05(00:00 am))((PL_ADVB) ((NOUN)en el parque amplio y naranja) por el otoño)"

# Patterns with one capture group each.
# The "\s*" right after the tag consumes the blank that separates the tag from
# its content; the content class does not start on whitespace, so without it
# the PERS / VERB / PL_ADVB patterns cannot match this input at all.
capture_pattern_persons = r"\(\(PERS\)\s*" + r'((?:[\w,;.]\s*)+)' + r"\)"
capture_pattern_actions_or_verbs = r"\(\(VERB\)\s*" + r'((?:[\w,;.]\s*)+)' + r"\)"
# Parentheses are allowed inside a place so nested tags such as "((NOUN)...)"
# are kept as part of the captured text.
capture_pattern_about_where_happens = r"\(\(PL_ADVB\)\s*" + r'((?:[\w,;.()]\s*)+)' + r"\)"
capture_pattern_about_when_happens = r"\((\d*_-_\d{2}_-_\d{2}\s*\(\d{2}:\d{2}\s*(?:am|pm)\))\)"

# re.findall returns a list of the captured group for every match, so the
# sub-lists do not need to be initialized empty beforehand.
list_of_persons = re.findall(capture_pattern_persons, input_text)
list_of_actions_or_verbs = re.findall(capture_pattern_actions_or_verbs, input_text)
list_of_where_happens = re.findall(capture_pattern_about_where_happens, input_text)
list_of_when_happens = re.findall(capture_pattern_about_when_happens, input_text)

# Stock order reference --> [ ["(PERS) )"], ["(VERB) )"], ["(YYYY_-_MM_-_DD hh:mm am or pm)"], ["(PL_ADVB) )"] ]
# Build the main list directly from the four filled sub-lists, in that order
# (persons, verbs, when, where); appending to a pre-filled [[], [], [], []]
# would leave four stray empty lists at the front.
info_list = [
    list_of_persons,
    list_of_actions_or_verbs,
    list_of_when_happens,
    list_of_where_happens,
]

print(repr(info_list))  # print the main list with the captured elements
`capture_pattern_persons` captures the elements of the first sub-list.
`capture_pattern_actions_or_verbs` captures the elements of the second sub-list.
`capture_pattern_about_where_happens` captures the elements of the fourth sub-list.
`capture_pattern_about_when_happens` captures the elements of the third sub-list.
After adding all the elements, the list should look like this when you print it to check it:
[ ["Tomás", "Kyara Gomez", "Camila"], ["caminar", "saltar"], ["2023_-_02_-_05(00:00 am)"], ["en la montaña", "((NOUN)en el parque amplio y naranja) por el otoño"] ]
You can use `re.findall`; the function returns a list, so you can also omit the initialization of the empty sub-lists. For example:
import re
input_text = "((PERS) Tomás), ((PERS) Kyara Gomez) y ((PERS) Camila) fueron a ((VERB) caminar) y ((VERB) saltar) ((PL_ADVB) en la montaña)(2023_-_02_-_05(00:00 am))((PL_ADVB) ((NOUN)en el parque amplio y naranja) por el otoño)"

# Pattern with a single capture group for the text tagged as (PERS);
# the "\s*" after the tag skips the blank in front of the name.
capture_pattern_persons = r"\(\(PERS\)\s*((?:[\w,;.]\s*)+)\)"
# (the other patterns go here)

# findall yields the captured group of every match as a list
list_of_persons = re.compile(capture_pattern_persons).findall(input_text)
# (the other captures go here)

# Main result list, holding one sub-list per pattern
info_list = [list_of_persons]
# (the other sub-lists are added here)

# Show the collected result
print(info_list)