I want to parse a string like this:
Object A -> Object B [AB_name] Object B -> Object C [BC_name] ...
My goal is to obtain three lists:
Parents = ['Object A', 'Object B', ...]
Children = ['Object B', 'Object C', ...]
PC_names = ['AB_name','BC_name', ...]
I already have a working solution, but it is incomprehensible, and therefore difficult to maintain, and not very robust. Basically, my code iterates over the string with two loops and thereby adds the substrings to the several lists.
To fix this, I read about the parsec.py library, but so far I couldn't find a good example for complete newbies like me. I have already tried to figure out how it works with the help of other articles and the documentation, but with little success so far.
I'm glad about every hint.
The test_input.txt:
Society {
A man -> a child [was once]
A man -> an automobile [has]
A woman -> a person [is]
A man -> a person [is]
A man -> a child [was once]
}
My current code:
from typing import List
from parsec import *
class Type:
    """Holder for a single text label.

    ``label`` is only declared here, not assigned: a new instance has no
    ``label`` attribute until the caller sets one.
    """

    label: str
class Aspect:
    """A labelled link from one ``Type`` (domain) to another (codomain).

    ``label`` is declared but not assigned by the constructor; callers set it
    after creating the instance.
    """

    domain: Type
    codomain: Type
    label: str

    def __init__(self) -> None:
        # Every instance starts with its own two fresh endpoint objects.
        self.domain, self.codomain = Type(), Type()
class Olog:
    """A named collection of ``Aspect`` objects.

    ``name`` is declared but not assigned by the constructor; ``aspects``
    starts out as a new empty list for every instance.
    """

    name: str
    aspects: List[Aspect]

    def __init__(self):
        self.aspects = list()
# Script body: read the olog text file and fill `first` (an Olog) with one
# Aspect per "domain -> codomain [label]" record found in it.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below cannot be confirmed from the text alone - TODO restore it
# from the original file before running.
# NOTE(review): the surrounding text calls the input file test_input.txt, but
# this opens 'testinput.txt' - confirm which name is intended.
with open ('testinput.txt', 'r') as f:
f_content = f.read()
# Everything before the first "{" is used as the olog's name (not stripped).
olog_name = f_content.split("{")[0]
first = Olog()
first.aspects = []
first.name = olog_name
# The piece following the first "{" holds the records; cutting it at every
# "]" gives one chunk per record, each missing its closing bracket.
olog_data = f_content.split("{")[1]
olog_data_lines = olog_data.split(']')
# `orientation` is assigned here and again below but never read in the code shown.
orientation = str
# counter1 is incremented for every "->" token; counter2 is incremented at the
# very end of this block. The comparisons `counter1 == counter2` below are
# used to tell "before the arrow" from "after the arrow" within a record.
counter1 = 0
counter2 = 0
# Initialised as strings, but later overwritten with integer indices.
domain_str = ''
codomain_str = ''
# `type_comma` is not read anywhere in the code shown.
type_comma = Type()
type_comma.label = ","
# string_store accumulates every type name seen so far, each followed by ", ".
string_store = str
string_store = ''
type_store = Type()
type_store_split = [Type]
# Process each "]"-delimited chunk in turn.
for lines in olog_data_lines:
first_type = ''
second_type = ''
aspect_label = str
# `first_T` is not read anywhere in the code shown.
first_T = Type()
second_T = Type()
# Put back the "]" that the split removed, then tokenise on whitespace.
lines += ']'
lines_split = lines.split()
type_in_list = False
for word in lines_split:
# A closing brace before any "->" has been counted: report an empty olog.
if word == "}" and counter1 == 0:
print("Olog is empty")
# A closing brace ends the scan of this chunk.
if word == "}":
print(">>>Olog was saved")
break
if word == "->":
counter1 +=1
# While counter1 == counter2, words are gathered into first_type.
# NOTE(review): list.index returns the position of the FIRST occurrence of
# `word`, not the current position, so a word repeated within the chunk is
# treated as if it were at its first position.
if counter1 == counter2 and lines_split.index(word) == 0:
first_type = word
if counter1 == counter2 and not lines_split.index(word) == 0:
first_type = first_type + (" " + word)
if word == "->":
orientation = "->"
# Append the collected name to the running store and re-split it.
# NOTE(review): names are joined with ", " but split on ",", so every piece
# after the first starts with a space - confirm the equality tests below
# behave as intended.
string_store = string_store + first_type + ", "
type_store.label = string_store
type_store_split = type_store.label.split(",")
for types in type_store_split:
if types == first_type:
# domain_str becomes the integer position of the matching piece.
domain_str = int(type_store_split.index(types))
type_in_list = True
break
if not type_in_list:
# Fallback index when no piece matched: length of the split list minus two.
domain_str = int(len(type_store_split)-2)
if not counter1 == counter2:
# A token starting with "[" begins the label: take the text between the
# first "[" and the following "]" of the whole chunk.
if word[0] == "[":
aspect_label = (lines.split('[', 1)[1].split(']')[0])
# Otherwise the token is added to second_type (removing one "->" from what
# has been collected so far).
else: second_type = second_type.replace('->','', 1) + " " + word
# A token ending in "]" closes the record.
if (word[len(word)-1]=="]"):
second_T.label = second_type
string_store = string_store + second_type + ", "
type_store.label = string_store
type_store_split = type_store.label.split(",")
for types in type_store_split:
if types == second_type:
# codomain_str becomes the integer position of the matching piece.
codomain_str = int(type_store_split.index(types))
second_T.label = codomain_str
break
elif types == type_store_split[len(type_store_split)-1]:
# Reached a piece equal to the last one without a match: use length minus two.
codomain_str = int(len(type_store_split)-2)
second_T.label = codomain_str
# Build the Aspect for this record. Its domain/codomain labels are set to
# domain_str / codomain_str (the integer indices computed above, where
# assigned), not to the type names themselves.
aspect_A = Aspect()
aspect_A.label = aspect_label
aspect_A.domain = Type()
aspect_A.codomain = Type()
aspect_A.domain.label = domain_str
aspect_A.codomain.label = codomain_str
first.aspects.append(aspect_A)
counter2 += 1
This solution uses `re` and recursion to parse the input lines and traverse the result, yielding back `parents`, `children`, and `pc_names`:
import re, collections
# Raw-string patterns, compiled once.  The non-raw forms ('\[', '\s', '\-',
# '\>') are invalid string escape sequences and trigger a warning on current
# Python versions.
_ARROW_RE = re.compile(r'\s*->\s*')
_TOKEN_RE = re.compile(r'\[.*?\]|[\w\s]+')


def parse_line(l):
    """Split one ``parent -> child [name]`` line into lists of tokens.

    The line is cut at every ``->`` (surrounding whitespace removed); each
    piece is stripped and then tokenised into bracketed groups (``[...]``,
    brackets kept) and runs of word/whitespace characters.

    Whitespace between a child name and its ``[`` is kept as part of the name
    token, so the child token ends with a space.

    >>> parse_line("A man -> a child [was once]")
    [['A man'], ['a child ', '[was once]']]

    Args:
        l: One line of input text (a trailing newline is fine).

    Returns:
        A list with one token list per ``->``-separated piece.  An empty or
        blank line gives ``[[]]``.
    """
    return [_TOKEN_RE.findall(piece.strip()) for piece in _ARROW_RE.split(l)]
# Parse every record line of the input file.  Lines containing "{" or "}" (the
# wrapper around the records) are skipped, as are blank lines: a blank line
# would parse to a single empty token list and break the two-way unpacking
# (`for a, b in lines`) applied to each entry further down.
# The `with` block closes the file as soon as the list has been built.
with open('test_input.txt') as _src:
    lines = [
        parse_line(raw)
        for raw in _src
        if raw.strip() and '{' not in raw and '}' not in raw
    ]
def get_vals(d, s=None):
    """Walk the parsed records starting at ``d`` and yield tagged values.

    Reads the module-level ``lines`` list, whose entries are
    ``[parent_tokens, child_tokens]`` pairs.

    For the token list ``d``:

    * if it has more than one token, the last token minus its first and last
      character (the brackets) is yielded as ``('pc_names', ...)``;
    * if ``d[0]`` equals the parent name of at least one record, it is
      yielded as ``('parents', d[0])`` and each of those records' child token
      lists is visited in turn;
    * otherwise it is yielded as ``('children', d[0])`` and the walk continues
      with the first record whose parent name is not in ``s``, if any.

    The name comparison is exact, so a child token that still carries a
    trailing space does not match the same name used as a parent.

    Args:
        d: Token list for the current node; ``d[0]`` is its name.
        s: Names visited on the way here.  Defaults to an empty list; it is
            never modified (a new list is passed to each recursive call).

    Yields:
        ``(category, value)`` tuples where category is ``'pc_names'``,
        ``'parents'`` or ``'children'``.
    """
    # None instead of a shared mutable list as the default value.
    seen = [] if s is None else s

    if len(d) > 1:
        yield ('pc_names', d[-1][1:-1])

    name = d[0]
    trail = seen + [name]
    child_entries = [b for a, b in lines if name == a[0]]

    if child_entries:
        yield ('parents', name)
        for entry in child_entries:
            yield from get_vals(entry, trail)
        return

    yield ('children', name)
    # The restart point is chosen against `seen`, which does not yet include
    # the current name.
    pending = [a for a, _ in lines if a[0] not in seen]
    if pending:
        yield from get_vals(pending[0], trail)
# Gather the yielded (category, value) pairs into one de-duplicated list per
# category.  A dict is used as an ordered set so that values are printed in
# first-seen order on every run; with a set of strings the order can differ
# between runs.
result = collections.defaultdict(dict)
if lines:  # an empty input has no first record to start the walk from
    for a, b in get_vals(lines[0][0]):
        result[a][b] = None
print({a: list(b) for a, b in result.items()})
Output:
{'parents': ['A woman', 'A man'],
'pc_names': ['was once', 'is', 'has'],
'children': ['a person ', 'an automobile ', 'a child ']}
Second test_input.txt content:
Object A -> Object B [AB_name]
Object B -> Object C [BC_name]
Result:
{'parents': ['Object B', 'Object A'],
'pc_names': ['AB_name', 'BC_name'],
'children': ['Object B ', 'Object C ']}