Given a vocabulary dict: {'A': 3, 'B': 4, 'C': 5, 'AB': 6}
and a sentence that should be segmented: ABCAB.
I need to generate all possible segmentations of this sentence into vocabulary words, such as
[['A', 'B', 'C', 'A', 'B'], ['A', 'B', 'C', 'AB'], ['AB', 'C', 'AB'], ['AB', 'C', 'A', 'B']]
That's what I have:
def find_words(sentence, vocabulary=None):
    """Return every way to split *sentence* into vocabulary words.

    Args:
        sentence: The string to segment.
        vocabulary: Container of known words; a dict keyed by word works
            as well as a set. Defaults to the module-level ``test_dict``.

    Returns:
        A list of segmentations, each a list of words whose concatenation
        is exactly *sentence*. The empty sentence has the single
        segmentation ``[]``; a sentence that cannot be fully segmented
        produces an empty list.
    """
    if vocabulary is None:
        vocabulary = test_dict

    # No prefix longer than the longest vocabulary entry can ever match.
    longest = max((len(word) for word in vocabulary), default=0)

    def segment(start):
        """Return all segmentations of ``sentence[start:]``."""
        if start == len(sentence):
            # Reached the end: exactly one way to split nothing.
            return [[]]
        results = []
        last_end = min(start + longest, len(sentence))
        for end in range(start + 1, last_end + 1):
            word = sentence[start:end]
            if word not in vocabulary:
                continue
            # Each segmentation of the remainder extends this prefix word;
            # a remainder with no segmentation contributes nothing.
            results.extend([word] + rest for rest in segment(end))
        return results

    return segment(0)
But it only returns a single list.
I was also looking for something useful in itertools but I couldn't find anything obviously useful. Might've missed it, though.
Try all possible prefixes and recursively do the same for the rest of the sentence.
# Vocabulary of known words that parse() iterates over when matching prefixes.
VOC = {'A', 'B', 'C', 'AB'} # could be a dict
def parse(snt, voc=None):
    """Yield every segmentation of *snt* into vocabulary words.

    Each vocabulary word that is a prefix of *snt* is tried in turn, and
    the remainder of the string is segmented recursively.

    Args:
        snt: The string to segment.
        voc: Iterable of known words (a set, or a dict keyed by word).
            Defaults to the module-level ``VOC``.

    Yields:
        Lists of words whose concatenation is exactly *snt*. The empty
        string yields the single segmentation ``[]``; a string that
        cannot be fully segmented yields nothing.
    """
    if voc is None:
        voc = VOC
    if snt == '':
        # Nothing left to split: the empty segmentation completes a parse.
        yield []
        return
    for w in voc:
        # Skip empty words: they are a prefix of everything and would
        # recurse forever without consuming any input.
        if w and snt.startswith(w):
            for rest in parse(snt[len(w):], voc):
                yield [w] + rest
# Show every segmentation of the example sentence.
print([*parse('ABCAB')])
# Expected output (the order of the lists follows the iteration order of VOC):
# [['AB', 'C', 'AB'], ['AB', 'C', 'A', 'B'],
# ['A', 'B', 'C', 'AB'], ['A', 'B', 'C', 'A', 'B']]