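I want to use a regex to generate every string that results from replacing some combination (subset) of its matches, rather than replacing all of the matches at once.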
This is what I have:
>>> re.compile("0").sub("2", "01101")
'21121'
This is what I want:
>>> replace_combinations(pattern="0", repl="2", string="01101")
['01101', '01121', '21101', '21121']
I can use re.finditer() to get all the matches separately, and then itertools.combinations() to get every combination of them, but I don't know how to do the replacement part.
To answer my own question after some experimenting: for simple regexes the following (somewhat convoluted) method works. It may not work for more complex patterns (counterexamples and better answers are welcome).
import re
from itertools import combinations
from typing import Set
def replace_match(match, repl, string):
    """Return *string* with the single span described by *match* replaced.

    Args:
        match: A match object produced by searching *string*; its
            ``start()``/``end()`` offsets must still be valid for *string*.
        repl: Either a replacement template (group references are expanded
            the same way ``re.sub`` expands them) or a callable that takes
            the match object and returns the replacement text.
        string: The text in which the match was found.

    Returns:
        A new string in which only the matched span has been substituted;
        everything before and after the span is copied unchanged.
    """
    # Build the replacement from the existing match object instead of
    # re-running the pattern on the isolated substring. Out of context,
    # lookarounds, anchors and word boundaries may no longer match (leaving
    # the text unreplaced), and patterns that can match empty text may
    # substitute more than once.
    if callable(repl):
        replacement = repl(match)
    else:
        replacement = match.expand(repl)
    return string[:match.start()] + replacement + string[match.end():]
def replace_matches(matches, repl, string):
    """Apply the replacement to each match in *matches* and return the result.

    Args:
        matches: A reversible sequence (e.g. a list or tuple) of match
            objects found in *string*, ordered from left to right.
        repl: Replacement passed through to ``replace_match`` for each match.
        string: The text the matches were found in.

    Returns:
        The text after all of the given matches have been substituted. With
        an empty *matches* sequence the input is returned unchanged.
    """
    # Substitute from the rightmost match towards the leftmost. A replacement
    # whose length differs from the matched text only shifts characters to
    # its right, so the offsets stored in the not-yet-processed (earlier)
    # matches remain valid.
    result = string
    for current in reversed(matches):
        result = replace_match(current, repl, result)
    return result
def replace_combinations(pattern, repl, string) -> Set[str]:
    """Return every string obtainable by replacing a subset of the matches.

    All matches of *pattern* in *string* are collected, and for each possible
    subset of those matches (from none of them up to all of them) the chosen
    matches are replaced with *repl*.

    Args:
        pattern: Regular expression (string or compiled pattern) to search for.
        repl: Replacement forwarded to ``replace_matches``.
        string: The text to search and substitute in.

    Returns:
        The set of distinct resulting strings. It always contains the
        original *string* (the empty subset). With ``n`` matches up to
        ``2 ** n`` candidates are generated, so the result grows
        exponentially with the number of matches.
    """
    matches = list(re.finditer(pattern, string))
    results = set()
    # Subset sizes run from 0 (nothing replaced) to len(matches) (everything
    # replaced). Each combination is consumed as it is generated rather than
    # being stored in an intermediate list first.
    for size in range(len(matches) + 1):
        for chosen in combinations(matches, size):
            results.add(replace_matches(chosen, repl, string))
    return results
# Example usage: replace every subset of the two "0" matches with "2".
replace_combinations(pattern="0", repl="2", string="01101")
# Result (a set, so the order of the elements may vary):
# {'01121', '21121', '01101', '21101'}