I am trying to calculate the Lexicographic Rank of a given permutation of characters with duplicates.
All the examples I have found seem to deal with finding the Lexicographic rank of a given input string against anagrams of input string (opposed to an arbitrary character set)
A,B,C
BAA
(note C
is not included in the input)3
(max length of character combinations...1 & 2 character combinations must also be included)This looks like basically every other ranking/unranking algorithm pair, so easier done than said? The short version is that, to get a word of length at most n, we either have the empty word, which comes first, or a letter followed by a word of length at most n−1. By subtracting one from the rank and dividing by the number of words that answer to the latter description, we can get the first letter and then recurse (but with iteration instead of actual recursion).
import itertools
def count_words(alphabet, max_length):
count = 1
for i in range(max_length):
count = count * len(alphabet) + 1
return count
def word_from_rank(alphabet, rank, max_length):
count = count_words(alphabet, max_length)
letters = []
while rank > 0:
count = (count - 1) // len(alphabet)
i, rank = divmod(rank - 1, count)
letters.append(alphabet[i])
return "".join(letters)
def rank_from_word(alphabet, word, max_length):
letter_to_rank = {letter: i for (i, letter) in enumerate(alphabet)}
count = count_words(alphabet, max_length)
rank = 0
for letter in word:
count = (count - 1) // len(alphabet)
rank += count * letter_to_rank[letter] + 1
return rank
def test(alphabet, max_length):
words = sorted(
{
"".join(letters)
for n in range(max_length + 1)
for letters in itertools.product(alphabet, repeat=n)
}
)
assert count_words(alphabet, max_length) == len(words)
for rank, word in enumerate(words):
assert word_from_rank(alphabet, rank, max_length) == word
assert rank_from_word(alphabet, word, max_length) == rank
test("ABC", 3)
test("ABCD", 3)
test("ABCDE", 4)
print(rank_from_word("ABC", "BAA", 3))
for i in range(count_words("ABC", 3)):
print(i, word_from_rank("ABC", i, 3))
Output:
16
0
1 A
2 AA
3 AAA
4 AAB
5 AAC
6 AB
7 ABA
8 ABB
9 ABC
10 AC
11 ACA
12 ACB
13 ACC
14 B
15 BA
16 BAA
17 BAB
18 BAC
19 BB
20 BBA
21 BBB
22 BBC
23 BC
24 BCA
25 BCB
26 BCC
27 C
28 CA
29 CAA
30 CAB
31 CAC
32 CB
33 CBA
34 CBB
35 CBC
36 CC
37 CCA
38 CCB
39 CCC