Search code examples
pythonstatisticsjuliaprobability

Calculate number of trials to get uniques


Given the following table

Type     Chance Number of unique elements 
common   30.00%  21
Uncommon 30.00%  27
Rare     20.00%  32
Ultra    15.00%  14
Epic     5.00%   10

Is there a way to calculate with a python script the average trials necessary to get X amount of unique elements across all the types (I.e. 5 elements that I do not own, regardless if they are common or uncommon etc...)?


Solution

  • OK, this is the solution:

    from numpy.random import multinomial
    import numpy as np
    
    class UniqueElements:
        def __init__(self, type_dist, unique_elements):
            self.type_dist = type_dist
            self.unique_elements = unique_elements
            self.unique_elements_dist = [[1.0 / n for i in range(n)] for n in unique_elements]
            self.init_items()
            
        def pickone(self, dist):
            return np.where(multinomial(1, dist) == 1)[0][0]
        
        def init_items(self):
            self.items = np.zeros((len(self.type_dist), max(self.unique_elements)), dtype=int)
        
        def draw(self):
            item_type = self.pickone(self.type_dist)
            item_number = self.pickone(self.unique_elements_dist[item_type])
            return item_type, item_number
        
        def draw_unique(self, x):
            while (self.items > 0).sum() < x:
                item_type, item_number = self.draw()
                self.items[item_type, item_number] += 1
            return self.items.sum()
        
        def average_for_unique(self, x, n, reset=True):
            tot_draws = 0
            for i in range(n):
                tot_draws += self.draw_unique(x)
                if reset:
                    self.init_items()
                else:
                    self.items[self.items>1] -= 1
    
            return tot_draws / n
        
    if __name__ == '__main__':
        type_dist = [0.3, 0.3, 0.2, 0.15, 0.05]
        unique_elements = [21, 27, 32, 14, 10]
        ue = UniqueElements(type_dist, unique_elements)
        print(ue.average_for_unique(10, 100000))
            
    

    If you want to put each completed set aside and continue with whatever remains, change the last line as follows: print(ue.average_for_unique(10, 100000, reset=False))

    Note: For x = 5, the average is 5.1, for x = 8, the average is 8.3. This is not surprising as there are 104 unique elements across the different types.

    For the sake of demo, this is the same program using Julia programming language:

    using Random
    
    function pickone(dist)
        n = length(dist)
        i = 1
        r = rand()
        while r >= dist[i] && i<n 
            i+=1
        end
        return i
    end  
    
    function init_items(type_dist, unique_elements)
        return zeros(Int32, length(type_dist), maximum(unique_elements))
    end
    
    function draw(type_dist, unique_elements_dist)
        item_type = pickone(type_dist)
        item_number = pickone(unique_elements_dist[item_type])
        return item_type, item_number
    end
    
    function draw_unique(type_dist, unique_elements_dist, items, x)
        while sum(items .> 0) < x
            item_type, item_number = draw(type_dist, unique_elements_dist)
            items[item_type, item_number] += 1
        end
        return sum(items)
    end
    
    function average_for_unique(type_dist, unique_elements_dist, x, n, reset=true)
        println("Started computing...")
        items = init_items(type_dist, unique_elements)
    
        tot_draws = 0
        for i in 1:n
            tot_draws += draw_unique(type_dist, unique_elements_dist, items, x)
            if reset
                items .= 0
            else
                items[items.>1] -= 1
            end
        end
    
        return tot_draws / n
    end
        
    type_dist = [0.3, 0.3, 0.2, 0.15, 0.05]
    type_dist = cumsum(type_dist)
    
    unique_elements = [21, 27, 32, 14, 10]
    unique_elements_dist = [[1 / unique_elements[j] for i in 1:unique_elements[j]] for j in 1:length(unique_elements)]
    unique_elements_dist = [cumsum(dist) for dist in unique_elements_dist]
    
    avg = average_for_unique(type_dist, unique_elements_dist, 10, 100000)
    print(avg)
        
    

    Longer startup time, especially while downloading and compiling packages. After that, it's blazing fast.

    Anyone can improve the Python version to match the Julia version? 100 points bounty.