Search code examples
delphidelphi-xe2

Random number with equal total


I have 4 number

a,b,c,d : integers

i need to have a random number between 2-7 assigned to each one but the total of all four numbers has to be 22

how can i do this?


Solution

  • First of all I'm going to make it clear that as stated the question does not uniquely define the problem. You ask for random sampling, but do not specify the desired distribution of the samples.

    It's a common abuse of mathematical terminology to say random when you actually mean uniformly distributed. So I'm going to assume that's what you mean. Specifically that you want all possible distinct sets of 4 numbers to have equal probability of selection. The simplest and most efficient way to achieve this is as follows:

    • Enumerate all such possible sets of 4 numbers.
    • Count these sets of numbers, say N.
    • To sample, choose random number, i say, from uniform distribution in range 0 to N-1.
    • Return the i-th set of 4 numbers.

    The list of possible distinct sets is small. Off the top of my head I'd guess there are around 50 candidates.

    Generating the list of candidates is quite simple. Just run three nested for loops from 2 to 7. This gives you combinations of the first three numbers. Add them up, subtract from 22 and check if the final number is in range.


    Since you seem to like to see code, here's a simple demonstration:

    {$APPTYPE CONSOLE}
    
    uses
      System.Math,
      Generics.Collections;
    
    type
      TValue = record
        a, b, c, d: Integer;
        procedure Write;
      end;
    
    procedure TValue.Write;
    begin
      Writeln(a, ' ', b, ' ', c, ' ', d);
    end;
    
    var
      Combinations: TArray<TValue>;
    
    procedure InitialiseCombinations;
    var
      a, b, c, d: Integer;
      Value: TValue;
      List: TList<TValue>;
    begin
      List := TList<TValue>.Create;
      try
        for a := 2 to 7 do
          for b := 2 to 7 do
            for c := 2 to 7 do
            begin
              d := 22 - a - b - c;
              if InRange(d, 2, 7) then
              begin
                Value.a := a;
                Value.b := b;
                Value.c := c;
                Value.d := d;
                List.Add(Value);
              end;
            end;
        Combinations := List.ToArray;
      finally
        List.Free;
      end;
    end;
    
    function GetSample: TValue;
    begin
      Result := Combinations[Random(Length(Combinations))];
    end;
    
    var
      i: Integer;
    
    begin
      Randomize;
      InitialiseCombinations;
      for i := 1 to 25 do
        GetSample.Write;
      Readln;
    end.
    

    It's clear from inspection that this algorithm samples from the available values uniformly.

    But what about the other proposed algorithms. We can perform a crude heuristic test by sampling repeatedly and counting how many times each possible sample is produced. Here it is:

    {$APPTYPE CONSOLE}
    
    uses
      System.SysUtils,
      System.Math,
      Generics.Collections;
    
    type
      TValue = record
        a, b, c, d: Integer;
        procedure Write;
        class operator Equal(const lhs, rhs: TValue): Boolean;
      end;
    
    procedure TValue.Write;
    begin
      Writeln(a, ' ', b, ' ', c, ' ', d);
    end;
    
    class operator TValue.Equal(const lhs, rhs: TValue): Boolean;
    begin
      Result := (lhs.a=rhs.a) and (lhs.b=rhs.b) and (lhs.c=rhs.c) and (lhs.d=rhs.d);
    end;
    
    var
      Combinations: TArray<TValue>;
    
    procedure InitialiseCombinations;
    var
      a, b, c, d: Integer;
      Value: TValue;
      List: TList<TValue>;
    begin
      List := TList<TValue>.Create;
      try
        for a := 2 to 7 do
          for b := 2 to 7 do
            for c := 2 to 7 do
            begin
              d := 22 - a - b - c;
              if InRange(d, 2, 7) then
              begin
                Value.a := a;
                Value.b := b;
                Value.c := c;
                Value.d := d;
                List.Add(Value);
              end;
            end;
        Combinations := List.ToArray;
      finally
        List.Free;
      end;
    end;
    
    function GetSampleHeffernan: TValue;
    begin
      Result := Combinations[Random(Length(Combinations))];
    end;
    
    function GetSampleVanDien: TValue;
    const
      TOTAL = 22;
      VALUE_COUNT = 4;
      MIN_VALUE = 2;
      MAX_VALUE = 7;
    var
      Values: array[0..VALUE_COUNT-1] of Integer;
      Shortage: Integer;
      Candidates: TList<Integer>;
      ValueIndex: Integer;
      CandidateIndex: Integer;
    begin
      Assert(VALUE_COUNT * MAX_VALUE >= TOTAL, 'Total can never be reached!');
      Assert(VALUE_COUNT * MIN_VALUE <= TOTAL, 'Total is always exceeded!');
      Randomize;
      Candidates := TList<Integer>.Create;
      try
        for ValueIndex := 0 to VALUE_COUNT-1 do
        begin
          Values[ValueIndex] := MIN_VALUE;
          Candidates.Add(ValueIndex);
        end;
        Shortage := TOTAL - VALUE_COUNT * MIN_VALUE;
        while Shortage > 0 do
        begin
          CandidateIndex := Random(Candidates.Count);
          ValueIndex := Candidates[CandidateIndex];
          Values[ValueIndex] := Values[ValueIndex] + 1;
          if Values[ValueIndex] = MAX_VALUE then
            Candidates.Remove(CandidateIndex);
          Shortage := Shortage - 1;
        end;
      finally
        Candidates.Free;
      end;
    
      Result.a := Values[0];
      Result.b := Values[1];
      Result.c := Values[2];
      Result.d := Values[3];
    end;
    
    function GetSampleLama: TValue;
    type
      TRandomValues = array[1..4] of Integer;
    var
      IntSum: Integer;
      Values: TRandomValues;
    begin
      // initialize a helper variable for calculating sum of the generated numbers
      IntSum := 0;
      // in the first step just generate a number in the range of 2 to 7 and store
      // it to the first integer element
      Values[1] := RandomRange(2, 7);
      // and increment the sum value
      IntSum := IntSum + Values[1];
      // as the next step we need to generate number, but here we need also say in
      // which range by the following rules to ensure we ever reach 22 (consider, if
      // the 1st number was e.g. 3, then you can't generate the second number smaller
      // than 5 because then even if the next two numbers would be max, you would get
      // e.g. only 3 + 4 + 7 + 7 = 21, so just use this rule:
      // Values[1] Values[2]
      //        2      6..7
      //        3      5..7
      //        4      4..7
      //        5      3..7
      //     6..7      2..7
      Values[2] := RandomRange(Max(2, 8 - Values[1]), 7);
      // and increment the sum value
      IntSum := IntSum + Values[2];
      // if the third step we need to generate a value in the range of 15 to 20 since
      // the fourth number can be still in the range of 2 to 7 which means that the sum
      // after this step must be from 22-7 to 22-2 which is 15 to 20, so let's generate
      // a number which will fit into this sum
      Values[3] := RandomRange(Max(2, Min(7, 15 - IntSum)), Max(2, Min(7, 20 - IntSum)));
      // and for the last number let's just take 22 and subtract the sum of all previous
      // numbers
      Values[4] := 22 - (IntSum + Values[3]);
    
      Result.a := Values[1];
      Result.b := Values[2];
      Result.c := Values[3];
      Result.d := Values[4];
    end;
    
    function IndexOf(const Value: TValue): Integer;
    begin
      for Result := 0 to high(Combinations) do
        if Combinations[Result] = Value then
          exit;
      raise EAssertionFailed.Create('Invalid value');
    end;
    
    procedure CheckCounts(const Name: string; const GetSample: TFunc<TValue>);
    const
      N = 1000000;
    var
      i: Integer;
      Counts: TArray<Integer>;
      Range: Integer;
    begin
      SetLength(Counts, Length(Combinations));
      for i := 1 to N do
        inc(Counts[IndexOf(GetSample)]);
      Range := MaxIntValue(Counts) - MinIntValue(Counts);
      Writeln(Name);
      Writeln(StringOfChar('-', Length(Name)));
      Writeln(Format('Range = %d, N = %d', [Range, N]));
      Writeln;
    end;
    
    begin
      Randomize;
      InitialiseCombinations;
      CheckCounts('Heffernan', GetSampleHeffernan);
      //CheckCounts('Van Dien', GetSampleVanDien);
      CheckCounts('Lama', GetSampleLama);
      Readln;
    end.
    

    The output, from one particular run, is:

    Heffernan
    ---------
    Range = 620, N = 1000000
    
    Lama
    ----
    Range = 200192, N = 1000000
    

    The Van Dien variant is commented out at the moment since it produces invalid values.


    OK, I debugged and fixed the Van Dien variant. The test and results now look like this:

    {$APPTYPE CONSOLE}
    
    uses
      System.SysUtils,
      System.Math,
      Generics.Collections;
    
    type
      TValue = record
        a, b, c, d: Integer;
        procedure Write;
        class operator Equal(const lhs, rhs: TValue): Boolean;
      end;
    
    procedure TValue.Write;
    begin
      Writeln(a, ' ', b, ' ', c, ' ', d);
    end;
    
    class operator TValue.Equal(const lhs, rhs: TValue): Boolean;
    begin
      Result := (lhs.a=rhs.a) and (lhs.b=rhs.b) and (lhs.c=rhs.c) and (lhs.d=rhs.d);
    end;
    
    var
      Combinations: TArray<TValue>;
    
    procedure InitialiseCombinations;
    var
      a, b, c, d: Integer;
      Value: TValue;
      List: TList<TValue>;
    begin
      List := TList<TValue>.Create;
      try
        for a := 2 to 7 do
          for b := 2 to 7 do
            for c := 2 to 7 do
            begin
              d := 22 - a - b - c;
              if InRange(d, 2, 7) then
              begin
                Value.a := a;
                Value.b := b;
                Value.c := c;
                Value.d := d;
                List.Add(Value);
              end;
            end;
        Combinations := List.ToArray;
      finally
        List.Free;
      end;
    end;
    
    function GetSampleHeffernan: TValue;
    begin
      Result := Combinations[Random(Length(Combinations))];
    end;
    
    function GetSampleVanDien: TValue;
    const
      TOTAL = 22;
      VALUE_COUNT = 4;
      MIN_VALUE = 2;
      MAX_VALUE = 7;
    var
      Values: array[0..VALUE_COUNT-1] of Integer;
      Shortage: Integer;
      Candidates: TList<Integer>;
      ValueIndex: Integer;
      CandidateIndex: Integer;
    begin
      Assert(VALUE_COUNT * MAX_VALUE >= TOTAL, 'Total can never be reached!');
      Assert(VALUE_COUNT * MIN_VALUE <= TOTAL, 'Total is always exceeded!');
      Candidates := TList<Integer>.Create;
      try
        for ValueIndex := 0 to VALUE_COUNT-1 do
        begin
          Values[ValueIndex] := MIN_VALUE;
          Candidates.Add(ValueIndex);
        end;
        Shortage := TOTAL - VALUE_COUNT * MIN_VALUE;
        while Shortage > 0 do
        begin
          CandidateIndex := Random(Candidates.Count);
          ValueIndex := Candidates[CandidateIndex];
          inc(Values[ValueIndex]);
          if Values[ValueIndex] = MAX_VALUE then
            Candidates.Delete(CandidateIndex);
          dec(Shortage);
        end;
      finally
        Candidates.Free;
      end;
    
      Result.a := Values[0];
      Result.b := Values[1];
      Result.c := Values[2];
      Result.d := Values[3];
    end;
    
    function GetSampleLama: TValue;
    type
      TRandomValues = array[1..4] of Integer;
    var
      IntSum: Integer;
      Values: TRandomValues;
    begin
      // initialize a helper variable for calculating sum of the generated numbers
      IntSum := 0;
      // in the first step just generate a number in the range of 2 to 7 and store
      // it to the first integer element
      Values[1] := RandomRange(2, 7);
      // and increment the sum value
      IntSum := IntSum + Values[1];
      // as the next step we need to generate number, but here we need also say in
      // which range by the following rules to ensure we ever reach 22 (consider, if
      // the 1st number was e.g. 3, then you can't generate the second number smaller
      // than 5 because then even if the next two numbers would be max, you would get
      // e.g. only 3 + 4 + 7 + 7 = 21, so just use this rule:
      // Values[1] Values[2]
      //        2      6..7
      //        3      5..7
      //        4      4..7
      //        5      3..7
      //     6..7      2..7
      Values[2] := RandomRange(Max(2, 8 - Values[1]), 7);
      // and increment the sum value
      IntSum := IntSum + Values[2];
      // if the third step we need to generate a value in the range of 15 to 20 since
      // the fourth number can be still in the range of 2 to 7 which means that the sum
      // after this step must be from 22-7 to 22-2 which is 15 to 20, so let's generate
      // a number which will fit into this sum
      Values[3] := RandomRange(Max(2, Min(7, 15 - IntSum)), Max(2, Min(7, 20 - IntSum)));
      // and for the last number let's just take 22 and subtract the sum of all previous
      // numbers
      Values[4] := 22 - (IntSum + Values[3]);
    
      Result.a := Values[1];
      Result.b := Values[2];
      Result.c := Values[3];
      Result.d := Values[4];
    end;
    
    function IndexOf(const Value: TValue): Integer;
    begin
      for Result := 0 to high(Combinations) do
        if Combinations[Result] = Value then
          exit;
      raise EAssertionFailed.Create('Invalid value');
    end;
    
    procedure CheckCounts(const Name: string; const GetSample: TFunc<TValue>);
    const
      N = 1000000;
    var
      i: Integer;
      Counts: TArray<Integer>;
      Range: Integer;
    begin
      SetLength(Counts, Length(Combinations));
      for i := 1 to N do
        inc(Counts[IndexOf(GetSample)]);
      Range := MaxIntValue(Counts) - MinIntValue(Counts);
      Writeln(Name);
      Writeln(StringOfChar('-', Length(Name)));
      Writeln(Format('Range = %d, N = %d', [Range, N]));
      Writeln;
    end;
    
    begin
      Randomize;
      InitialiseCombinations;
      CheckCounts('Heffernan', GetSampleHeffernan);
      CheckCounts('Van Dien', GetSampleVanDien);
      CheckCounts('Lama', GetSampleLama);
      Readln;
    end.
    
    Heffernan
    ---------
    Range = 599, N = 1000000
    
    Van Dien
    --------
    Range = 19443, N = 1000000
    
    Lama
    ----
    Range = 199739, N = 1000000
    

    And just to ram it home, here are some plots of empirical probability mass function of the various distributions:

    enter image description here


    OK, now I fixed @TLama's code. It was using RandomRange incorrectly. The documentation states:

    RandomRange returns a random integer from the range that extends between AFrom and ATo (non-inclusive).

    The key is that the range is defined as a closed-open interval. The value returned is in the range [AFrom..ATo), or expressed with inequality signs, AFrom <= Value < ATo.

    But @TLama's code is written on the assumption that the interval is closed at both ends. So the code can readily be fixed by adding 1 to the second parameter of each call to RandomRange. When we do that the output looks like this:

    Heffernan
    ---------
    Range = 587, N = 1000000
    
    Van Dien
    --------
    Range = 19425, N = 1000000
    
    Lama
    ----
    Range = 79320, N = 1000000
    

    And the empircal PMF plot becomes:

    enter image description here


    The bottom line in all this is that sampling is difficult to get right if you care about the distribution.