Brun's constant : https://en.wikipedia.org/wiki/Brun%27s_theorem http://numbers.computation.free.fr/Constants/Primes/twin.html
How to compute Brun's constant up to 10^20 with python knowing that primality check has a heavy cost and summing result up to 10^20 is a long task
here is my 2 cents attempt :
IsPrime : fastest way to check if the number is prime I know digit_root : compute the digital root of the number
If someone know what could be improve bellow to reach computation to 10^20, you're welcome
import numpy as np
import math
import time
#Brun's constant
#p B_2(p)
#10^2 1.330990365719...
#10^4 1.616893557432...
#10^6 1.710776930804...
#10^8 1.758815621067...
#10^10 1.787478502719...
#10^12 1.806592419175...
#10^14 1.820244968130...
#10^15 1.825706013240...
#10^16 1.830484424658...
#B_2 should reach 1.9 at p ~ 10^530 which is far beyond any computational project
#B_2*(p)=B_2(p)+ 4C_2/log(p)
#p B2*(p)
#10^2 1.904399633290...
#10^4 1.903598191217...
#10^6 1.901913353327...
#10^8 1.902167937960...
#10^10 1.902160356233...
#10^12 1.902160630437...
#10^14 1.902160577783...
#10^15 1.902160582249...
#10^16 1.902160583104...
def digit_root(number):
return (number - 1) % 9 + 1
first25Primes=[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]
def IsPrime(n) :
# Corner cases
if (n <= 1) :
return False
if (n <= 3) :
return True
# This is checked so that we can skip
# middle five numbers in below loop
if (n % 2 == 0 or n % 3 == 0) :
return False
#exclude digital root 3 or 6 or 9
if digit_root(n) in (3,6,9):
return False
if (n != 2 and n != 7 and n != 5 and str(n)[len(str(n))-1] not in ("1","3","7","9")): #si le nombre ne se termine pas par 1 3 7 9
return False
for i in first25Primes:
if n%i == 0 and i < n:
return False
if (n>2):
if (not(((n-1) / 4) % 1 == 0 or ((n+1) / 4) % 1 == 0)):
return False
if (n>3):
if (not(((n-1) / 6) % 1 == 0 or ((n+1) / 6) % 1 == 0)):
return False
i = 5
while(i * i <= n) :
if (n % i == 0 or n % (i + 2) == 0) :
return False
i = i + 6
return True
def ComputeB_2Aster(B_2val,p):
return B_2 + (C_2mult4/np.log(p))
start = time.time()
#approx De Brun's
B_2 = np.float64(0)
B_2Aster = np.float64(0)
one = np.float64(1)
#Twin prime constant
C_2 = np.float64(0.6601618158468695739278121100145557784326233602847334133194484233354056423)
C_2mult4 = C_2 * np.float64(4)
lastPrime = 2
lastNonPrime = 1
for p in range(3, 100000000000000000000,2):
if IsPrime(p):
lastNonPrime = p-1
if lastPrime == p-2 and lastNonPrime == p-1:
B_2 = B_2 + (one/np.float64(p)) + (one/np.float64(lastPrime))
lastPrime = p
else:
lastNonPrime = p
if p<10000000000:
if p%1000001==0:
print(f'p:{p} \t\t[elapsed:{time.time()-start}]\nB_2:{B_2:.52f} B_2Aster:{ComputeB_2Aster(B_2,p-2):.52f}\n',end="")
else:
print(f'p:{p} \t\t[elapsed:{time.time()-start}]\nB_2:{B_2:.52f} B_2Aster:{ComputeB_2Aster(B_2,p-2):.52f}\n',end="")
print(f'p:{p} \t\t[elapsed:{time.time()-start}]\nB_2:{B_2:.52f} B_2Aster:{ComputeB_2Aster(B_2,p-2):.52f}\n',end="")
Now, if you remember your classes in number theory there is something called sieve of Eratosthenes which is an algorithm to find all prime numbers up to any given limit:
algorithm Sieve of Eratosthenes is
input: an integer n > 1.
output: all prime numbers from 2 through n.
let A be an array of Boolean values, indexed by integers 2 to n,
initially all set to true.
for i = 2, 3, 4, ..., not exceeding √n do
if A[i] is true
for j = i2, i2+i, i2+2i, i2+3i, ..., not exceeding n do
set A[j] := false
return all i such that A[i] is true.
Now, this algorithm, translated into python is simply
def sieve_of_eratosthenes(max_num):
is_prime = np.full(max_num + 1, True, dtype=bool)
is_prime[0:2] = False
p = 2
while (p * p <= max_num):
if (is_prime[p] == True):
for i in range(p * p, max_num + 1, p):
is_prime[i] = False
p += 1
return np.nonzero(is_prime)[0]
Now, to compute Brun's constant, you will need that list of primes and the sum. So,
def brun_constant(primes):
sum_primes = mpfr('0')
last_p = mpz('2')
count = 0
for p in primes:
p_gmp = mpz(p)
if count > 0:
if is_prime(p_gmp + 2):
sum_primes += (1/mpfr(p_gmp) + 1/mpfr(p_gmp + 2))
last_p = p_gmp
count += 1
return float(sum_primes)
So, your full code will be
import numpy as np
import gmpy2
from gmpy2 import mpz, mpfr, is_prime
import time
def sieve_of_eratosthenes(max_num):
is_prime = np.full(max_num + 1, True, dtype=bool)
is_prime[0:2] = False
p = 2
while (p * p <= max_num):
if (is_prime[p] == True):
for i in range(p * p, max_num + 1, p):
is_prime[i] = False
p += 1
return np.nonzero(is_prime)[0]
def brun_constant(primes):
sum_primes = mpfr('0')
last_p = mpz('2')
count = 0
for p in primes:
p_gmp = mpz(p)
if count > 0:
if is_prime(p_gmp + 2):
sum_primes += (1/mpfr(p_gmp) + 1/mpfr(p_gmp + 2))
last_p = p_gmp
count += 1
return float(sum_primes)
start_time = time.time()
limit = 10**6
primes = sieve_of_eratosthenes(limit)
print(primes)
brun_const = brun_constant(primes)
end_time = time.time()
execution_time = end_time - start_time
print(f"Brun's constant up to {limit}: {brun_const}")
print(execution_time)
Note here I chose 10^6
as an example:
[ 2 3 5 ... 999961 999979 999983]
Brun's constant up to 1000000: 1.710776930804221
0.4096040725708008
So, it took 0.4 seconds.
For 10^8
I got
[ 2 3 5 ... 99999959 99999971 99999989]
Brun's constant up to 100000000: 1.758815621067975
47.44146728515625 seconds
I did not try higher because I was working in google.colab
and I exceeded the allowed limit.
Update
As nicely observed by nocomment
below, the process can be made even faster by eliminating the inner loop in the function sieve_of_erastothenes
so it becomes:
def sieve_of_eratosthenes(max_num):
is_prime = np.full(max_num + 1, True, dtype=bool)
is_prime[0:2] = False
p = 2
while (p * p <= max_num):
if (is_prime[p] == True):
is_prime[p*p :: p] = False
p += 1
return np.nonzero(is_prime)[0]
With this change:
[ 2 3 5 ... 99999959 99999971 99999989]
Brun's constant up to 100000000: 1.7588156210679355
12.536579847335815 seconds
for 10^8
Update 2: Execution time estimation
As I mentioned above, I am not working on a machine allowing me to test for 10^20. So, I can give you a method to estimate the time, using a regression method. First start of by computing the execution time for a 6,7,8,9,10. Then fit the curve with a polynomial:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
import time
import numpy as np
limits = [10**exp for exp in range(2, 10)]
times = []
for limit in limits:
start_time = time.time()
primes = sieve_of_eratosthenes(limit)
brun_const = brun_constant(primes)
end_time = time.time()
execution_time = end_time - start_time
times.append(execution_time)
print(f"Limit: {limit}, Time: {execution_time}")
X = np.log(limits).reshape(-1, 1)
y = np.log(times).reshape(-1, 1)
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)
model = LinearRegression()
model.fit(X_poly, y)
X_fit = np.linspace(min(X), max(X), 400).reshape(-1, 1)
X_fit_poly = poly.transform(X_fit)
y_fit = model.predict(X_fit_poly)
X_fit = np.exp(X_fit)
y_fit = np.exp(y_fit)
plt.figure(figsize=(10, 6))
plt.scatter(limits, times, color='blue', label='Actual Times')
plt.plot(X_fit, y_fit, color='red', label='Polynomial Model Fit')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Limits')
plt.ylabel('Execution Time (seconds)')
plt.title('Polynomial Regression Fit')
plt.legend()
plt.show()
log_limit_20 = np.log(np.array([10**20], dtype=np.float64)).reshape(-1, 1)
log_limit_20_poly = poly.transform(log_limit_20)
estimated_log_time_20 = model.predict(log_limit_20_poly)
estimated_time_20 = np.exp(estimated_log_time_20)
print(f"Estimated execution time for limit 10^20: {estimated_time_20[0][0]} seconds")
Which gives
Estimated execution time for limit 10^20: 147470457222.1028 seconds
or 40963960.339500725
hours, or 1706493
days
So, I would suggest using a machine with GPU.