what is the best approach to compute Brun's constant with python (witch include primality test and long run sum)?

Brun's constant : https://en.wikipedia.org/wiki/Brun%27s_theorem http://numbers.computation.free.fr/Constants/Primes/twin.html

How to compute Brun's constant up to 10^20 with python knowing that primality check has a heavy cost and summing result up to 10^20 is a long task

here is my 2 cents attempt :

IsPrime : fastest way to check if the number is prime I know digit_root : compute the digital root of the number

If someone know what could be improve bellow to reach computation to 10^20, you're welcome

import numpy as np
import math
import time

#Brun's constant
#p      B_2(p) 
#10^2   1.330990365719...
#10^4   1.616893557432...
#10^6   1.710776930804...
#10^8   1.758815621067...
#10^10  1.787478502719...
#10^12  1.806592419175...
#10^14  1.820244968130...
#10^15  1.825706013240...
#10^16  1.830484424658...
#B_2 should reach 1.9 at p ~ 10^530 which is far beyond any computational project

#B_2*(p)=B_2(p)+ 4C_2/log(p)
#p  B2*(p) 
#10^2   1.904399633290...
#10^4   1.903598191217...
#10^6   1.901913353327...
#10^8   1.902167937960...
#10^10  1.902160356233...
#10^12  1.902160630437...
#10^14  1.902160577783...
#10^15  1.902160582249...
#10^16  1.902160583104...

def digit_root(number):
    return (number - 1) % 9 + 1

first25Primes=[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]
def IsPrime(n) : 
    # Corner cases 
    if (n <= 1) : 
        return False
    if (n <= 3) : 
        return True
  
    # This is checked so that we can skip  
    # middle five numbers in below loop 
    if (n % 2 == 0 or n % 3 == 0) : 
        return False
    #exclude digital root 3 or 6 or 9
    if digit_root(n) in (3,6,9):
        return False
    
    if (n != 2 and n != 7 and n != 5 and str(n)[len(str(n))-1] not in ("1","3","7","9")): #si le nombre ne se termine pas par 1 3 7 9
        return False

    for i in first25Primes:
        if n%i == 0 and i < n:
            return False
            
    if (n>2):
        if (not(((n-1) / 4) % 1 == 0 or ((n+1) / 4) % 1 == 0)):
            return False
    if (n>3):
        if (not(((n-1) / 6) % 1 == 0 or ((n+1) / 6) % 1 == 0)):
            return False
    
    i = 5
    while(i * i <= n) :
        if (n % i == 0 or n % (i + 2) == 0) : 
            return False
        i = i + 6      
  
    return True

def ComputeB_2Aster(B_2val,p):
    return B_2 + (C_2mult4/np.log(p))


start = time.time()
#approx De Brun's
B_2 = np.float64(0)
B_2Aster = np.float64(0)
one = np.float64(1)
#Twin prime constant
C_2 = np.float64(0.6601618158468695739278121100145557784326233602847334133194484233354056423)
C_2mult4 = C_2 * np.float64(4)
lastPrime = 2
lastNonPrime = 1
for p in range(3, 100000000000000000000,2):    
    if IsPrime(p):
        lastNonPrime = p-1
        if lastPrime == p-2 and lastNonPrime == p-1:
            B_2 = B_2 + (one/np.float64(p)) + (one/np.float64(lastPrime))            
        lastPrime = p
    else:
        lastNonPrime = p
    
    if p<10000000000:
        if p%1000001==0:            
            print(f'p:{p} \t\t[elapsed:{time.time()-start}]\nB_2:{B_2:.52f} B_2Aster:{ComputeB_2Aster(B_2,p-2):.52f}\n',end="")
    else:        
        print(f'p:{p} \t\t[elapsed:{time.time()-start}]\nB_2:{B_2:.52f} B_2Aster:{ComputeB_2Aster(B_2,p-2):.52f}\n',end="")
print(f'p:{p} \t\t[elapsed:{time.time()-start}]\nB_2:{B_2:.52f} B_2Aster:{ComputeB_2Aster(B_2,p-2):.52f}\n',end="")

Solution

Now, if you remember your classes in number theory there is something called sieve of Eratosthenes which is an algorithm to find all prime numbers up to any given limit:

algorithm Sieve of Eratosthenes is
    input: an integer n > 1.
    output: all prime numbers from 2 through n.

    let A be an array of Boolean values, indexed by integers 2 to n,
    initially all set to true.
    
    for i = 2, 3, 4, ..., not exceeding √n do
        if A[i] is true
            for j = i2, i2+i, i2+2i, i2+3i, ..., not exceeding n do
                set A[j] := false

    return all i such that A[i] is true.

Now, this algorithm, translated into python is simply

def sieve_of_eratosthenes(max_num):
    is_prime = np.full(max_num + 1, True, dtype=bool)
    is_prime[0:2] = False
    p = 2
    while (p * p <= max_num):
        if (is_prime[p] == True):
            for i in range(p * p, max_num + 1, p):
                is_prime[i] = False
        p += 1
    return np.nonzero(is_prime)[0]

Now, to compute Brun's constant, you will need that list of primes and the sum. So,

def brun_constant(primes):
    sum_primes = mpfr('0')
    last_p = mpz('2')
    count = 0
    for p in primes:
        p_gmp = mpz(p)  
        if count > 0:
            if is_prime(p_gmp + 2):
                sum_primes += (1/mpfr(p_gmp) + 1/mpfr(p_gmp + 2))
        last_p = p_gmp
        count += 1
    return float(sum_primes)

So, your full code will be

import numpy as np
import gmpy2
from gmpy2 import mpz, mpfr, is_prime
import time

def sieve_of_eratosthenes(max_num):
    is_prime = np.full(max_num + 1, True, dtype=bool)
    is_prime[0:2] = False
    p = 2
    while (p * p <= max_num):
        if (is_prime[p] == True):
            for i in range(p * p, max_num + 1, p):
                is_prime[i] = False
        p += 1
    return np.nonzero(is_prime)[0]

def brun_constant(primes):
    sum_primes = mpfr('0')
    last_p = mpz('2')
    count = 0
    for p in primes:
        p_gmp = mpz(p)  
        if count > 0:
            if is_prime(p_gmp + 2):
                sum_primes += (1/mpfr(p_gmp) + 1/mpfr(p_gmp + 2))
        last_p = p_gmp
        count += 1
    return float(sum_primes)

start_time = time.time()

limit = 10**6
primes = sieve_of_eratosthenes(limit)
print(primes)
brun_const = brun_constant(primes)

end_time = time.time()
execution_time = end_time - start_time

print(f"Brun's constant up to {limit}: {brun_const}")
print(execution_time)

Note here I chose 10^6 as an example:

[     2      3      5 ... 999961 999979 999983]
Brun's constant up to 1000000: 1.710776930804221
0.4096040725708008

So, it took 0.4 seconds.

For 10^8 I got

[       2        3        5 ... 99999959 99999971 99999989]
Brun's constant up to 100000000: 1.758815621067975
47.44146728515625 seconds

I did not try higher because I was working in google.colab and I exceeded the allowed limit.

Update

As nicely observed by nocomment below, the process can be made even faster by eliminating the inner loop in the function sieve_of_erastothenes so it becomes:

def sieve_of_eratosthenes(max_num):
    is_prime = np.full(max_num + 1, True, dtype=bool)
    is_prime[0:2] = False
    p = 2
    while (p * p <= max_num):
        if (is_prime[p] == True):
            is_prime[p*p :: p] = False
        p += 1
    return np.nonzero(is_prime)[0]

With this change:

[       2        3        5 ... 99999959 99999971 99999989]
Brun's constant up to 100000000: 1.7588156210679355
12.536579847335815 seconds

for 10^8

Update 2: Execution time estimation

As I mentioned above, I am not working on a machine allowing me to test for 10^20. So, I can give you a method to estimate the time, using a regression method. First start of by computing the execution time for a 6,7,8,9,10. Then fit the curve with a polynomial:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
import time
import numpy as np

limits = [10**exp for exp in range(2, 10)]  
times = []

for limit in limits:
    start_time = time.time()
    primes = sieve_of_eratosthenes(limit)
    brun_const = brun_constant(primes)
    end_time = time.time()
    execution_time = end_time - start_time
    times.append(execution_time)
    print(f"Limit: {limit}, Time: {execution_time}")

X = np.log(limits).reshape(-1, 1)  
y = np.log(times).reshape(-1, 1)   

poly = PolynomialFeatures(degree=2) 
X_poly = poly.fit_transform(X)

model = LinearRegression()
model.fit(X_poly, y)

X_fit = np.linspace(min(X), max(X), 400).reshape(-1, 1)
X_fit_poly = poly.transform(X_fit)
y_fit = model.predict(X_fit_poly)

X_fit = np.exp(X_fit)
y_fit = np.exp(y_fit)

plt.figure(figsize=(10, 6))
plt.scatter(limits, times, color='blue', label='Actual Times')
plt.plot(X_fit, y_fit, color='red', label='Polynomial Model Fit')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Limits')
plt.ylabel('Execution Time (seconds)')
plt.title('Polynomial Regression Fit')
plt.legend()
plt.show()

log_limit_20 = np.log(np.array([10**20], dtype=np.float64)).reshape(-1, 1)
log_limit_20_poly = poly.transform(log_limit_20)
estimated_log_time_20 = model.predict(log_limit_20_poly)
estimated_time_20 = np.exp(estimated_log_time_20)

print(f"Estimated execution time for limit 10^20: {estimated_time_20[0][0]} seconds")

Which gives

Estimated execution time for limit 10^20: 147470457222.1028 seconds

or 40963960.339500725 hours, or 1706493 days

So, I would suggest using a machine with GPU.