Search code examples
c++performance

Can I speed up my text file writing by using memory first?


Here is my code in C++ I am trying to speed it up if possible. How do I write to memory then dump the whole file to "Primes List.txt" at the end? Thanks if you can help at all.

#include <vector>
#include <iostream>
#include <fstream>
#include <chrono>
using namespace std;

int main()
{
    cout << "\n\n\n     Calculating all Prime Numbers up to 82,000,000";
    cout << "\n\n     You will have to give me exactly a minute! ...";
    cout << "\n\n     ";
    auto start = chrono::steady_clock::now();
    ofstream myfile;
    myfile.open("Primes List.txt");
    myfile << "2\n";
    vector<int> primes;
    primes.push_back(2);
    for (int i = 3; i < 82000000; i++)
    {
        bool prime = true;
        for (int j = 0; j < primes.size() && primes[j] * primes[j] <= i; j++)
        {
            if (i % primes[j] == 0)
            {
                prime = false;
                break;
            }
        }
        if (prime)
        {
            primes.push_back(i);
            myfile << i << "\n";
         }
     }
    auto end = chrono::steady_clock::now();
    chrono::duration<double> elapsed_seconds = end - start;
    myfile << "\n Elapsed Time: " << elapsed_seconds.count() << " seconds\n";
    cout << "Elapsed Time: " << elapsed_seconds.count() << " seconds\n\n\n";
    myfile.close();
    system("pause");
return 0;
}

I'm running this on quite a beast of a PC and would expect it to run faster.


Solution

  • As multiple commenters noted, the first issue is to speed up the prime generation. The following code 1) uses a bitmap for the sieve which greatly reduces the required memory and 2) only checks numbers that are +/-1 mod 6.

    This is fastest sieve algorithm of which I know. On my machine it only took 108ms to cover up to 82M. Sieving the odds was 180ms and I didn't have enough patience to measure the canonical sieve algorithm.

    Sample Code

    auto sieve_mod6_prime_seq(int max = int{1} << 20) {
        std::vector<int> primes;
        primes.push_back(2);
        primes.push_back(3);
    
        auto max_index = max / 3;
        auto bits_per = sizeof(uint64_t) * CHAR_BIT;
        auto nwords = (bits_per + max_index - 1) / bits_per;
        std::vector<uint64_t> words(nwords);
    
        words[0] |= 1;
        size_t wdx = 0;
        while (wdx < nwords) {
            auto b = std::countr_one(words[wdx]);
            auto p = 3 * (64 * wdx + b) + 1 + (b bitand 1);
            if (b < 64 and p < max) {
                primes.push_back(p);
    
                for (auto j = p; j < max; j += 6 * p) {
                    auto idx = j / 3;
                    auto jdx = idx / 64;
                    auto jmask = uint64_t{1} << (idx % 64);
                    words[jdx] |= jmask;
                }
    
                for (auto j = 5 * p; j < max; j += 6 * p) {
                    auto idx = j / 3;
                    auto jdx = idx / 64;
                    auto jmask = uint64_t{1} << (idx % 64);
                    words[jdx] |= jmask;
                }
            }
            else {
                ++wdx;
            }
        }
        return primes;
    }
    

    For C++ versions without std::countr_one available, here is an implementation.

    // If we are using gcc or clang, using the compiler builtin.
    #if defined(__GNUC__) || defined(__clang__)
    
    int countr_one(unsigned int n) {
        return ~n == 0 ? (sizeof(unsigned int) * CHAR_BIT) : __builtin_ctz(~n);
    }
    
    int countr_one(unsigned long int n) {
        return ~n == 0 ? (sizeof(unsigned long int) * CHAR_BIT) : __builtin_ctzl(~n);
    }
    
    int countr_one(unsigned long long int n) {
        return ~n == 0 ? (sizeof(unsigned long long int) * CHAR_BIT) : __builtin_ctzll(~n);
    }
    
    // Otherwise, a standards compliant implementation
    #else
    
    int countr_one(uint32_t n) {
        n = ~n & (n+1);   // this gives a 1 to the left of the trailing 1's
        n--;              // this gets us just the trailing 1's that need counting
        n = (n & 0x55555555) + ((n>>1) & 0x55555555);  // 2 bit sums of 1 bit numbers
        n = (n & 0x33333333) + ((n>>2) & 0x33333333);  // 4 bit sums of 2 bit numbers
        n = (n & 0x0f0f0f0f) + ((n>>4) & 0x0f0f0f0f);  // 8 bit sums of 4 bit numbers
        n = (n & 0x00ff00ff) + ((n>>8) & 0x00ff00ff);  // 16 bit sums of 8 bit numbers
        n = (n & 0x0000ffff) + ((n>>16) & 0x0000ffff); // sum of 16 bit numbers
        return n;
    }
    
    int countr_one(uint64_t n) {
        n = ~n & (n+1);
        n--;
        n = (n & 0x5555555555555555ul) + ((n>>1) & 0x5555555555555555ul);
        n = (n & 0x3333333333333333ul) + ((n>>2) & 0x3333333333333333ul);
        n = (n & 0x0f0f0f0f0f0f0f0ful) + ((n>>4) & 0x0f0f0f0f0f0f0f0ful);
        n = (n & 0x00ff00ff00ff00fful) + ((n>>8) & 0x00ff00ff00ff00fful);
        n = (n & 0x0000ffff0000fffful) + ((n>>16) & 0x0000ffff0000fffful);
        n = (n & 0x00000000fffffffful) + ((n>>32) & 0x00000000fffffffful);
        return n;
    }
    
    #endif