Search code examples
cgsl

Save simulation data to binary in C


(EDITED) I have a particle simulator in C where I generate files with the particle states per integration step containing time, position, and velocity components. The code works well, but the resulting text files can easily go to gigabytes of data, with billions of lines. My initial idea is trying to save those files in binary instead in order to save space. I'm also using GSL for the ODEs. If so, I'm trying to save to binary file with fwrite. The following is a minimal working example:

#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#include <gsl/gsl_rng.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_odeiv2.h>

int uniformFieldODE(double t, const double s[], double f[], void *params);

int main() {

    int noPrimaries = 1;

    double q = 1.0;
    double m = 1.0;

    double B[3] = { 0.0, 0.0, 1.0 };            
    double E[3] = { 0.1, 0.0, 0.1 };

    double x0[3];  // = {0, 0, 0};                 // Initial position
    double v0[3];  // = {1, 1, 1};                 // Initial velocity

    double t0 = 0.0;
    double tf = 5000.0;
    double dt = 0.05;

    double h = 1.0e-06;
    double epsAbs = 1.0e-08;
    double epsRel = 1.0e-10;

    char *file = "lorentz";

    const gsl_rng_type *T = gsl_rng_ranlxs0;
    gsl_rng *r = gsl_rng_alloc(T);
    gsl_rng_set(r, (unsigned long)time(NULL));

    for (int i = 0; i < noPrimaries; i++) {

        x0[0] = gsl_rng_uniform(r);
        x0[1] = gsl_rng_uniform(r);
        x0[2] = gsl_rng_uniform(r);
        v0[0] = gsl_rng_uniform(r);
        v0[1] = gsl_rng_uniform(r);
        v0[2] = gsl_rng_uniform(r);

        char fileNumber[i+1];
        char fileName[i+20];
        char extension[8] = ".bin";
        sprintf(fileNumber, "%d", i+1);
        strcpy(fileName, file);
        strcat(fileName, fileNumber);
        strcat(fileName, extension);  
    
        // Initial conditions & parameters
        const int dimension = 6;          // number of differential equations
        double s[dimension];              // Initial state vector
        int status;                       // status of driver function
        double paramsB[8];
        paramsB[0] = q;
        paramsB[1] = m;
        paramsB[2] = B[0];
        paramsB[3] = B[1];
        paramsB[4] = B[2];
        paramsB[5] = E[0];
        paramsB[6] = E[1];
        paramsB[7] = E[2];

        s[0] = x0[0]; s[1] = x0[1]; s[2] = x0[2];
        s[3] = v0[0]; s[4] = v0[1]; s[5] = v0[2];

        // File creation for state storing
        FILE *data = fopen(fileName, "wb");
    
        // Integrator configuration
        double t, t_next;
        gsl_odeiv2_system odeSystem;
        odeSystem.function = uniformFieldODE;
        odeSystem.dimension = dimension;
        odeSystem.params = paramsB;

        t = t0;
        gsl_odeiv2_driver *drv;
        drv = gsl_odeiv2_driver_alloc_y_new(&odeSystem, gsl_odeiv2_step_rk8pd, h, epsAbs, epsRel);

        for (t_next = t0 + dt; t_next <= tf; t_next += dt) {
        
            status = gsl_odeiv2_driver_apply(drv, &t, t_next, s);
            if (status != GSL_SUCCESS) {
                printf("Error: status = %d\n", status);
                break;
            }
            //fprintf(data, "%.5e %.5e %.5e %.5e %.5e %.5e %.5e\n", t, s[0], s[1], s[2], s[3], s[4], s[5]);
            char str[100];
            sprintf(str, "%.5e %.5e %.5e %.5e %.5e %.5e %.5e\n", t, s[0], s[1], s[2], s[3], s[4], s[5]);
            fwrite(&str, sizeof(str), 1, data);
        }
        gsl_odeiv2_driver_free(drv);
        fclose(data);
    }

    return 0;
}

int uniformFieldODE(double t, const double s[], double f[], void *params){

    (void)(t); /* avoid unused parameter warning */
    double *lparams = (double *)params;

    double q  = lparams[0];
    double m  = lparams[1];
    double mu = q / m;

    double Bx = lparams[2];
    double By = lparams[3];
    double Bz = lparams[4];

    double Ex = lparams[5];
    double Ey = lparams[6];
    double Ez = lparams[7];

    f[0] = s[3];
    f[1] = s[4];
    f[2] = s[5];
    f[3] = mu * (Bz*s[4] - By*s[5] + Ex);
    f[4] = mu * (Bx*s[5] - Bz*s[3] + Ey);
    f[5] = mu * (By*s[3] - Bx*s[4] + Ez);
    return GSL_SUCCESS;
}

The problem is that the output still comes out as text, even if it has .bin as extension. Some unreadable characters show up in the beginning of the strings, but otherwise the output is the same as of a text file:

1.00000e-04 7.14907e+08 7.14919e+08 2.14475e+08 -1.30229e+08 -9.46995e+06 -9.50139e+06
5せ2.00000e-04 7.14894e+08 7.14918e+08 2.14474e+08 -1.30245e+08 -9.31686e+06 -9.43359e+06

I'm sure that there's a simple way of doing this, but I cannot figure it out. I'll really appreciate any inputs.

From my original post I got feedbacks that binary might not be the way to go to preserve all the data. So my goal here is to find an output format with a small size when compared to pure text files.

The files will later be used for data analysis via e.g. Python with numpy and/or pandas.


Solution

  • Brad Lanam's answer mentions using libz to compress the output. Unfortunately it does not show an example.

    If your program would write text output to stdout, not to a file, like

    printf(str, "%.5e %.5e %.5e %.5e %.5e %.5e %.5e\n", t, s[0], s[1], s[2], s[3], s[4], s[5]);
    

    you could use a pipe like

    yourprogram | gzip > output.txt.gz
    

    In Python you can probably use classes gzip or libz to process a compressed input stream or you can use gzip in a pipe again like

    gzip -dc output.txt.gz | python_program