Search code examples
csegmentation-faultcoredumpresamplingbinning

Segmentation Fault in binning/jack-knife c program


Why do I get segmentation fault (core dumped)? I can't figure out where the problem is.

The program is supposed to read a data file with thermalised data, bin the data and then resample using a jack-knife method.

No matter how much data is used, I receive a segmentation fault.

/* Ben Marshall 12/03/15 
Bin data & Jack-Knife  */

#include <stdio.h>
#include <stdlib.h>


int main(void)
{
  int binSize, i=0, j=0, k=0, s=0, dataSize=0;
  double *data, *binData, *jackData, mean,  test; //variance

  /* open files, therm.dat is the data to be jack-knifed, 
     jack-knife.dat will store the new data  */
  FILE *thermalised = fopen("therm.dat","r");
  FILE *jack = fopen("jack-knife.dat","w");
  if(thermalised==NULL || jack==NULL) {
    return(-1);
  }

  // check amount of data to be used
  while(fscanf(thermalised, "%lf", &test) != EOF) {
    i++;
  }

  // close and reopen file to reset to start of file
  fclose(thermalised);
  FILE *therm = fopen("therm.dat","r");
  if(therm==NULL) {
    return(-1);
  }

  dataSize=i;
  data=malloc(dataSize*sizeof(double));
  binData=malloc(dataSize*sizeof(double));
  jackData=malloc(dataSize*sizeof(double));

  i=0;
  while(fscanf(therm, "%lf", &data[i]) != EOF) {
    i++;
  }

  fclose(therm);

  /* increasing bin size in multiples of 2  */
  for(binSize=1; (dataSize/binSize)>20; (binSize=binSize*2)) {

    /* binning the data */
    for(j=0; j<dataSize; j=(j+binSize)) {
      for(k=0; k<binSize; k++) {
    binData[s] = binData[s] + data[j+k];
      }
      binData[s] = binData[s]/binSize;
      s++;
    }

    /* jack-knifing the binned data */
    for(i=0; i<(dataSize/binSize); i++) {
      for(s=0; s<(dataSize/binSize); s++) {
    jackData[i] = jackData[i] + binData[s] - binData[i];
      }
      jackData[i] = jackData[i]/((dataSize/binSize)-1); 
    }

    /* calculate mean and variance(FINISH!!) of jackData */
    for(i=0; i<(dataSize/binSize); i++) {
      mean = mean + jackData[i];
    }
    mean = mean/(dataSize/binSize);
    fprintf(jack, "%d\t%lf\n",binSize,mean);
  }

  free(data);
  free(binData);
  free(jackData);
  fclose(jack);
  return(0);
} 

Therm.dat file with 512 data points

0.000000
0.000000
-0.680375
-0.680375
-0.680375
-0.350821
-0.887280
-0.995220
-1.252962
-1.252962
-1.252962
-1.687556
-1.901494
-1.387267
-0.661730
-1.270084
-1.071972
-0.331553
-0.331553
0.231933
0.206068
-0.472156
-0.064220
-0.339324
-0.326490
-0.326490
-0.326490
-0.866318
-0.866318
-0.571235
-0.571235
0.289255
-0.609399
0.218489
0.218489
-0.561976
0.309681
0.309681
1.183490
0.242221
0.242221
0.708890
0.708890
0.683819
0.348371
0.285159
0.285159
0.285159
-0.156746
0.274667
-0.005291
0.286612
0.286612
-0.473538
-0.134213
-0.134213
0.165067
0.165067
-0.149541
-0.866894
-0.866894
-0.866894
-0.866894
-0.298544
-0.298544
0.406136
0.123975
0.260068
0.260068
0.645153
1.192939
1.192939
1.359937
0.546329
1.339987
1.349098
0.379595
-0.490413
-0.256790
-0.756331
-0.344652
0.190825
0.021848
0.533022
0.068725
0.068725
0.068725
0.068725
0.068725
0.716304
0.120708
-0.344601
-0.344601
-0.864071
-0.133875
-0.174296
0.685892
0.763051
0.616413
0.105252
1.001374
0.001387
0.001387
0.001387
0.001387
0.856729
0.010591
-0.177193
0.496544
-0.329510
-0.968900
-1.073870
-0.979022
-1.353796
-1.415412
-1.024003
-1.024003
-1.024003
-1.281823
-1.281823
-1.281823
-1.281823
-1.281823
-1.281823
-1.276340
-0.604276
-0.604276
0.241735
-0.057679
0.446233
0.446233
0.812499
1.349643
1.083498
1.083498
1.062126
0.119195
0.559111
0.120574
0.120574
-0.064506
-0.064506
0.262792
0.393765
0.703879
0.037393
0.569609
0.603709
0.603709
0.603709
0.603709
0.211741
-0.136132
-0.411412
0.219343
0.001131
-0.253185
0.090066
-0.390811
-0.390811
-0.697071
-0.761953
-0.282947
-0.282947
0.529573
0.806371
0.619948
0.286835
0.286835
0.710076
0.710076
0.710076
1.047118
1.047118
0.209257
-0.320486
0.437228
0.437228
-0.448875
-1.172709
-1.172709
-0.767286
-0.767286
-0.767286
-0.919685
-0.467507
-0.467507
-0.394832
-0.394832
0.023062
0.023062
-0.432039
-0.638257
-0.638257
0.005328
0.561397
1.318880
1.597995
0.734204
-0.082765
-0.326956
-0.326956
0.003100
0.214446
-0.003320
0.478868
1.333778
1.039363
1.312167
1.649395
1.649395
1.482254
1.482254
0.958699
0.983845
0.217772
-0.033559
-0.068988
-0.184109
-0.843987
-0.332641
0.015332
-0.443388
-0.443388
-1.238509
-1.060085
-0.070902
-0.070902
0.341742
-0.389328
0.512347
-0.460587
-0.876405
-0.824193
-1.554555
-0.753674
-0.519466
-0.122992
-0.437232
0.299364
1.196347
1.280916
0.896763
1.011643
0.832369
0.675100
0.615301
0.425211
0.701377
-0.076070
-0.029239
0.631120
0.579626
0.341775
0.149383
0.149383
1.009203
0.161747
0.822448
0.822448
1.240791
0.484235
0.484235
0.165943
1.093288
0.536774
0.284845
-0.387362
0.170619
0.170619
0.170619
-0.393905
0.205390
0.403015
0.534997
0.763501
0.763501
0.763501
0.999167
0.524351
0.524351
0.419718
-0.419463
-0.419463
-0.815160
-1.106938
-1.172858
-1.172858
-1.209887
-0.934196
-0.934196
-1.289821
-0.355606
0.517273
0.517273
0.116671
-0.018992
0.978757
1.368280
1.057544
0.342433
0.980111
0.645430
-0.112589
-0.419199
-1.189227
-0.503506
-0.510428
-0.510428
0.107349
0.318306
-0.419542
-0.419542
0.114221
-0.738931
-0.738931
-0.738931
-0.738931
-0.927926
-0.927926
-0.577008
-0.577008
-0.577008
-0.577008
-0.102578
-0.311335
0.354756
-0.375415
-0.704244
-0.928206
-0.928206
-0.224889
-0.382932
-0.784753
-0.784753
-0.010049
0.010331
0.010331
0.241014
0.336769
0.510614
0.697081
0.261887
0.114445
-0.511449
-0.404934
-0.359657
-1.349987
-0.498508
-0.780040
-0.225245
0.335179
-0.511846
-0.437551
-0.314674
-0.314674
-0.314674
-0.848702
-0.922187
-0.877289
-0.877289
-0.877289
-0.244820
-0.160898
-0.160898
-0.970461
-0.970461
-0.634152
-0.438910
-0.560479
-0.804812
-0.760734
-0.480944
-0.480944
0.097673
-0.115412
0.665033
0.665033
0.479649
0.126541
-0.066140
-0.066140
-0.193375
0.768804
0.768804
0.565950
0.024579
0.024579
0.678449
0.703008
0.424989
-0.506374
-0.092152
-0.856560
-0.856560
-0.808172
-0.354146
-1.247427
-1.247427
-1.247427
-0.615559
-0.180127
0.485542
0.233292
0.880815
0.880815
0.880815
0.880815
0.583724
-0.246717
0.551569
0.551569
0.558707
0.972352
0.147571
0.015242
0.633824
0.770137
0.263372
0.263372
0.263372
0.263372
-0.136203
0.208565
-0.084859
0.232177
-0.301226
-0.301226
-1.109600
-1.109600
-1.109600
-1.291821
-0.494729
0.190720
0.702423
0.524186
0.524186
-0.366817
-0.366817
-0.304917
0.181145
0.007122
0.669448
0.716738
-0.135397
-0.188442
-0.647237
-0.175581
-0.442751
-0.442751
-0.137842
0.450729
0.648758
0.288951
-0.001317
0.112005
-0.294367
0.276585
-0.241832
0.241945
0.241945
-0.462878
-0.105833
-0.441753
-1.137753
-0.650209
0.203320
0.174555
-0.605070
-0.667136
-0.667136
-0.355055
-0.074939
0.837230
0.837230
0.837230
0.196938
0.182119
0.247177
0.865210
0.018448
0.931659
0.931659
0.709177
0.709177
-0.025761
0.642050
0.127487
-0.427523
-0.584748
-0.584748
0.222443
0.222443
1.019483
1.019483
1.019483
1.465055
0.589628
0.067904
0.067904
-0.622642
-1.007569
-0.876365
-1.184423
-1.385407
-0.547937
-0.303931
0.422156
0.422156
0.257257
0.257257
0.199488

Solution

  • There are a few problems with your code:

    while(fscanf(thermalised, "%lf", &test) != EOF)
    

    This will cause an endless loop if the file contains something that fails to parse as a floating point number. Use this instead:

    while(fscanf(thermalised, "%lf", &test) == 1)
    

    Why do you then close and reopen the file instead of just calling rewind() ?

    In the second reading loop, you should be more careful, in case another process is writing the "them.dat" file asynchronously. You should just try to parse as many numbers as you allocated:

    for (i = 0; i < dataSize; i++) {
        if (fscanf(therm, "%lf", &data[i]) != 1) {
            printf("error: them.dat changed\n");
            exit(1);
        }
    }
    

    You should not return -1 from main(), most systems only support exit status between 0 and 255.

    In the binning phase, you have a potential buffer overflow:

    for(binSize=1; (dataSize/binSize)>20; (binSize=binSize*2)) {
    
        /* binning the data */
        for(j=0; j<dataSize; j=(j+binSize)) {
            for(k=0; k<binSize; k++) {
                binData[s] = binData[s] + data[j+k];
            }
            binData[s] = binData[s]/binSize;
            s++;
        }
    

    If dataSize is not a power of 2, the last bin may extend beyond the end of the data array, invoking undefined behaviour, possibly the crash you are experiencing. Modify you boundary test to avoid this.

    Furthermore, both binData and jackData are used before initialization. You should allocate these arrays with calloc so they start at 0.0 (on systems with IEEE floats ;-), or intialize them to the appropriate values with a loop.

    When you compute the mean:

        /* calculate mean and variance(FINISH!!) of jackData */
        for(i=0; i<(dataSize/binSize); i++) {
            mean = mean + jackData[i];
        }
    

    mean is not initialized!

    When you print it:

    fprintf(jack, "%d\t%lf\n",binSize,mean);
    

    "%lf" is undefined, you should use "%f" to print a double or a float.