I am trying to sort a 2D array of doubles using qsort() in C. The arrays contain 3D point data, which is read in from a file using fscanf. My programming skills are rather limited, but I have really large datasets that I need to deal with. Sorry in advance if my code sucks.
23127.947, 23127.947, 23127.947
523127.790, 523127.790, 523127.790
523127.747, 523127.747, 523127.747
523127.761, 523127.761, 523127.761
523127.768, 523127.768, 523127.768
(...for 3,158,632 points)
I've used printf to isolate that the problem in my code seems to be the qsort() line, which causes a segmentation fault. From other questions on Stack Overflow that I read, it could be a problem with my "compare" function. Examples for doing a 1D array seemed easy, but the examples I saw for 2D arrays didn't go into comparing the other dimensions (first X, then if X1 = X2, compare Y, then if Y1 = Y2, compare Z).
int main(int argc, char *argv[]) {
int i,j,c;
double x,y,z;
int ROWS = 3158632;
int COLS = 3;
char buffer[100];
double** data = Make2DDoubleArray(ROWS, COLS);
//Open the plot file to read in, and have an output write file
FILE *fp = fopen("Plot_1-2.txt","r");
if(fp == NULL) {
printf("Can't open file\n");
exit;
}
fgets(buffer, 100, fp); //Ignore header
for(i=0; ; i++){
if ((c = fgetc(fp)) == EOF){
break;
}
fscanf(fp,"%lf, %lf, %lf",&x, &y, &z);
data[i][0] = x;
data[i][1] = y;
data[i][2] = z;
}
printf("First 5 unsorted numbers:\n");
for(j=0;j<5;j++){
printf("Line %d: %.3lf, %.3lf, %.3lf\n",j, data[j][0], data[j][0], data[j][0]);
}
printf("Last 5 unsorted numbers:\n");
for(j=ROWS-5;j<ROWS;j++){
printf("Line %d: %.3lf, %.3lf, %.3lf\n",j, data[j][0], data[j][0], data[j][0]);
}
/* Sort array using Quicksort algorithm: */
printf("Sorting...\n");
qsort(data, ROWS, COLS*sizeof(double), &compare);
printf("First 10 sorted numbers:\n");
for(j=0;j<10;j++){
printf("Line %d: %.3lf, %.3lf, %.3lf\n",j, data[j][0], data[j][0], data[j][0]);
}
fclose(fp);
for (i=0; i<ROWS; i++){
free(data[i]);
}
free(data);
return 0;
}
double** Make2DDoubleArray(int arraySizeX, int arraySizeY) {
double** theArray;
int i;
theArray = (double**) malloc(arraySizeX*sizeof(double*));
for (i = 0; i < arraySizeX; i++)
theArray[i] = (double*) malloc(arraySizeY*sizeof(double));
return theArray;
}
int compare(const void *arg1, const void *arg2) {
//double a, b, c, d, e, f;
double *a = (double*)arg1;
double *b = (double*)arg2;
double *c = ((double*)arg1 + 1);
double *d = ((double*)arg2 + 1);
double *e = ((double*)arg1 + 2);
double *f = ((double*)arg2 + 2);
if(a > b)
return 1;
else if(a < b)
return -1;
else {
if(c > d)
return 1;
else if(c < d)
return -1;
else {
if(e > f)
return 1;
else if(e < f)
return -1;
else
return 0;
}
}
}
I'm wondering if telling qsort to go "COLS * sizeof(double)" is the wrong way to do it with how I allocated the memory for the 2D array? Would treating this problem as a 1D array make the rest of it work? I'd prefer to keep it as a 2D array, if possible.
None of this means anything without headers like <stdio.h>
, <stdlib.h>
, etc...
Please explain exit;
. I think you mean exit(0);
.
There are a few problems in your main
. Because of that fgetc
, your code potentially loses the most significant digit of your first value, which is a subtle bug. If you want to test for EOF, test the return value of scanf
(Jee! I didn't think of that! I wish they wrote these things in manuals! Duh, they do...). The example at the end of the file is better than this, because that example ensures that three values are actually parsed by fscanf
.
for(size_t i=0; fscanf(fp,"%lf, %lf, %lf",&x, &y, &z) != EOF; i++){
data[i][0] = x;
data[i][1] = y;
data[i][2] = z;
}
There's a problem in your Make2DDoubleArray
function. It allocates many disjoint arrays, which qsort
can't handle. Isn't it much cleaner to allocate your array in one step?
void *Make2DDoubleArray(size_t x) {
double (*theArray)[3] = malloc(x * sizeof *theArray);
return theArray;
}
theArray
is declared as a pointer to an array of 3 doubles. You don't even need a Make2DDoubleArray
for this.
There's a problem in the compare
function.
double *a = (double*)arg1;
double *b = (double*)arg2;
a
and b
are pointers,
if(a > b)
return 1;
else if(a < b)
return -1;
... yet your code compares them as integers, rendering the sort malfunctional. The address of array[0]
will always be less than the address of array[1]
.
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
int main(int argc, char *argv[]) {
int j,c;
double x,y,z;
size_t ROWS = 3158632;
size_t COLS = 3;
char buffer[100];
double (*theArray)[COLS] = malloc(ROWS * sizeof *theArray);
//Open the plot file to read in, and have an output write file
FILE *fp = fopen("Plot_1-2.txt","r");
if(fp == NULL) {
printf("Can't open file\n");
exit(0);
}
fgets(buffer, 100, fp); //Ignore header
for(size_t i=0; fscanf(fp,"%lf, %lf, %lf", &x, &y, &z) == 3; i++){
data[i][0] = x;
data[i][1] = y;
data[i][2] = z;
}
printf("First 5 unsorted numbers:\n");
for(size_t j=0; j<5; j++){
printf("Line %zu: %.3lf, %.3lf, %.3lf\n", j, data[j][0], data[j][0], data[j][0]);
}
puts("Last 5 unsorted numbers:");
for(size_t j=ROWS-5; j<ROWS; j++){
printf("Line %zu: %.3lf, %.3lf, %.3lf\n", j, data[j][0], data[j][0], data[j][0]);
}
/* Sort array using Quicksort algorithm: */
puts("Sorting...");
qsort(data, ROWS, sizeof *data, compare);
puts("First 10 sorted numbers:");
for(size_t j=0;j<10;j++){
printf("Line %zu: %.3lf, %.3lf, %.3lf\n", j, data[j][0], data[j][0], data[j][0]);
}
fclose(fp);
free(data);
return 0;
}
int compare(const void *arg1, const void *arg2) {
double (*x)[3] = arg1;
double (*y)[3] = arg2;
if ((*x)[0] > (*y)[0])
return 1;
else if ((*x)[0] < (*y)[0])
return -1;
else if ((*x)[1] > (*y)[1])
return 1;
else if ((*x)[1] < (*y)[1])
return -1;
else if ((*x)[2] > (*y)[2])
return 1;
else if ((*x)[2] < (*y)[2])
return -1;
else
return 0;
}