Search code examples
arrayscmultidimensional-arraystructure

Remove rows of multidimensional structures


I need to remove rows which represent dots of multidimensional structure, which are closest to each other.

For example, if structure is:

struct Point dot[100] = {
    {{1, 1, 1, 1}},
    {{2, 2, 2, 2}},
    {{1.3, 1.3, 1.3, 1.3}}
};

After applying Euclidean distance formula, distances are:

d12=sqrt((1-2)^2+(1-2)^2+(1-2)^2+(1-2)^2))=2

d13=sqrt((1-1.3)^2+(1-1.3)^2+(1-1.3)^2+(1-1.3)^2)=0.6

d23=sqrt((2-3)^2+(2-3)^2+(2-3)^2+(2-3)^2))=2

Closest points are the ones that represent row1 and row3, and they should be removed from structure.

OUTPUT should be: (2,2,2,2)

#include <stdio.h>
struct Point {
    double coordinate[100];
};
void remove_closest(struct Point dot[], int n, int dimension){
int i,j;
for(i=0;i<n;i++){
    for(j=0;j<dimension-1;j++){
      // 
    }
}
}
int main() {
struct Point dot[100] = {
    {{1, 1, 1, 1}},
    {{2, 2, 2, 2}},
    {{1.3, 1.3, 1.3, 1.3}}
};
 int i,j,dimension=4;
 remove_closest(dot, 3, dimension);

 for (i=0; i<3; i++) {
    printf("(");
    for (j=0; j<dimension-1; j++)
        printf("%g,", dot[i].coordinate[j]);
    printf("%g)\n", dot[i].coordinate[dimension-1]);
}
    return 0;
}

Structures are new to me, and multidimensional array makes it even harder. Could you help me with this task?

More example of input/output

  • Note: auxiliary arrays are not allowed

Solution

  • Okay, based on all of the criteria, here is what I'd do. I've generalized the code a bit. And I've renamed a few variables to be more descriptive. The code is annotated:

    #include <stdio.h>
    #include <stdlib.h>
    #include <math.h>
    
    // number of elements in an array
    #define COUNTOF(_arr)           (sizeof(_arr) / sizeof(_arr[0]))
    
    // maximum number of dimensions -- adjust to suit
    #define MAXDIM      4
    
    struct point {
        double coordinate[MAXDIM];
    };
    
    // prtpoint -- print single point
    void
    prtpoint(const struct point *ptcur,int ndim)
    {
        int sep = '(';
    
        for (int idxdim = 0; idxdim < ndim;  ++idxdim) {
            printf("%c%g",sep,ptcur->coordinate[idxdim]);
            sep = ',';
        }
    
        printf(")\n");
    }
    
    // prtlist -- print all points
    void
    prtlist(const struct point *points,int npoint,int ndim,const char *title)
    {
    
        if (title != NULL)
            printf("%s: %d\n",title,npoint);
    
        for (int idxdot = 0;  idxdot < npoint;  ++idxdot)
            prtpoint(&points[idxdot],ndim);
    }
    
    double
    square(double x)
    {
    
        return x * x;
    }
    
    double
    euclidean_distance(const struct point *points,int ndim,int idxlo,int idxhi)
    {
        double dist = 0.0;
        const struct point *ptlo = &points[idxlo];
        const struct point *pthi = &points[idxhi];
    
        // sum of the squares of all the dimensions
        for (int idxdim = 0;  idxdim < ndim;  ++idxdim)
            dist += square(ptlo->coordinate[idxdim] - pthi->coordinate[idxdim]);
    
        dist = sqrt(dist);
    
        return dist;
    }
    
    // remove_closest -- remove closest points
    int
    remove_closest(struct point *points, int npoint, int ndim)
    {
        int minlo = -1;
        int minhi = -1;
        double mindist = 0.0;
    
        // find the indexes of the two points that have the minimum distance
        for (int idxlo = 0;  idxlo < (npoint - 1);  ++idxlo) {
            for (int idxhi = idxlo + 1;  idxhi < npoint;  ++idxhi) {
                double curdist = euclidean_distance(points,ndim,idxlo,idxhi);
    
                // (1) grab the first distance we calculate as the minimum
                // (2) get the "best" minimum
                if ((minlo < 0) || (curdist < mindist)) {
                    minlo = idxlo;
                    minhi = idxhi;
                    mindist = curdist;
                    continue;
                }
            }
        }
    
        // strip the two closest entries
        struct point *ptsrc = points;
        struct point *ptdst = points;
        for (int idxcur = 0;  idxcur < npoint;  ++idxcur, ++ptsrc) {
            // skip either of the two lowest points
            if ((idxcur == minlo) || (idxcur == minhi)) {
    #ifdef DEBUG
                printf("REMOVE: %d ",idxcur);
                prtpoint(ptsrc,ndim);
    #endif
                continue;
            }
    
            // copy the point to fill the gap
            // NOTE: we _could_ just _always_ do the copy but doing this
            // conditionally is a speedup (i.e. avoid unnecessary copies)
            if (ptdst != ptsrc)
                *ptdst = *ptsrc;
    
            ++ptdst;
        }
    
        npoint -= 2;
    
        return npoint;
    }
    
    void
    dolist(struct point *points,int npoint,int ndim)
    {
    
        // should never happen [because if we get here, the compiler _should_
        // have complained in the definition of the array we've been passed]
        if (ndim > MAXDIM)
            exit(9);
    
        prtlist(points,npoint,ndim,"Before");
        npoint = remove_closest(points,npoint,ndim);
        prtlist(points,npoint,ndim,"After");
    }
    
    void
    test01(void)
    {
        struct point points[] = {
            {{1, 1, 1, 1}},
            {{2, 2, 2, 2}},
            {{1.3, 1.3, 1.3, 1.3}}
        };
    
        dolist(points,COUNTOF(points),4);
    }
    
    void
    test02(void)
    {
        struct point points[] = {
            // NOTE: duplicate points aren't handled the way this example implies
    #if 0
            {{1, 1, 1, 1}},
    #endif
            {{1, 1, 1, 1.00}},
            {{1, 1, 1, 1.2}},
            {{1, 1, 1, 1.145}},
            {{1, 1, 1, 1.31}},
            {{1, 1, 1, 1.1}},
        };
    
        dolist(points,COUNTOF(points),4);
    }
    
    int
    main(void)
    {
    
        test01();
        test02();
    
        return 0;
    }
    

    After compiling with -DDEBUG, here is the program output:

    Before: 3
    (1,1,1,1)
    (2,2,2,2)
    (1.3,1.3,1.3,1.3)
    REMOVE: 0 (1,1,1,1)
    REMOVE: 2 (1.3,1.3,1.3,1.3)
    After: 1
    (2,2,2,2)
    Before: 5
    (1,1,1,1)
    (1,1,1,1.2)
    (1,1,1,1.145)
    (1,1,1,1.31)
    (1,1,1,1.1)
    REMOVE: 2 (1,1,1,1.145)
    REMOVE: 4 (1,1,1,1.1)
    After: 3
    (1,1,1,1)
    (1,1,1,1.2)
    (1,1,1,1.31)
    

    UPDATE:

    thank you very much, could you just modify without typedef? – devec

    Well, if you really wish ;-)

    The original was:

    typedef struct point {
        double coordinate[MAXDIM];
    } point_t;
    

    Note that if I wanted to be a "smarty pants", I could do this with a #define and still fulfill the requirement ;-):

    struct point {
        double coordinate[MAXDIM];
    };
    #define point_t struct point
    

    But, I've edited the above code to use just the normal struct. All I did was just do the simple struct definition. Then, just globally change all point_t into struct point. So, really not too difficult. (e.g.) Change:

    double
    euclidean_distance(const point_t *points,int ndim,int idxlo,int idxhi)
    

    Into:

    double
    euclidean_distance(const struct point *points,int ndim,int idxlo,int idxhi)
    

    UPDATE #2:

    Although I touched on the briefly in the comments for test02 [taken from your linked data example], the code at present doesn't handle exact duplicate points too well. The data was the same for all points in all dimensions except the last one:

    1
    1.1
    1.00
    1.2
    1.145
    1.31
    
    1. As present, if we have two identical points, these are the ones that would be removed (i.e. their distance is 0.0). So, we'd remove 1 and 1.00
    2. This is contrary to your second example where the 1.1 and 1.00 points were removed.
    3. Even if we excluded/ignored points that are the same, it would be the first copy of 1.00 that would match. That is, the 1 point and the 1.1 point would be removed.
    4. I punted on this a bit by just removing one of the points that was identical in the input data.

    After considering the above:

    1. In your example, the two points [you] removed were 1.1 and 1.00. The distance is: 0.1
    2. But, my code removed 1.1 and 1.145. The distance is: 0.045

    So, I think your example was incorrect and you removed the wrong points. You may have to decide if this is a relevant issue for you.