Search code examples
c++recursionstack-overflow

How to avoid stack overflow in this code, (Recursive function)


I was trying to solve a programming contest problem. I am pretty much a noob in this department (I think I have a lot to learn). I tried to solve the question, which included reading a 2D array(n x m) and finding out the blobs in it. Blobs are formed by contiguous lit pixels(denoted by #). Unlit pixels (denoted by .). I tried to find a blob by using a recursive method Blob::form(). Sample input might look like this

1
6 6
#...#.
.#.#.#
##..#.
......
.#.#.#
#...#.

I came up with the solution in a rush. And it's not much. But as always it fails in the worst condition n = m = 1000 and all chars are #. A 1000 x 1000 version of this:

1
3 3
###
###
###

The problem I assume is stack overflow. I have found out that the program is crashing while forming the blob.

#include <iostream>
#include <string>
#include <sstream>
#include <vector>
#include <algorithm>

using namespace std;
int pat[1000][1000],n,m;
char a[1000][1000];

struct point
{
    int x,y;
};

bool inBounds(point p)
{
    if(p.x < n && p.x >=0 && p.y < m && p.y >= 0) return true;
    else return false;
}
bool isAblob(int i,int j)
{
    point p[8];
    p[0].x = i-1; p[0].y =  j;
    p[1].x = i+1; p[1].y =  j;
    p[2].x = i+1; p[2].y =  j+1;
    p[3].x = i-1; p[3].y =  j-1;
    p[4].x = i-1; p[4].y =  j+1;
    p[5].x = i+1; p[5].y =  j-1;
    p[6].x = i; p[6].y =  j-1;
    p[7].x = i; p[7].y =  j+1;

    for(int k=0;k<8;k++)
    {
        if(inBounds(p[k]))
        {
            if(a[p[k].x][p[k].y] == '#') return true;
        }
    }
    return false;
}


class Blob
{
public:
    long long int pow;

    Blob(int i, int j)
    {
        this->pow = 0;
        point po;
        po.x=i;
        po.y=j;
        this->form(&po);
    }

    int getPow()
    {
        return this->pow;
    }

    void form ( point *p)
    {
        if(inBounds(*p))
        {
            if(a[p->x][p->y] == '#' && !pat[p->x][p->y])
            {
                a[p->x][p->y] = 1;
                this->pow++;
                point *e = new point;
                e->x = p->x-1; e->y =  p->y;if(pat[e->x][e->y] == 0)form(e);
                e->x = p->x+1; e->y =  p->y;if(pat[e->x][e->y] == 0)form(e);
                e->x = p->x+1; e->y =  p->y+1;if(pat[e->x][e->y] == 0)form(e);
                e->x = p->x-1; e->y =  p->y-1;if(pat[e->x][e->y] == 0)form(e);
                e->x = p->x-1; e->y =  p->y+1;if(pat[e->x][e->y] == 0)form(e);
                e->x = p->x+1; e->y =  p->y-1;if(pat[e->x][e->y] == 0)form(e);
                e->x = p->x; e->y =  p->y-1;if(pat[e->x][e->y] == 0)form(e);
                e->x = p->x; e->y =  p->y+1;
                if(pat[e->x][e->y] == 0)form(e);
            }
        }
        return;
    }
};

int main()
{
    int t;

    cin >> t;
    for (int q = 0; q < t; q++)
    {
        cin >> n >> m;
        int bnum = 0;
        Blob *b[(n*m)/2];
        vector <int> pows;
        cin.get();
        for(int i=0;i<n;i++)
        {
            for(int j = 0; j<m;j++)
            {
                a[i][j] = cin.get();
                pat[i][j] = 0;
            }
            cin.get();
        }


        for(int i=0;i<n;i++)
        {
            for(int j = 0; j<m;j++)
            {
                if(a[i][j] == '#' && pat[i][j] == 0)
                {
                    if(isAblob(i,j))
                    {
                        bnum++;
                        b[bnum] = new Blob(i,j);
                        pows.push_back(b[bnum]->getPow());
                    }
                    else continue;
                }
                else continue;
            }
        }
        sort(pows.begin(),pows.end());
        cout << endl << bnum;

        for(int i=1;i<=bnum;i++)
        {
            if(i==1) cout << endl;
            if(i!=1) cout << " ";
            cout << pows[i-1];
        }
    }
}

I am sure my code is buggy and inefficient. I am wondering if someone can give me an insight on how to avoid these problems in the future. Better implementations can be helpful too. But what I am looking for are tips for avoiding stack overflows in the future.


Solution

  • An easy way to avoid recursion while not changing the logic of the program is to use a stack data-structure directly, instead of through the call-stack.

    Here's a modified version of your Blob class that uses std::stack in your form function:

    class Blob
    {
    public:
        long long int pow;
    
        Blob(int i, int j)
        {
            this->pow = 0;
            point po;
            po.x=i;
            po.y=j;
            this->form(po);
        }
    
        int getPow()
        {
            return this->pow;
        }
    
        void form (point p)
        {
            std::stack<point> s;
            s.push(p);
    
            while (!s.empty())
            {
                p=s.top();
                s.pop();
    
                if (!inBounds(p))
                    continue;
    
                if(a[p.x][p.y] == '#' && !pat[p.x][p.y])
                {
                    a[p.x][p.y] = 1;
                    this->pow++;
                    point e;
                    e.x = p.x-1;    e.y =  p.y;     if(pat[e.x][e.y] == 0)s.push(e);
                    e.x = p.x+1;    e.y =  p.y;     if(pat[e.x][e.y] == 0)s.push(e);
                    e.x = p.x+1;    e.y =  p.y+1;   if(pat[e.x][e.y] == 0)s.push(e);
                    e.x = p.x-1;    e.y =  p.y-1;   if(pat[e.x][e.y] == 0)s.push(e);
                    e.x = p.x-1;    e.y =  p.y+1;   if(pat[e.x][e.y] == 0)s.push(e);
                    e.x = p.x+1;    e.y =  p.y-1;   if(pat[e.x][e.y] == 0)s.push(e);
                    e.x = p.x;      e.y =  p.y-1;   if(pat[e.x][e.y] == 0)s.push(e);
                    e.x = p.x;      e.y =  p.y+1;   if(pat[e.x][e.y] == 0)s.push(e);
                }
    
            }
        }
    };
    

    Note that this also fixes your memory leak.

    In general, the problem you are trying to solve seems to be finding "connected components" with a square-shaped neighborhood. Usually, you'd use a disjoint-set data-structure to solve this, which does not need a stack or recursion.With that, you can just scan thru the field once and you have the sizes of all your connected components, not just the one where you check for a blob.