Search code examples
halide

Bilateral Grid Generator class using Enhanced Generator


I am trying to re-implement the bilateral grid example using the enhanced generator class (e.g. using schedule() and generate(). But I've got an error when trying to compile the code.

g++ -std=c++11 -I ../../include/ -I ../../tools/ -I ../../apps/support/ -g -  fno-rtti bilateral_grid_generator.cpp ../../lib/libHalide.a ../../tools/GenGen.cpp -o bin/bilateral_grid_exec  -ldl -lpthread -lz
bin/bilateral_grid_exec -o ./bin  target=host 
Generator bilateral_grid has base_path ./bin/bilateral_grid
Internal error at /home/xxx/Projects/Halide/src/Generator.cpp:966 triggered by user code at /usr/include/c++/4.8/functional:2057:
Condition failed: generator
make: *** [bin/bilateral_grid.a] Aborted (core dumped)

It seems that I didn't put the definition of RDom and GeneratorParam in the correct place. Since r.x and r.y are used in both schedule() and generate(), I think I should put it as a class member. What should be done to fix this?

Here is the code that I wrote.

class BilateralGrid : public Halide::Generator<BilateralGrid> {
public:
GeneratorParam<int>   s_sigma{"s_sigma", 8};

//ImageParam            input{Float(32), 2, "input"};
//Param<float>          r_sigma{"r_sigma"};

Input<Buffer<float>>  input{"input", 2};
Input<float>          r_sigma{"r_sigma"};

Output<Buffer<float>> output{"output", 2};

// Algorithm Description
void generate() {
    //int s_sigma = 8;
    // Add a boundary condition
    clamped(x,y) = BoundaryConditions::repeat_edge(input)(x,y);

    // Construct the bilateral grid
    Expr val = clamped(x * s_sigma + r.x - s_sigma/2, y * s_sigma + r.y - s_sigma/2);
    val = clamp(val, 0.0f, 1.0f);

    Expr zi = cast<int>(val * (1.0f/r_sigma) + 0.5f);

    // Histogram
    histogram(x, y, z, c) = 0.0f;
    histogram(x, y, zi, c) += select(c == 0, val, 1.0f);

    // Blur the grid using a five-tap filter
    blurz(x, y, z, c) = (histogram(x, y, z-2, c) +
                         histogram(x, y, z-1, c)*4 +
                         histogram(x, y, z  , c)*6 +
                         histogram(x, y, z+1, c)*4 +
                         histogram(x, y, z+2, c));
    blurx(x, y, z, c) = (blurz(x-2, y, z, c) +
                         blurz(x-1, y, z, c)*4 +
                         blurz(x  , y, z, c)*6 +
                         blurz(x+1, y, z, c)*4 +
                         blurz(x+2, y, z, c));
    blury(x, y, z, c) = (blurx(x, y-2, z, c) +
                         blurx(x, y-1, z, c)*4 +
                         blurx(x, y  , z, c)*6 +
                         blurx(x, y+1, z, c)*4 +
                         blurx(x, y+2, z, c));

    // Take trilinear samples to compute the output
    val     = clamp(input(x, y), 0.0f, 1.0f);
    Expr zv = val * (1.0f/r_sigma);
    zi      = cast<int>(zv);
    Expr zf = zv - zi;
    Expr xf = cast<float>(x % s_sigma) / s_sigma;
    Expr yf = cast<float>(y % s_sigma) / s_sigma;
    Expr xi = x/s_sigma;
    Expr yi = y/s_sigma;

    interpolated(x, y, c) =
        lerp(lerp(lerp(blury(xi, yi, zi, c), blury(xi+1, yi, zi, c), xf),
                  lerp(blury(xi, yi+1, zi, c), blury(xi+1, yi+1, zi, c), xf), yf),
             lerp(lerp(blury(xi, yi, zi+1, c), blury(xi+1, yi, zi+1, c), xf),
                  lerp(blury(xi, yi+1, zi+1, c), blury(xi+1, yi+1, zi+1, c), xf), yf), zf);

    // Normalize and return the output.
    bilateral_grid(x, y) = interpolated(x, y, 0)/interpolated(x, y, 1);
    output(x,y)          = bilateral_grid(x,y);

}

// Scheduling
void schedule() { 
    // int s_sigma = 8;
    if (get_target().has_gpu_feature()) {
        // The GPU schedule
        Var xi{"xi"}, yi{"yi"}, zi{"zi"};

        // Schedule blurz in 8x8 tiles. This is a tile in
        // grid-space, which means it represents something like
        // 64x64 pixels in the input (if s_sigma is 8).
        blurz.compute_root().reorder(c, z, x, y).gpu_tile(x, y, xi, yi, 8, 8);

        // Schedule histogram to happen per-tile of blurz, with
        // intermediate results in shared memory. This means histogram
        // and blurz makes a three-stage kernel:
        // 1) Zero out the 8x8 set of histograms
        // 2) Compute those histogram by iterating over lots of the input image
        // 3) Blur the set of histograms in z
        histogram.reorder(c, z, x, y).compute_at(blurz, x).gpu_threads(x, y);
        histogram.update().reorder(c, r.x, r.y, x, y).gpu_threads(x, y).unroll(c);

        // An alternative schedule for histogram that doesn't use shared memory:
        // histogram.compute_root().reorder(c, z, x, y).gpu_tile(x, y, xi, yi, 8, 8);
        // histogram.update().reorder(c, r.x, r.y, x, y).gpu_tile(x, y, xi, yi, 8, 8).unroll(c);

        // Schedule the remaining blurs and the sampling at the end similarly.
        blurx.compute_root().gpu_tile(x, y, z, xi, yi, zi, 8, 8, 1);
        blury.compute_root().gpu_tile(x, y, z, xi, yi, zi, 8, 8, 1);
        bilateral_grid.compute_root().gpu_tile(x, y, xi, yi, s_sigma, s_sigma);
    } else {
        // The CPU schedule.
        blurz.compute_root().reorder(c, z, x, y).parallel(y).vectorize(x, 8).unroll(c);
        histogram.compute_at(blurz, y);
        histogram.update().reorder(c, r.x, r.y, x, y).unroll(c);
        blurx.compute_root().reorder(c, x, y, z).parallel(z).vectorize(x, 8).unroll(c);
        blury.compute_root().reorder(c, x, y, z).parallel(z).vectorize(x, 8).unroll(c);
        bilateral_grid.compute_root().parallel(y).vectorize(x, 8);
    }
}

Func clamped{"clamped"}, histogram{"histogram"};
Func bilateral_grid{"bilateral_grid"};
Func blurx{"blurx"}, blury{"blury"}, blurz{"blurz"}, interpolated{"interpolated"};
Var x{"x"}, y{"y"}, z{"z"}, c{"c"};
RDom r{0, s_sigma, 0, s_sigma};

};

//Halide::RegisterGenerator<BilateralGrid> register_me{"bilateral_grid"};
HALIDE_REGISTER_GENERATOR(BilateralGrid, "bilateral_grid");

}  // namespace

Solution

  • The error here is subtle, and the current assertion failure message is regrettably unhelpful.

    The problem here is that this code is using a GeneratorParam (s_sigma) to initialize a member-variable-RDom (r), but the GeneratorParam may not have its final value set at that point. Generally speaking, accessing a GeneratorParam (or ScheduleParam) before the generate() method is called will produce such an assert.

    Why is this? Let's look at the way Generators are created and initialized in the typical build system:

    1. GenGen.cpp creates an instance of the Generator's C++ class; naturally, this executes its C++ constructor, as well as the C++ constructors for all its member variables, in their order of declaration.
    2. GenGen.cpp uses arguments provided on the command line to override the default values of GeneratorParams. For example, if you had invoked the Generator with bin/bilateral_grid_exec -o ./bin target=host s_sigma=7, the default value (8) stored in s_sigma would be replaced with 7.
    3. GenGen.cpp calls generate(), then schedule(), then compiles the result into a .o (or .a, etc).

    So why are you seeing the assert? What's happening in this code is that in Step 1 above, the ctor for r is being run in Step 1... but the arguments for the ctor for r read the current value for s_sigma, which has a default value (8), but not necessarily the value specified by the build file. If we allowed this read to happen without asserting, you could get inconsistent values for s_sigma in different parts of the Generator.

    You can fix this by deferring the initialization of the RDom to the generate() method:

    class BilateralGrid : public Halide::Generator<BilateralGrid> { public: GeneratorParam<int> s_sigma{"s_sigma", 8}; ... void generate() { r = RDom(0, s_sigma, 0, s_sigma); ... } ... private: RDom r; };

    (Obviously, the assertion failure needs a more helpful error message; I'll modify the code to do so.)