I am trying to re-implement the bilateral grid example using the enhanced generator class (e.g. using schedule()
and generate()
.
But I've got an error when trying to compile the code.
g++ -std=c++11 -I ../../include/ -I ../../tools/ -I ../../apps/support/ -g - fno-rtti bilateral_grid_generator.cpp ../../lib/libHalide.a ../../tools/GenGen.cpp -o bin/bilateral_grid_exec -ldl -lpthread -lz
bin/bilateral_grid_exec -o ./bin target=host
Generator bilateral_grid has base_path ./bin/bilateral_grid
Internal error at /home/xxx/Projects/Halide/src/Generator.cpp:966 triggered by user code at /usr/include/c++/4.8/functional:2057:
Condition failed: generator
make: *** [bin/bilateral_grid.a] Aborted (core dumped)
It seems that I didn't put the definition of RDom
and GeneratorParam
in the correct place. Since r.x
and r.y
are used in both schedule()
and generate()
, I think I should put it as a class member. What should be done to fix this?
Here is the code that I wrote.
class BilateralGrid : public Halide::Generator<BilateralGrid> {
public:
GeneratorParam<int> s_sigma{"s_sigma", 8};
//ImageParam input{Float(32), 2, "input"};
//Param<float> r_sigma{"r_sigma"};
Input<Buffer<float>> input{"input", 2};
Input<float> r_sigma{"r_sigma"};
Output<Buffer<float>> output{"output", 2};
// Algorithm Description
void generate() {
//int s_sigma = 8;
// Add a boundary condition
clamped(x,y) = BoundaryConditions::repeat_edge(input)(x,y);
// Construct the bilateral grid
Expr val = clamped(x * s_sigma + r.x - s_sigma/2, y * s_sigma + r.y - s_sigma/2);
val = clamp(val, 0.0f, 1.0f);
Expr zi = cast<int>(val * (1.0f/r_sigma) + 0.5f);
// Histogram
histogram(x, y, z, c) = 0.0f;
histogram(x, y, zi, c) += select(c == 0, val, 1.0f);
// Blur the grid using a five-tap filter
blurz(x, y, z, c) = (histogram(x, y, z-2, c) +
histogram(x, y, z-1, c)*4 +
histogram(x, y, z , c)*6 +
histogram(x, y, z+1, c)*4 +
histogram(x, y, z+2, c));
blurx(x, y, z, c) = (blurz(x-2, y, z, c) +
blurz(x-1, y, z, c)*4 +
blurz(x , y, z, c)*6 +
blurz(x+1, y, z, c)*4 +
blurz(x+2, y, z, c));
blury(x, y, z, c) = (blurx(x, y-2, z, c) +
blurx(x, y-1, z, c)*4 +
blurx(x, y , z, c)*6 +
blurx(x, y+1, z, c)*4 +
blurx(x, y+2, z, c));
// Take trilinear samples to compute the output
val = clamp(input(x, y), 0.0f, 1.0f);
Expr zv = val * (1.0f/r_sigma);
zi = cast<int>(zv);
Expr zf = zv - zi;
Expr xf = cast<float>(x % s_sigma) / s_sigma;
Expr yf = cast<float>(y % s_sigma) / s_sigma;
Expr xi = x/s_sigma;
Expr yi = y/s_sigma;
interpolated(x, y, c) =
lerp(lerp(lerp(blury(xi, yi, zi, c), blury(xi+1, yi, zi, c), xf),
lerp(blury(xi, yi+1, zi, c), blury(xi+1, yi+1, zi, c), xf), yf),
lerp(lerp(blury(xi, yi, zi+1, c), blury(xi+1, yi, zi+1, c), xf),
lerp(blury(xi, yi+1, zi+1, c), blury(xi+1, yi+1, zi+1, c), xf), yf), zf);
// Normalize and return the output.
bilateral_grid(x, y) = interpolated(x, y, 0)/interpolated(x, y, 1);
output(x,y) = bilateral_grid(x,y);
}
// Scheduling
void schedule() {
// int s_sigma = 8;
if (get_target().has_gpu_feature()) {
// The GPU schedule
Var xi{"xi"}, yi{"yi"}, zi{"zi"};
// Schedule blurz in 8x8 tiles. This is a tile in
// grid-space, which means it represents something like
// 64x64 pixels in the input (if s_sigma is 8).
blurz.compute_root().reorder(c, z, x, y).gpu_tile(x, y, xi, yi, 8, 8);
// Schedule histogram to happen per-tile of blurz, with
// intermediate results in shared memory. This means histogram
// and blurz makes a three-stage kernel:
// 1) Zero out the 8x8 set of histograms
// 2) Compute those histogram by iterating over lots of the input image
// 3) Blur the set of histograms in z
histogram.reorder(c, z, x, y).compute_at(blurz, x).gpu_threads(x, y);
histogram.update().reorder(c, r.x, r.y, x, y).gpu_threads(x, y).unroll(c);
// An alternative schedule for histogram that doesn't use shared memory:
// histogram.compute_root().reorder(c, z, x, y).gpu_tile(x, y, xi, yi, 8, 8);
// histogram.update().reorder(c, r.x, r.y, x, y).gpu_tile(x, y, xi, yi, 8, 8).unroll(c);
// Schedule the remaining blurs and the sampling at the end similarly.
blurx.compute_root().gpu_tile(x, y, z, xi, yi, zi, 8, 8, 1);
blury.compute_root().gpu_tile(x, y, z, xi, yi, zi, 8, 8, 1);
bilateral_grid.compute_root().gpu_tile(x, y, xi, yi, s_sigma, s_sigma);
} else {
// The CPU schedule.
blurz.compute_root().reorder(c, z, x, y).parallel(y).vectorize(x, 8).unroll(c);
histogram.compute_at(blurz, y);
histogram.update().reorder(c, r.x, r.y, x, y).unroll(c);
blurx.compute_root().reorder(c, x, y, z).parallel(z).vectorize(x, 8).unroll(c);
blury.compute_root().reorder(c, x, y, z).parallel(z).vectorize(x, 8).unroll(c);
bilateral_grid.compute_root().parallel(y).vectorize(x, 8);
}
}
Func clamped{"clamped"}, histogram{"histogram"};
Func bilateral_grid{"bilateral_grid"};
Func blurx{"blurx"}, blury{"blury"}, blurz{"blurz"}, interpolated{"interpolated"};
Var x{"x"}, y{"y"}, z{"z"}, c{"c"};
RDom r{0, s_sigma, 0, s_sigma};
};
//Halide::RegisterGenerator<BilateralGrid> register_me{"bilateral_grid"};
HALIDE_REGISTER_GENERATOR(BilateralGrid, "bilateral_grid");
} // namespace
The error here is subtle, and the current assertion failure message is regrettably unhelpful.
The problem here is that this code is using a GeneratorParam
(s_sigma) to initialize a member-variable-RDom
(r), but the GeneratorParam
may not have its final value set at that point. Generally speaking, accessing a GeneratorParam
(or ScheduleParam
) before the generate()
method is called will produce such an assert.
Why is this? Let's look at the way Generators are created and initialized in the typical build system:
bin/bilateral_grid_exec -o ./bin target=host s_sigma=7
, the default value (8) stored in s_sigma
would be replaced with 7.generate()
, then schedule()
, then compiles the result into a .o (or .a, etc).So why are you seeing the assert? What's happening in this code is that in Step 1 above, the ctor for r
is being run in Step 1... but the arguments for the ctor for r
read the current value for s_sigma
, which has a default value (8), but not necessarily the value specified by the build file. If we allowed this read to happen without asserting, you could get inconsistent values for s_sigma
in different parts of the Generator.
You can fix this by deferring the initialization of the RDom to the generate()
method:
class BilateralGrid : public Halide::Generator<BilateralGrid> {
public:
GeneratorParam<int> s_sigma{"s_sigma", 8};
...
void generate() {
r = RDom(0, s_sigma, 0, s_sigma);
...
}
...
private:
RDom r;
};
(Obviously, the assertion failure needs a more helpful error message; I'll modify the code to do so.)