Marcin Kołodziejski Marcin Kołodziejski - 3 months ago 23
C++ Question

Halide Jit compilation

I'm trying to JIT-compile my Halide program so that I can run it later in my code several times on different images, but I think I'm doing something wrong. Can anyone correct me?
First I create the Halide function to run:

// Builds the gamma tone-mapping pipeline as a Halide::Func, JIT-compiles it and
// stores the result of compile_jit(), cast to gammafunction, in m_gammaFunction.
//
// NOTE(review): the code is kept exactly as posted; the notes below restate the
// problems pointed out in the answer in this thread.
void m_gammaFunctionTMOGenerate()
{
// 3-dimensional float input; dimension 0 is constrained to stride 4 and
// dimension 2 to stride 1.
Halide::ImageParam img(Halide::type_of<float>(), 3);
img.set_stride(0, 4);
img.set_stride(2, 1);
Halide::Var x, y, c;
Halide::Param<float> key, sat, clampMax, clampMin;
Halide::Param<bool> cS;
Halide::Func gamma;
// algorithm
//img.width() , img.height();
// NOTE(review): per the answer, get() extracts the Param's value at the time of
// this call (its default of 0), not the value supplied when the pipeline runs,
// so this branch is decided once, while the pipeline is being built. The answer
// suggests Halide::select() and using the Params directly as expressions.
if (cS.get())
{
float k1 = 1.6774;
float k2 = 0.9925;
sat.set((1 + k1) * pow(key.get(), k2) / (1 + k1 * pow(key.get(), k2)));
}
// Weighted sum of channels 0, 1 and 2 of the input pixel.
Halide::Expr luminance = img(x, y, 0) * 0.072186f + img(x, y, 1) * 0.715158f + img(x, y, 2) * 0.212656f;
Halide::Expr ldr_lum = (luminance - clampMin) / (clampMax - clampMin);
// NOTE(review): the value returned by clamp() is discarded, so ldr_lum is not
// actually clamped (also pointed out in the answer).
Halide::clamp(ldr_lum, 0.f, 1.f);
ldr_lum = Halide::pow(ldr_lum, key);
Halide::Expr imLum = img(x, y, c) / luminance;
imLum = Halide::pow(imLum, sat) * ldr_lum;
// NOTE(review): same as above - the clamped value is not assigned back to imLum.
Halide::clamp(imLum, 0.f, 1.f);
gamma(x, y, c) = imLum;
// schedule
gamma.vectorize(x, 16).parallel(y);

// compilation
// Constrain the output buffer: stride 4 in dimension 0, stride 1 and extent 3
// in dimension 2.
auto & obuff = gamma.output_buffer();
obuff.set_stride(0, 4);
obuff.set_stride(2, 1);
obuff.set_extent(2, 3);
// NOTE(review): this vector is built but never passed to anything; per the
// answer, the JIT derives its argument list from the Params referenced by the
// Func, and the caller has no control over their order.
std::vector<Halide::Argument> arguments = { img, key, sat, clampMax, clampMin, cS };
// NOTE(review): the signature dumped in the answer does not match the
// gammafunction typedef (different parameter order, an extra user_context
// pointer, no bool), so calling through this cast pointer is presumably what
// produces the access violation - confirm.
m_gammaFunction = (gammafunction)(gamma.compile_jit());

}


Then I store it in a function pointer:

typedef int(*gammafunction)(buffer_t*, float, float, float, float, bool, buffer_t*);
gammafunction m_gammaFunction;


Then I try to run it:

// Driver snippet: describes the input image `data` and a freshly allocated
// output array as two buffer_t structs with identical geometry, then calls the
// compiled pipeline.
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
// Output storage: 4 floats per pixel.
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
// NOTE(review): the pipeline above constrains the output extent in dimension 2
// to 3 (obuff.set_extent(2, 3)), while 4 is used here - confirm which is intended.
output_buf.extent[2] = buf.extent[2] = 4; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);

// Run the pipeline
// NOTE(review): this calls m_photoFunction with three arguments, whereas the
// pointer declared above is m_gammaFunction and takes seven - presumably copied
// from a different operator; the edited snippet later in the post calls
// m_gammaFunction instead.
int error = m_photoFunction(&buf, params[0], &output_buf);


But it doesn't work...
Error:

Exception thrown at 0x000002974F552DE0 in Viewer.exe: 0xC0000005: Access violation executing location 0x000002974F552DE0.

If there is a handler for this exception, the program may be safely continued.


Edit:

Here is my code for running function:

// Driver snippet (edited version): wraps `data` and a freshly allocated output
// array in buffer_t structs, calls m_gammaFunction with the five parameters from
// `params`, and checks the returned error code.
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
// Output storage: 4 floats per pixel.
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 3; // Only 3 channels are covered (the pixel stride below is still 4)
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // 4 elements from one pixel to the next
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);

// Run the pipeline
// params[4] is a float used as a flag: values above 0.5 are passed as true.
int error = m_gammaFunction(&buf, params[0], params[1], params[2], params[3], params[4] > 0.5 ? true : false, &output_buf);

if (error) {
printf("Halide returned an error: %d\n", error);
return -1;
}

// Copies `size` floats from data into output, overwriting what the pipeline
// wrote to output.
// NOTE(review): the direction looks reversed - presumably the result was meant
// to be copied back into data; confirm. `output` is also never freed in this
// snippet.
memcpy(output, data, size * sizeof(float));


Can anyone help me with this?

Edit:

Thanks to @KhouriGiordano I found out what I was doing wrong. Indeed, I had switched from AOT compilation to this code. My code now looks like this:

// Keeps the gamma pipeline (Func), its input ImageParam and its scalar
// parameters together as members: the constructor defines the pipeline and
// realize() sets the parameters and runs it.
class GammaOperator
{
public:
// Defines the pipeline in terms of the member parameters.
GammaOperator();

// Binds `input`, sets the parameters from params[0..4] and realizes the
// pipeline into `output`. `width` is used for the input's row stride.
// The definition shown in the post always returns 0.
int realize(buffer_t * input, float params[], buffer_t * output, int width);
private:

// Scalar pipeline parameters. HalideFloat / HalideBool are declared elsewhere;
// presumably aliases for Halide::Param<float> / Halide::Param<bool> - confirm.
HalideFloat m_key;
HalideFloat m_sat;
HalideFloat m_clampMax;
HalideFloat m_clampMin;
HalideBool m_cS;

// Input image, constructed as a 3-dimensional float ImageParam.
Halide::ImageParam m_img;
// Pure variables used to index the pipeline.
Halide::Var x, y, c;
// The pipeline itself.
Halide::Func m_gamma;
};


// Defines the gamma pipeline: m_gamma(x, y, c) is the input pixel divided by its
// luminance, raised to the saturation exponent, multiplied by the remapped
// luminance, and clamped to [0, 1].
GammaOperator::GammaOperator()
: m_img( Halide::type_of<float>(), 3)
{

// NOTE(review): m_key.get() reads the parameter's value right now, during
// construction, rather than the value set later in realize() - the same issue
// the answer describes for Param::get(). Using m_key directly in a Halide::pow
// expression would presumably defer it to run time; confirm.
Halide::Expr w = (1.f + 1.6774f) * pow(m_key.get(), 0.9925f) / (1.f + 1.6774f * pow(m_key.get(), 0.9925f));
// NOTE(review): this yields m_sat when m_cS is true and w otherwise; the first
// version of the code derived the saturation from the key when cS was true, so
// the two value operands may be swapped - confirm the intended order.
Halide::Expr sat = Halide::select(m_cS, m_sat, w);

// Weighted sum of channels 0, 1 and 2 of the input pixel.
Halide::Expr luminance = m_img(x, y, 0) * 0.072186f + m_img(x, y, 1) * 0.715158f + m_img(x, y, 2) * 0.212656f;
// Remap the luminance from [m_clampMin, m_clampMax] to [0, 1], then apply the
// key as an exponent.
Halide::Expr ldr_lum = (luminance - m_clampMin) / (m_clampMax - m_clampMin);
ldr_lum = Halide::clamp(ldr_lum, 0.f, 1.f);
ldr_lum = Halide::pow(ldr_lum, m_key);
Halide::Expr imLum = m_img(x, y, c) / luminance;
imLum = Halide::pow(imLum, sat) * ldr_lum;
imLum = Halide::clamp(imLum, 0.f, 1.f);
m_gamma(x, y, c) = imLum;

}

// Binds the input buffer, sets the five scalar parameters from params[0..4] and
// realizes m_gamma into `output`. Always returns 0.
//
// NOTE(review): this is the code that is reported to fail with
// "Constraint violated: f0.stride.0 (4) == 1 (1)". Unlike the first version in
// the post, nothing here sets stride/extent constraints on
// m_gamma.output_buffer(), while the caller passes an output buffer with
// stride[0] == 4 - presumably the cause; confirm against the Halide docs.
int GammaOperator::realize(buffer_t * input, float params[], buffer_t * output, int width)
{
m_img.set(Halide::Buffer(Halide::type_of<float>(), input));
m_img.set_stride(0, 4);
m_img.set_stride(1, width * 4);
// NOTE(review): the caller fills buf.stride[2] with 1 and the first version of
// the code used set_stride(2, 1); the 4 here looks like a typo - confirm.
m_img.set_stride(2, 4);
// schedule
// NOTE(review): the schedule is applied again on every call to realize(); it
// presumably belongs in the constructor next to the definition - confirm.
m_gamma.vectorize(x, 16).parallel(y);

//params[0], params[1], params[2], params[3], params[4] > 0.5 ? true : false
//{ img, key, sat, clampMax, clampMin, cS };
m_key.set(params[0]);
m_sat.set(params[1]);
m_clampMax.set(params[2]);
m_clampMin.set(params[3]);
// params[4] is a float used as a flag: values above 0.5 mean true.
m_cS.set(params[4] > 0.5f ? true : false);
//// run the pipeline into the caller's output buffer
m_gamma.realize(Halide::Buffer(Halide::type_of<float>(), output));
return 0;
}


and I use it like this:

// Driver snippet for the class-based version: wraps `data` and a freshly
// allocated output array in buffer_t structs with identical geometry and hands
// them to GammaOperator::realize through s_gamma.
buffer_t output_buf = { 0 };
//// The host pointers point to the start of the image data:
buffer_t buf = { 0 };
buf.host = (uint8_t *)data; // Might also need const_cast
// Output storage: 4 floats per pixel.
float * output = new float[width * height * 4];
output_buf.host = (uint8_t*)(output);
// // If the buffer doesn't start at (0, 0), then assign mins
output_buf.extent[0] = buf.extent[0] = width; // In elements, not bytes
output_buf.extent[1] = buf.extent[1] = height; // In elements, not bytes
output_buf.extent[2] = buf.extent[2] = 4; // Assuming RGBA
// // No need to assign additional extents as they were init'ed to zero above
output_buf.stride[0] = buf.stride[0] = 4; // RGBA interleaved
output_buf.stride[1] = buf.stride[1] = width * 4; // Assuming no line padding
// NOTE(review): realize() constrains the input's stride in dimension 2 to 4,
// which does not match the 1 used here - confirm which is intended.
output_buf.stride[2] = buf.stride[2] = 1; // Channel interleaved
output_buf.elem_size = buf.elem_size = sizeof(float);

// Run the pipeline

int error = s_gamma->realize(&buf, params, &output_buf, width);


but it still crashes in the m_gamma.realize call, with this message in the console:

Error: Constraint violated: f0.stride.0 (4) == 1 (1)

Answer

By using Halide::Param::get(), you are extracting the (default of 0) value from the Param object at the time you call get(). If you want to use the parameter value given at the time you call the generated function, just use it without calling get and it should be implicitly converted to an Expr.

Since Param is not convertible to a boolean, the Halide way of doing an if is Halide::select().

You aren't using the clamped return value of Halide::clamp().

I don't see cS being used by the Halide code, only the C code.

Now to your JIT problem. It looks like you started doing AOT compilation and switched to JIT.

You make a std::vector<Halide::Argument> but don't pass it anywhere. How can Halide know what Param you want to use? It looks at the Func and finds references to ImageParam and Param objects.

How can you know in what order it expects the Params? You have no control over this. I was able to dump the bitcode by defining HL_GENBITCODE=1 and then view it with llvm-dis to see your function:

// Signature of the JIT-generated function, as recovered from the bitcode dump.
// The parameter order differs from the order assumed by the question's function
// pointer type, there is an additional user_context pointer before the output
// buffer, and no bool parameter is present.
int gamma
    ( buffer_t *img
    , float clampMax
    , float key
    , float clampMin
    , float sat
    , void *user_context
    , buffer_t *result
    );
  • Use gamma.realize(Halide::Buffer(Halide::type_of<float>(), &output_buf)) instead of using gamma.compile_jit() and trying to call the generated function properly.

For one time use:

  • Use Image instead of ImageParam.
  • Use Expr instead of Param.

For repeated use with a single JIT compile:

  • Keep the ImageParam and Param around and set them before realizing the Func.