Chan Kim - 1 month ago

Question

Convolution result different from that of Caffe

I tried writing the convolution function myself to compare its result with Caffe's. I used bottom and top blob data and weights extracted while running a demo program, and I'm very sure the extracted data are correct.
Here is the convolution test code I wrote, but its result differs from Caffe's. In this example I have 64 input feature maps and 64 output feature maps and use a 3x3 kernel. The program reads the bottom data from 64 files, reads the weights and biases for the 64x64 set of 3x3 kernels, performs the convolution, and saves the top data into 64 files. It is a very simple program; I would appreciate it if anyone could spot a mistake here and let me know.
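
For reference, the computation I am trying to reproduce is (with pad = 1 and stride = 1, as in the layer parameters shown at the end of this post, and with bottom values outside the 600x800 map counting as zero):

    top[o][y][x] = bias[o] + sum over i, ky, kx of
                   weights[o][i][ky][kx] * bottom[i][y + ky - 1][x + kx - 1]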

// written by Chan Kim for simple convolution test

/*
layer 2 type Convolution num_bottoms 1 num_tops 1
layer 2 bottom 0 shape "1 64 600 800 (30720000)"
layer 2 top 0 shape = "1 64 600 800 (30720000)"
layer 2 kernel_size = 3
bottom size : 4*64*600*800 = 0x7530000 Bytes
top size : 4*64*600*800 = 0x7530000 Bytes
kernel size : 4*3*3*64*64 = 0x24000 Bytes
in word size, bottom : 0x1d4c000 kernel : 0x9000 Words
*/

#include <stdio.h>
#define NUM_IFM 64
#define NUM_OFM 64
#define HEIGHT 600
#define WIDTH 800
#define K 3 // 3x3 kernel
float bottom[1][NUM_IFM][HEIGHT][WIDTH];
float top[1][NUM_IFM][HEIGHT][WIDTH];
float weights[NUM_OFM][NUM_IFM][K][K];
float bias[NUM_OFM];
float conv[1][NUM_IFM][HEIGHT][WIDTH]; // result
char str[80];
float kern[K][K];
float in_square[K][K];
float sum;


#define layer 2 // for test

int main(void)
{
    FILE *file;
    int ifm_idx;
    int ofm_idx;
    int orix;
    int ocix;
    int c;   // must be int (not char) so the EOF test below is reliable
    int r;
    int rix, cix;
    char line[80];
    char *ll;
    float v0, v1, v2, v3, v4, v5, v6, v7;
    int kyi, kxi;
    int orixm, orixp;
    int ocixm, ocixp;
    int kx, ky;
    int i;

    // --------------------------------------------------------
    // reading blob data from files into blob memory
    // --------------------------------------------------------
    printf("starting!\n");
    for (ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
        sprintf(str, "/home/ckim/Neuro/convhw/ext-z840/L%02d_Convolution_B0_FN%03d.txt", layer, ifm_idx);
        file = fopen(str, "r");
        c = fgetc(file);
        rix = -1;
        while (c != EOF) {
            if (c == '#') {
                ll = fgets(line, 80, file);
                c = fgetc(file);
                rix++;
                cix = 0;
            }
            else {
                r = ungetc(c, file);
                r = fscanf(file, "%f %f %f %f %f %f %f %f", &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7);
                bottom[0][ifm_idx][rix][cix++] = v0;
                bottom[0][ifm_idx][rix][cix++] = v1;
                bottom[0][ifm_idx][rix][cix++] = v2;
                bottom[0][ifm_idx][rix][cix++] = v3;
                bottom[0][ifm_idx][rix][cix++] = v4;
                bottom[0][ifm_idx][rix][cix++] = v5;
                bottom[0][ifm_idx][rix][cix++] = v6;
                bottom[0][ifm_idx][rix][cix++] = v7;
                c = fgetc(file);
            }
        }
        printf("file %s read..\n", str);
        fclose(file);
    }

    // --------------------------------------------------------
    // reading weights from files into blob memory
    // --------------------------------------------------------
    sprintf(str, "/home/ckim/Neuro/convhw/ext1/L%02d_Convolution_Weights.txt", layer);
    file = fopen(str, "r");
    ll = fgets(line, 80, file); // read the layer config line
    for (ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
        //for(ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
        ll = fgets(line, 80, file); // read the line '## For output map ofm_idx'
        for (ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
            //for(ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
            ll = fgets(line, 80, file); // read the line '## for input map ifm_idx'
            for (kyi = 0; kyi < K; kyi++) {
                fscanf(file, "%f %f %f", &v0, &v1, &v2); // K=3 always
                weights[ofm_idx][ifm_idx][kyi][0] = v0;
                weights[ofm_idx][ifm_idx][kyi][1] = v1;
                weights[ofm_idx][ifm_idx][kyi][2] = v2;
                ll = fgets(line, 80, file); // read off remaining line
            }
        }
    }
    // read bias values
    for (i = 0, ofm_idx = 0; i < NUM_OFM/8; i++) {
        ll = fgets(line, 80, file);
        r = fscanf(file, "%f %f %f %f %f %f %f %f", &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7);
        bias[ofm_idx++] = v0;
        bias[ofm_idx++] = v1;
        bias[ofm_idx++] = v2;
        bias[ofm_idx++] = v3;
        bias[ofm_idx++] = v4;
        bias[ofm_idx++] = v5;
        bias[ofm_idx++] = v6;
        bias[ofm_idx++] = v7;
    }
    fclose(file);

    // --------------------------------------------------------
    // perform convolution
    // --------------------------------------------------------
    printf("starting convolution..\n");
    for (ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
        printf("making OFM %d\n", ofm_idx);
        for (orix = 0; orix < HEIGHT; orix++) {
            for (ocix = 0; ocix < WIDTH; ocix++) {

                sum = 0.; // for single point

                // for each input map
                for (ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {

                    // get kernel
                    kern[0][0] = weights[ofm_idx][ifm_idx][0][0];
                    kern[0][1] = weights[ofm_idx][ifm_idx][0][1];
                    kern[0][2] = weights[ofm_idx][ifm_idx][0][2];
                    kern[1][0] = weights[ofm_idx][ifm_idx][1][0];
                    kern[1][1] = weights[ofm_idx][ifm_idx][1][1];
                    kern[1][2] = weights[ofm_idx][ifm_idx][1][2];
                    kern[2][0] = weights[ofm_idx][ifm_idx][2][0];
                    kern[2][1] = weights[ofm_idx][ifm_idx][2][1];
                    kern[2][2] = weights[ofm_idx][ifm_idx][2][2];

                    // zero-value padding (used in caffe)
                    in_square[0][0] = (orix == 0 || ocix == 0) ? 0 : bottom[0][ifm_idx][orix][ocix];
                    in_square[0][1] = (orix == 0) ? 0 : bottom[0][ifm_idx][orix][ocix];
                    in_square[0][2] = (orix == 0 || ocix == WIDTH-1) ? 0 : bottom[0][ifm_idx][orix][ocix];
                    in_square[1][0] = (ocix == 0) ? 0 : bottom[0][ifm_idx][orix][ocix];
                    in_square[1][1] = bottom[0][ifm_idx][orix][ocix];
                    in_square[1][2] = (ocix == WIDTH-1) ? 0 : bottom[0][ifm_idx][orix][ocix];
                    in_square[2][0] = (orix == HEIGHT-1 || ocix == 0) ? 0 : bottom[0][ifm_idx][orix][ocix];
                    in_square[2][1] = (orix == HEIGHT-1) ? 0 : bottom[0][ifm_idx][orix][ocix];
                    in_square[2][2] = (orix == HEIGHT-1 || ocix == WIDTH-1) ? 0 : bottom[0][ifm_idx][orix][ocix];

                    // apply kernel
                    for (ky = 0; ky < K; ky++) {
                        for (kx = 0; kx < K; kx++) {
                            sum += in_square[ky][kx]*kern[ky][kx];
                        }
                    }

                    //// add bias
                    //sum += bias[ifm_idx];
                } // ifm_idx

                // add bias
                sum += bias[ofm_idx];
                // store result
                conv[0][ofm_idx][orix][ocix] = sum;
            } // ocix
        } // orix

        printf("OFM %d\n", ofm_idx);
        sprintf(str, "./result/L%02d_Convolution_T0_FN%03d.txt", layer, ofm_idx);
        file = fopen(str, "w");
        printf("writing convolution result to file %s..\n", str);
        for (rix = 0; rix < HEIGHT; rix++) {
            fprintf(file, "### kr = %d ##\n", rix);
            for (cix = 0; cix < WIDTH; cix++) {
                fprintf(file, "%f ", conv[0][ofm_idx][rix][cix]);
                if (cix % 8 == 7) fprintf(file, "\n");
            }
        }
        fclose(file);
    }
    printf("Convolution finished\n");

    return 0;
}


UPDATE: I tried transposing the kernel and reversing the input-output relationship for the extracted kernels, all four combinations, but the result still doesn't match. Maybe there's something wrong in the way I extracted the values during the Caffe run. Below is how I extracted the bottom, top, and weight data from the convolution layer in caffe/src/caffe/net.cpp; they are extracted after the layer's processing is done.

//#define LayerShapeExtract
//#define BlobExtract
//#define WeightExtract
#define LayerExtNum 2
#define EXT_ALL_LAYERS 0

int ccc; // for debug

template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  CHECK_GE(start, 0);
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;
  LOG(INFO) << "## : net_input_blobs_.size() : " << net_input_blobs_.size() << std::endl;
  if (debug_info_) {
    for (int i = 0; i < net_input_blobs_.size(); ++i) {
      InputDebugInfo(i);
    }
  }
  for (int i = start; i <= end; ++i) {
    ccc = i; printf("ccc = %d\n", ccc); // LOG(ERROR) << "Forwarding " << layer_names_[i];
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]); // Layer::Forward
    loss += layer_loss;
    if (debug_info_) { ForwardDebugInfo(i); }

#ifdef LayerShapeExtract
    // -------------------------------- start of shape extract ---------------------
    FILE *lsfp;
    lsfp = fopen("ext/layers.txt", "w");
    fprintf(lsfp, "####### Layer Shape ########\n");
    fprintf(lsfp, "start = %d, end = %d\n", start, end);
    for (int li = start; li <= end; ++li) {
      fprintf(lsfp, "layer %d type %s num_bottoms %zu num_tops %zu\n", li, layers_[li]->type(), bottom_vecs_[li].size(), top_vecs_[li].size());
      for (int bi = 0; bi < bottom_vecs_[li].size(); bi++) {
        fprintf(lsfp, "layer %d bottom %d shape \"%s\"\n", li, bi, bottom_vecs_[li][bi]->shape_string().c_str());
      }
      for (int ti = 0; ti < top_vecs_[li].size(); ti++) {
        fprintf(lsfp, "layer %d top %d shape = \"%s\"\n", li, ti, top_vecs_[li][ti]->shape_string().c_str());
      }
      if (strcmp(layers_[li]->type(), "Convolution") == 0) {
        fprintf(lsfp, "layer %d kernel_size = %d\n", li, layers_[li]->layer_param().convolution_param().kernel_size());
      }
    }
    fclose(lsfp);
    //printf("##Net::ForwardFromTo ==> ####### End of Layer Shape ########\n");
    // -------------------------------- end of shape extract ---------------------
#endif

#ifdef BlobExtract
    // -------------------------------- start of blob extract ---------------------
    if (i == LayerExtNum || EXT_ALL_LAYERS) { //print!!
      char fname[50];
      FILE *extfp1;
      for (int j = 0; j < bottom_vecs_[i].size(); j++) {

        Blob<Dtype>* bp = bottom_vecs_[i][j];
        const Dtype *dptr = bp->cpu_data();
        if (bp->shape().size() == 4) {
          for (int kn = 0; kn < bp->shape()[0]; kn++) { // Blob
            for (int kd = 0; kd < bp->shape()[1]; kd++) { // Ni
              sprintf(fname, "ext/L%02d_%s_B%d_FN%03d.txt", i, layers_[i]->type(), j, kd);
              extfp1 = fopen(fname, "w");
              for (int kr = 0; kr < bp->shape()[2]; kr++) { // Ny
                fprintf(extfp1, "### kr = %d ##\n", kr);
                for (int kc = 0; kc < bp->shape()[3]; kc++) { // Nx
                  fprintf(extfp1, "%f ", *dptr++);
                  if (kc % 8 == 7 || kc == bp->shape()[3]-1) fprintf(extfp1, "\n");
                }
              }
              fclose(extfp1);
            }
          }
        } // if size 4
        else if (bp->shape().size() == 2) {
          for (int kn = 0; kn < bp->shape()[0]; kn++) { // Blob
            sprintf(fname, "ext/L%02d_%s_B%d_FN%03d.txt", i, layers_[i]->type(), j, kn);
            extfp1 = fopen(fname, "w");
            for (int kd = 0; kd < bp->shape()[1]; kd++) { // Ni
              fprintf(extfp1, "%f ", *dptr++);
              if (kd % 8 == 7 || kd == bp->shape()[1]-1) fprintf(extfp1, "\n");
            }
            fclose(extfp1);
          }
        } // if size 2
        else {
          printf("BSS != 4 or 2, i = %d, j = %d, sz = %zd\n", i, j, bp->shape().size());
        }
      } // for j

      for (int j = 0; j < top_vecs_[i].size(); j++) {

        Blob<Dtype>* tp = top_vecs_[i][j];
        const Dtype *dptr = tp->cpu_data();
        if (tp->shape().size() == 4) { // Blob
          for (int kn = 0; kn < tp->shape()[0]; kn++) { // Blob
            for (int kd = 0; kd < tp->shape()[1]; kd++) { // Ni
              sprintf(fname, "ext/L%02d_%s_T%d_FN%03d.txt", i, layers_[i]->type(), j, kd);
              extfp1 = fopen(fname, "w");
              for (int kr = 0; kr < tp->shape()[2]; kr++) { // Ny
                fprintf(extfp1, "### kr = %d ##\n", kr);
                for (int kc = 0; kc < tp->shape()[3]; kc++) { // Nx
                  fprintf(extfp1, "%f ", *dptr++);
                  if (kc % 8 == 7 || kc == tp->shape()[3]-1) fprintf(extfp1, "\n");
                }
              }
              fclose(extfp1);
            }
          }
        } // if size 4
        else if (tp->shape().size() == 2) {
          for (int kn = 0; kn < tp->shape()[0]; kn++) { // Blob
            sprintf(fname, "ext/L%02d_%s_T%d_FN%03d.txt", i, layers_[i]->type(), j, kn);
            extfp1 = fopen(fname, "w");
            for (int kd = 0; kd < tp->shape()[1]; kd++) { // Ni
              fprintf(extfp1, "%f ", *dptr++);
              if (kd % 8 == 7) fprintf(extfp1, "\n");
            }
            fclose(extfp1);
          }
        } // if size 2
        else {
          printf("BSS != 4 or 2, i = %d, j = %d, sz = %zd\n", i, j, tp->shape().size());
        }
      } // for j
    } // print!!
    // -------------------------------- end of blob extract ---------------------
#endif

#ifdef WeightExtract
    // -------------------------------- start of weight extract ---------------------
    if (i == LayerExtNum || EXT_ALL_LAYERS) { //print!!
      char fname[50];
      FILE *extfp1;
      printf("#*#* Ext layer type = %s\n", layers_[i]->type());
      if (strcmp(layers_[i]->type(), "Convolution") == 0) {
        printf("It's Convolution Layer!\n");
        vector<shared_ptr<Blob<Dtype> > >& lyr_blobs = layers_[i]->blobs(); // Layer
        printf("layer blob size = %zd\n", lyr_blobs.size());
        //const LayerParameter& lyr_param = layers()[i]->layer_param(); // vector shared_ptr Layer
        printf("lyr_blobs.size() = %zd\n", lyr_blobs.size());
        printf("lyr_blobs[0].shape = %s\n", lyr_blobs[0]->shape_string().c_str());
        printf("lyr_blobs[1].shape = %s\n", lyr_blobs[1]->shape_string().c_str()); // Blob
        Blob<Dtype> *wp = lyr_blobs[0].get(); // weight // shared_ptr
        Blob<Dtype> *bp = lyr_blobs[1].get(); // bias
        printf("No Ni ky kx = %d %d %d %d\n", wp->shape()[0], wp->shape()[1], wp->shape()[2], wp->shape()[3]); // Blob
        printf("Nb = %d\n", bp->shape()[0]);
        int No = wp->shape()[0];
        int Ni = wp->shape()[1];
        int Ky = wp->shape()[2];
        int Kx = wp->shape()[3];
        int Nb = bp->shape()[0];
        const Dtype *wptr = wp->cpu_data();
        const Dtype *bptr = bp->cpu_data();
        // save weights first
        sprintf(fname, "ext/L%02d_%s_Weights.txt", i, layers_[i]->type());
        extfp1 = fopen(fname, "w");
        fprintf(extfp1, "## Layer 0 Conv Weights (No = %d, Ni = %d, Ky = %d, Kx = %d, Nb = %d)\n", No, Ni, Ky, Kx, Nb);
        for (int n = 0; n < No; n++) {
          fprintf(extfp1, "## For output map %d ##\n", n);
          for (int c = 0; c < Ni; c++) {
            fprintf(extfp1, "## for input map %d ##\n", c);
            for (int y = 0; y < Ky; y++) {
              for (int x = 0; x < Kx; x++) {
                fprintf(extfp1, "%f ", *wptr++);
              }
              fprintf(extfp1, "\n");
            }
            fprintf(extfp1, "\n");
          }
        }
        fprintf(extfp1, "## Bias values for outputs ##\n");
        for (int n = 0; n < No; n++) {
          fprintf(extfp1, "%f ", *bptr++);
          if (n % 8 == 7) fprintf(extfp1, "\n");
        }
        fclose(extfp1);
      }
    } // print!!
    // -------------------------------- end of weight extract ---------------------
#endif
  }
  return loss;
}
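
For reference, Caffe stores a 4-D blob in row-major order, so the wptr++ walk above emits the innermost x index fastest; the flat offset of weight (n, c, y, x) in a (No, Ni, Ky, Kx) blob is the same value Caffe's Blob::offset() computes. A minimal sketch of that indexing, with an illustrative helper name:

    /* Flat offset of element (n, c, y, x) in a row-major
       (No, Ni, Ky, Kx) weight blob -- the order in which the
       wptr++ traversal above visits the weights. */
    static int weight_offset(int Ni, int Ky, int Kx,
                             int n, int c, int y, int x)
    {
        return ((n * Ni + c) * Ky + y) * Kx + x;
    }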


ADD: I ran this for the VGG_ILSVRC_16_layers model that comes with the py-faster-rcnn code. The parameters of the layer I extracted the data from are as follows:

layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
  }
}
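
With pad: 1, kernel_size: 3, and the default stride of 1, the output spatial size works out to the input size, which matches the identical bottom/top shapes listed earlier:

    out = (in + 2*pad - kernel_size)/stride + 1 = (600 + 2*1 - 3)/1 + 1 = 600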

Answer

I think the bug lies in fetching the bottom data (input) when performing the convolution; the correct code should be:

//bottom data index for convolution
int src_r, src_c;
//pads and strides along row and column
int row_pad = 1, col_pad = 1, row_stride = 1, col_stride = 1; 
for(ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
  for(orix = 0; orix < HEIGHT; orix++) {
    for(ocix = 0; ocix < WIDTH; ocix++) {  
      sum = 0.; 
      for(ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
        ...
        // zero-value padding (used in caffe)
        //The right indexing for bottom data,
        //your original code is wrong here
        src_r= orix * row_stride - row_pad;
        src_c= ocix * col_stride - col_pad;
        in_square[0][0] = (src_r < 0 || src_c < 0 ||
                           src_r >= HEIGHT || src_c >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r][src_c];
        in_square[0][1] = (src_r < 0 || src_c + 1 < 0 ||
                           src_r >= HEIGHT || src_c + 1 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r][src_c + 1];
        in_square[0][2] = (src_r < 0 || src_c + 2 < 0 ||
                           src_r >= HEIGHT || src_c + 2 >= WIDTH) ? 
                           0 : bottom[0][ifm_idx][src_r][src_c + 2];
        in_square[1][0] = (src_r + 1 < 0 || src_c < 0 ||
                           src_r + 1 >= HEIGHT || src_c >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 1][src_c];
        in_square[1][1] = (src_r + 1 < 0 || src_c + 1 < 0 ||
                           src_r + 1 >= HEIGHT || src_c + 1 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 1][src_c + 1];
        in_square[1][2] = (src_r + 1 < 0 || src_c + 2 < 0 ||
                           src_r + 1 >= HEIGHT || src_c + 2 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 1][src_c + 2];
        in_square[2][0] = (src_r + 2 < 0 || src_c < 0 ||
                           src_r + 2 >= HEIGHT || src_c >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 2][src_c];
        in_square[2][1] = (src_r + 2 < 0 || src_c + 1 < 0 ||
                           src_r + 2 >= HEIGHT || src_c + 1 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 2][src_c + 1];
        in_square[2][2] = (src_r + 2 < 0 || src_c + 2 < 0 ||
                           src_r + 2 >= HEIGHT || src_c + 2 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 2][src_c + 2];

        //apply kernel
        ...
      }
    }
  }
}
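
The nine unrolled assignments can also be written as a loop; a minimal sketch under the same assumptions (pad = 1, stride = 1, names taken from the code above):

    /* Gather the K x K input window whose top-left corner is
       (orix - row_pad, ocix - col_pad), substituting zero for
       taps that fall outside the feature map. */
    for (ky = 0; ky < K; ky++) {
        for (kx = 0; kx < K; kx++) {
            int sr = orix * row_stride - row_pad + ky;
            int sc = ocix * col_stride - col_pad + kx;
            in_square[ky][kx] =
                (sr < 0 || sr >= HEIGHT || sc < 0 || sc >= WIDTH)
                ? 0.0f : bottom[0][ifm_idx][sr][sc];
        }
    }

The key point is that the window must read the neighboring rows and columns (orix-1 .. orix+1, ocix-1 .. ocix+1), whereas the original code read bottom[0][ifm_idx][orix][ocix] for all nine taps. Note also that Caffe's Convolution layer computes cross-correlation (the kernel is applied without flipping), so once the indexing is fixed, no transposing of the extracted weights should be needed.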