Miroslav Cetojevic Miroslav Cetojevic - 1 month ago 19
C Question

Radix Sort for floats in C - negative values get corrupted

I'm translating Michael Herf's radix sort into straight C code. For positive values it works just fine, but the negative values are getting corrupted, as seen here:

Input:
-0.100000 0.100000 0.000000 0.700000 0.900000 -0.400000 -1.000000 0.200000 -0.400000 -0.700000
Expected output:
-1.000000 -0.700000 -0.400000 -0.400000 -0.100000 0.000000 0.100000 0.200000 0.700000 0.900000
Actual output:
-44.799999 -11.200000 -11.200000 -6.400000 -4.000000 0.000000 0.100000 0.200000 0.700000 0.900000


How to fix this?

Code (on Eclipse CDT, Ubuntu 16.10, compiled with GCC 6.2):

#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <xmmintrin.h> // for prefetch

#define PFVAL 64
#define PFVAL2 128
#define PF(x, i) _mm_prefetch((char *)((x)+(i)+PFVAL), 0)
#define PF2(x, i) _mm_prefetch((char *)((x)+(i)+PFVAL2), 0)

#define HISTOGRAM 2048
#define HISTOGRAMS 6144 // 3*histogram

#define FLIP_FLOAT_A(x) ((x)^(((x) >> 31) | 0x80000000))
#define FLIP_FLOAT_B(x) ((x)^((((x) >> 31)-1) | 0x80000000))

// utils for accessing 11-bit quantities
#define _0(x) (x & 0x7FF)
#define _1(x) (x >> 11 & 0x7FF)
#define _2(x) (x >> 22 )

void radixsort_float(float *a, size_t num) {
uint32_t *base = (uint32_t *)a;
size_t arraysize = num*sizeof(uint32_t);
uint32_t *aux = malloc(arraysize);

uint32_t byte0[HISTOGRAMS] = { 0 };
uint32_t *byte1 = byte0+HISTOGRAM;
uint32_t *byte2 = byte1+HISTOGRAM;

// 1. parallel histogramming pass
for(size_t i = 0; i < num; ++i) {
PF(base, i);

uint32_t bi = FLIP_FLOAT_A(base[i]);

++byte0[_0(bi)];
++byte1[_1(bi)];
++byte2[_2(bi)];
}

// 2. sum the histograms: each histogram entry
// records the number of values preceding itself
uint32_t sum0 = 0, sum1 = 0, sum2 = 0;
uint32_t total;
for(size_t i = 0; i < HISTOGRAM; i++) {
total = byte0[i]+sum0;
byte0[i] = sum0-1;
sum0 = total;

total = byte1[i]+sum1;
byte1[i] = sum1-1;
sum1 = total;

total = byte2[i]+sum2;
byte2[i] = sum2-1;
sum2 = total;
}

// byte 0: floatflip entire value, read/write histogram, write out flipped
for(size_t i = 0; i < num; ++i) {
uint32_t bi = FLIP_FLOAT_A(base[i]);
uint32_t pos = _0(bi);

PF2(base, i);
aux[++byte0[pos]] = bi;
}

// byte 1: read/write histogram, copy
// aux -> array
for(size_t i = 0; i < num; ++i) {
uint32_t ai = aux[i];
uint32_t pos = _1(ai);
PF2(aux, i);
base[++byte1[pos]] = ai;
}

// byte 2: read/write histogram, copy & flip out
// base -> aux
for(size_t i = 0; i < num; ++i) {
uint32_t bi = base[i];
uint32_t pos = _2(bi);

PF2(base, i);
aux[++byte2[pos]] = FLIP_FLOAT_B(bi);
}

memcpy(base, aux, arraysize);
free(aux);
}

Answer

The problem is your FLIP_FLOAT_A macro -- it's just wrong. You need it to flip all the bits of negative numbers (not just the bottom bit), and for the macros to be the reverse of each other (so FLIP_FLOAT_B(FLIP_FLOAT_A(X)) == X for all values of X). Try:

#define FLIP_FLOAT_A(x) ((x)^(((~(x) >> 31)-1) | 0x80000000))
#define FLIP_FLOAT_B(x) ((x)^((((x) >> 31)-1) | 0x80000000))