franck franck - 16 days ago 6
C Question

Hex-rays failing to decompile assembly code using an array

I'm compiling a TASM assembly file (to be run in protected mode) containing this function, which uses global variables:

; Table of 58 dword values processed in place by modifyList, followed by a
; -1 terminator that marks the end of the data.
array   dd      0, 29535, 29535, 29096, 27889, 35168, 39271, 24798
        dd      29855, 29772, 38197, 22395, 56219, 56131, 56121, 56133
        dd      24256, 24229, 24412, 51616, 37038, 28755, 29069, 28107
        dd      46700, 31766, 30517, 35050, 33790, 0, 0, 64000
        dd      64000, 64000, 64000, 64000, 32190, 10299, 64000, 25841
        dd      9185, 25203, 24473, 25203, 39396, 64000, 64000, 64000
        dd      64000, 64000, 64000, 64000, 64000, 15266, 50285, 25477
        dd      64000, 64000
        dd      -1                      ; end-of-table delimiter

; Initial value of the running total used by modifyList.
adderVar EQU    255442

; Receives the final running total once modifyList reaches the delimiter.
result  dd      ?

;-----------------------------------------------------------------------
; modifyList - rewrite `array` in place as a running total.
;
; The accumulator starts at adderVar. For every element up to (but not
; including) the first one equal to -1, the element is added to the
; accumulator and then overwritten with the accumulator. The -1 delimiter
; itself is left untouched. The final accumulator is stored in `result`.
;
; In:    nothing in registers (works on the globals array/adderVar/result)
; Out:   [result] = final running total; `array` updated in place
; Saved: all general-purpose registers (PUSHAD/POPAD), DS and ES
; Clobb: flags
;-----------------------------------------------------------------------
modifyList proc near
        pushad
        push    ds
        push    es

        mov     edi, offset array       ; edi -> current element
        mov     eax, adderVar           ; eax = running total

modifyList_next:
        cmp     dword ptr [edi], -1     ; delimiter reached?
        je      modifyList_done
        add     eax, [edi]              ; total += element
        mov     [edi], eax              ; element = total
        add     edi, 4                  ; advance to the next dword
        jmp     modifyList_next

modifyList_done:
        mov     [result], eax

        pop     es                      ; restore in reverse push order
        pop     ds
        popad
        ret
modifyList endp


This code computes a running total over the array: it adds a fixed value (adderVar) to the first element, then adds each running sum to the next element in turn, stopping at an end delimiter (-1). The final sum is stored in result.

Loading the .OBJ file into IDA and creating a C file using Hex-rays generates this code:

/*
 * Globals emitted by Hex-Rays for the data of the assembly `array`.
 * The values match the `dd` table element by element, and the hex suffix of
 * each name advances by 4 per element, so it appears to be the address of
 * that dword (base 3408E).
 *
 * NOTE(review): the sequence has gaps (e.g. 340FA -> 3411A, 3411E -> 34126,
 * 3413A -> 3415E, 34166 -> 3417A). The elements at those addresses, including
 * the -1 delimiter, got no variable of their own in this listing.
 */
int dword_3408E = 0; // weak
int dword_34092 = 29535; // weak
int dword_34096 = 29535; // weak
int dword_3409A = 29096; // weak
int dword_3409E = 27889; // weak
int dword_340A2 = 35168; // weak
int dword_340A6 = 39271; // weak
int dword_340AA = 24798; // weak
int dword_340AE = 29855; // weak
int dword_340B2 = 29772; // weak
int dword_340B6 = 38197; // weak
int dword_340BA = 22395; // weak
int dword_340BE = 56219; // weak
int dword_340C2 = 56131; // weak
int dword_340C6 = 56121; // weak
int dword_340CA = 56133; // weak
int dword_340CE = 24256; // weak
int dword_340D2 = 24229; // weak
int dword_340D6 = 24412; // weak
int dword_340DA = 51616; // weak
int dword_340DE = 37038; // weak
int dword_340E2 = 28755; // weak
int dword_340E6 = 29069; // weak
int dword_340EA = 28107; // weak
int dword_340EE = 46700; // weak
int dword_340F2 = 31766; // weak
int dword_340F6 = 30517; // weak
int dword_340FA = 35050; // weak
int dword_3411A = 64000; // weak
int dword_3411E = 32190; // weak
int dword_34126 = 64000; // weak
int dword_3412A = 25841; // weak
int dword_3412E = 9185; // weak
int dword_34132 = 25203; // weak
int dword_34136 = 24473; // weak
int dword_3413A = 25203; // weak
int dword_3415E = 64000; // weak
int dword_34162 = 15266; // weak
int dword_34166 = 50285; // weak
/* Uninitialised dword written at the end of sub_167AA; corresponds to `result dd ?`. */
int dword_3417A; // weak


/*
 * Hex-Rays rendering of the assembly procedure modifyList.
 *
 * Walks the dwords starting at dword_3408E until one equals -1, adding each
 * to a running total (initially 255442, i.e. adderVar) and writing the total
 * back over the element. The final total is stored in dword_3417A.
 *
 * NOTE(review): the __int64 edx:eax argument/return pair is presumably how
 * the decompiler models PUSHAD/POPAD leaving EDX:EAX unchanged -- confirm.
 */
__int64 __usercall sub_167AA@<edx:eax>(__int64 a1@<edx:eax>)
{
int *v1; // edi@1
signed int v2; // eax@1
__int64 v4; // [sp-20h] [bp-20h]@1

v4 = a1; /* incoming edx:eax kept in a stack slot and returned unchanged below */
v1 = &dword_3408E; /* mov edi, offset array */
v2 = 255442; /* mov eax, adderVar */
while ( *v1 != -1 ) /* stop at the -1 delimiter */
{
v2 += *v1; /* running total += current element */
*v1 = v2; /* current element = running total */
++v1; /* add edi, 4: treats the data as one contiguous int array */
}
dword_3417A = v2; /* mov [result], eax */
return v4;
}
// 3408E: using guessed type int dword_3408E;
// 3417A: using guessed type int dword_3417A;


It seems that several values (for example 33790, 10299, 39396, the trailing 25477,64000,64000 and the -1 delimiter) are missing from the variables, and that the C code assumes all the ints are adjacent.

Would anyone have a guess on what is causing Hex-rays to fail, and how to modify the assembly code to have a proper C output?

Answer

Hex-rays doesn't work properly with manually crafted code. I noticed some differences when trying to load a NASM ELF/DWARF .OBJ, but the result is still bad.

Currently the decompiler can handle compiler-generated code. Manually crafted code may be decompiled too, but the results are usually worse than for compiler-generated code.