Mathieu Borderé Mathieu Borderé - 3 months ago 20
C Question

gcc -O optimization: Help me understand the effect

So, I'm learning C and I'm currently going through Computer Systems: A Programmer's Perspective 3rd Edition and associated labs.
I'm now doing the first lab for which I have to implement (and have thus implemented) the following function.

/*
* fitsBits - return 1 if x can be represented as an
* n-bit, two's complement integer.
* 1 <= n <= 32
* Examples: fitsBits(5,3) = 0, fitsBits(-4,3) = 1
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 15
* Rating: 2
*/
int fitsBits(int x, int n) {
int sign_bit = (x >> 31 & 1);
int minus_one = ~1+1;
int n_minus_one = n + minus_one;
return (!(x >> n_minus_one) & !sign_bit)
| (!(~(x >> n_minus_one)) & sign_bit);
}


This function will run _a_lot_ of testcases against the following test function.

int test_fitsBits(int x, int n)
{
int TMin_n = -(1 << (n-1));
int TMax_n = (1 << (n-1)) - 1;
return x >= TMin_n && x <= TMax_n;
}


Now, here is where the weird stuff happens: by default the code is compiled with the following flags -O -Wall -m32

Running my code against the test function produces the following assertion fail: ERROR: Test fitsBits(-2147483648[0x80000000],32[0x20]) failed...
...Gives 1[0x1]. Should be 0[0x0]


It seems that my code is correct and the testcode is bogus. Upon further investigation it seems that the test_function produces following intermediary results:

> Tmin:-2147483648
> TMax_n:2147483647
> x: -2147483648
> x >= TMin_n: 1
> x <= TMax_n: 0
> result: 0


Obviously -2147483648 <= 2147483647, but the comparison somehow produces a 0.

If I compile this program without the -O flag, all the tests pass successfully.
Can somebody shed some light on this behaviour please?

EDIT: Sorry the Assembly Code is horrible layout, don't know exactly how to fix quickly

Assembly Code without -O;

.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.globl _test_fitsBits
.align 4, 0x90
_test_fitsBits: ## @test_fitsBits
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
xorl %eax, %eax
movb %al, %cl
movl $1, %eax
xorl %edx, %edx
movl %edi, -4(%rbp)
movl %esi, -8(%rbp)
movl -8(%rbp), %esi
subl $1, %esi
movb %cl, -17(%rbp) ## 1-byte Spill
movl %esi, %ecx
## kill: CL<def> ECX<kill>
movl %eax, %esi
shll %cl, %esi
subl %esi, %edx
movl %edx, -12(%rbp)
movl -8(%rbp), %edx
subl $1, %edx
movl %edx, %ecx
## kill: CL<def> ECX<kill>
shll %cl, %eax
subl $1, %eax
movl %eax, -16(%rbp)
movl -4(%rbp), %eax
cmpl -12(%rbp), %eax
movb -17(%rbp), %cl ## 1-byte Reload
movb %cl, -18(%rbp) ## 1-byte Spill
jl LBB0_2
## BB#1:
movl -4(%rbp), %eax
cmpl -16(%rbp), %eax
setle %cl
movb %cl, -18(%rbp) ## 1-byte Spill
LBB0_2:
movb -18(%rbp), %al ## 1-byte Reload
andb $1, %al
movzbl %al, %eax
popq %rbp
retq
.cfi_endproc

.globl _main
.align 4, 0x90
_main: ## @main
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp3:
.cfi_def_cfa_offset 16
Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp5:
.cfi_def_cfa_register %rbp
xorl %eax, %eax
movl $0, -4(%rbp)
movl %edi, -8(%rbp)
movq %rsi, -16(%rbp)
popq %rbp
retq
.cfi_endproc


.subsections_via_symbols


Assembly Code with -O:

.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.globl _test_fitsBits
.align 4, 0x90
_test_fitsBits: ## @test_fitsBits
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
## kill: ESI<def> ESI<kill> RSI<def>
leal -1(%rsi), %ecx
movl $1, %eax
## kill: CL<def> CL<kill> ECX<kill>
shll %cl, %eax
movl %eax, %ecx
negl %ecx
cmpl %edi, %eax
setg %al
cmpl %ecx, %edi
setge %cl
andb %al, %cl
movzbl %cl, %eax
popq %rbp
retq
.cfi_endproc

.globl _main
.align 4, 0x90
_main: ## @main
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp3:
.cfi_def_cfa_offset 16
Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp5:
.cfi_def_cfa_register %rbp
xorl %eax, %eax
popq %rbp
retq
.cfi_endproc


.subsections_via_symbols

Answer

oauh's answer addresses why this is undefined behaviour, but not why the problem only appears with -O.

-O makes the compiler try harder to find things to optimize. In this case, it seems to have realised that x <= (1 << (n-1)) - 1 is really the same as x < (1 << (n-1)) - so it's been able to delete the - 1.

However, this is only equivalent to the old code if 1 << (n-1) doesn't overflow. The compiler does this optimization anyway, because the result is allowed to be wrong if overflow happens - which is because anything is allowed if overflow happens, because it's undefined behaviour.