EnryFan EnryFan - 1 month ago 7
C++ Question

Why is strcmp so much faster than my function?

I wrote a function,

Str::Compare
, that is basically a
strcmp
rewritten in another way.
While comparing the two function, in a loop repeted 500'000'000 times,
strcmp
execute too much fast, about x750 times faster.

This code was compiled in a C library with
-Os
parameter active:

int Str::Compare(char* String_1, char* String_2)
{
char TempChar_1, TempChar_2;

do
{
TempChar_1 = *String_1++;
TempChar_2 = *String_2++;
} while(TempChar_1 && TempChar_1 == TempChar_2);

return TempChar_1 - TempChar_2;
}


The execution time of that function is
3.058s
, while
strcmp
only
0.004s
.

Why this happen?

Also this is how I implemented the benchmark loop:

int main()
{
char Xx[] = {"huehuehuehuehuehuehuehuehuehuehuehuehuehue"},
Yy[] = {"huehuehuehuehuehuehuehuehuehuehuehuehuehue"};
for(int i = 0; i < 500000000; ++i)
Str::Compare(Xx, Yy);
}


Edit:
While testing some code I wrote and optimization that improved drastically
Str::Compare
speed.
If before
strcmp
was x750 times faster now is only x250. This is the new code:

int Str::Compare(char* String_1, char* String_2)
{
char TempChar_1, TempChar_2, TempChar_3;

while(TempChar_1 && !TempChar_3)
{
TempChar_1 = *String_1++;
TempChar_2 = *String_2++;
TempChar_3 = TempChar_1 ^ TempChar_2;
}

return TempChar_1 - TempChar_2;
}


New execution time is
0.994s
.

lwi lwi
Answer

I was curious about it and build a test program:

#include <string.h>

compare(char* String_1, char* String_2)
{
    char TempChar_1,
         TempChar_2;

   do
   {
        TempChar_1 = *String_1++;
        TempChar_2 = *String_2++;
   } while(TempChar_1 && TempChar_1 == TempChar_2);

   return TempChar_1 - TempChar_2;
}


int main(){
    int i=strcmp("foo","bar");
    int j=compare("foo","bar");

    return i;
}

I compiled it to assembler with gcc -S -Os test.cusing gcc 4.7.3 resulting in the following assembler:

    .file   "test.c"
    .text
    .globl  compare
    .type   compare, @function
compare:
.LFB24:
    .cfi_startproc
    xorl    %edx, %edx
.L2:
    movsbl  (%rdi,%rdx), %eax
    movsbl  (%rsi,%rdx), %ecx
    incq    %rdx
    cmpb    %cl, %al
    jne .L4
    testb   %al, %al
    jne .L2
.L4:
    subl    %ecx, %eax
    ret
    .cfi_endproc
.LFE24:
    .size   compare, .-compare
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "bar"
.LC1:
    .string "foo"
    .section    .text.startup,"ax",@progbits
    .globl  main
    .type   main, @function
main:
.LFB25:
    .cfi_startproc
    movl    $.LC0, %esi
    movl    $.LC1, %edi
    call    compare
    movl    $1, %eax
    ret
    .cfi_endproc
.LFE25:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.7.3-1ubuntu1) 4.7.3"
    .section    .note.GNU-stack,"",@progbits

I am not that good in x86 assembler but as far as I see it the call to the strcmp is removed and simply replaced by a constant expression ( movl $1, %eax ). So if you use a constant expression for your tests, gcc probably optimizes the strcmp to a constant.