Kirill Vishnyakov Kirill Vishnyakov - 2 months ago 6
C Question

Merge Sort for multiple types

I'm trying to implement a Merge Sort algorithm that can work with integers, strings and char. Unfortunately, I found that it doesn't work properly with integers when the array length is odd.

For example: Input:

2 2 3 7 1 2 1
. Output:
2 2 3 7 0 1 1
.

Now I'm trying to find the mistake in my code. Here it is:

#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "MergeSort.h"

int StrCmp(char *st1, char *st2) {
char *str1 = st1, *str2 = st2;
while (*str1 == *str2 && *str1 != 0)
str1++, str2++;
return *str1 - *str2;
}

int CompareInt(void *a, void *b) {
return *(int *)a - *(int *)b;
}

int CompareChar(void *a, void *b) {
return *(char *)a - *(char *)b;
}

int CompareStr(const void *a, const void *b) {
return strcmp(*(char **)a, *(char **)b);
}

int merge(void *base, size_t num, size_t el_size, int (*compar)(const void*, const void*)) {
size_t size = (size_t)num / 2;
char *char_base = (char *)base;
char *first = malloc(size * el_size);
char *second = malloc((num - size) * el_size);

for (int i = 0; i < size; i++)
for (int j = 0; j < el_size; j++)
first[i * el_size + j] = char_base[i * el_size + j];

for (int i = 0; i < num - size; i++)
for (int j = 0; j < el_size; j++)
second[i * el_size + j] = char_base[size * el_size + i * el_size + j];

size_t i = 0, j = 0, c = 0;
while (i < size && j < num - size) {
if (compar(&first[i * el_size], &second[j * el_size]) <= 0) {
for (int k = 0; k < el_size; k++)
char_base[el_size * c + k] = first[i * el_size + k];
i++;
} else {
for (int k = 0; k < el_size; k++)
char_base[el_size * c + k] = second[j * el_size + k];
j++;
}
c++;
}

if (i == size) {
while (j < num - size) {
for (int k = 0; k < el_size; k++)
char_base[el_size * c + k] = second[j * el_size + k];
j++;
c++;
}
} else {
while (i < size) {
for (int k = 0; k < el_size; k++)
char_base[el_size * c + k] = first[i * el_size + k];
i++;
c++;
}
}
free(first);
free(second);
}

int merge_sort(void *base, size_t num, size_t el_size, int (*compar)(const void*, const void*)) {
if (num > 1) {
size_t s = num / 2;
char *char_base = base;
merge_sort(char_base, s, el_size, compar);
merge_sort(char_base + (num - s) * el_size, num - s, el_size, compar);
merge(char_base, num, el_size, compar);
}
}

int main() {
int nums[] = { 2, 2, 3, 7, 1, 2, 1 };
cmp_t cmp = CompareInt;
merge_sort(nums, 7, sizeof(int), cmp);
for (int i = 0; i < 7; i++)
printf("%i ", nums[i]);
return 0;
}

Answer

The bug is in function merge_sort(): the second recursive call is done on the wrong base address:

merge_sort(char_base + (num - s) * el_size, num - s, el_size, compar);

fix is with

merge_sort(char_base + s * el_size, num - s, el_size, compar);

Note that there are other issues in your code:

  • the comparison functions have incorrect signatures, they should take const void * arguments.

  • both merge() and merge_sort() should be defined as void since they return no value.

  • CompareInt cannot handle large integer values whose difference exceeds INT_MAX, such as INT_MAX and INT_MIN. It should be written:

    int CompareInt(const void *a, const void *b) {
        int na = *(const int *)a;
        int nb = *(const int *)b;
        return (nb < na) - (na < nb);
    }
    
  • you should print a '\n' after the numbers.

You could also improve the implementation in various ways:

  • if you computed s as (n + 1) / 2, you could use less memory and have a simpler and faster implementation as you would not need the second array in the merge function.

  • using pointers, you would drastically reduce the number of multiplications.

Here is a simpler implementation with the same semantics:

void merge(void *base, size_t num, size_t el_size, size_t size,
           int (*compar)(const void*, const void*)) {
    size_t split = size * el_size;
    char *first = malloc(split);
    char *p1 = memcpy(first, base, split);
    char *dest = base, *p2 = dest + split;
    size_t i = 0, j = size;
    while (i < size) {
        if (j == num || compar(p1, p2) <= 0) {
            for (size_t k = 0; k < el_size; k++)
                *dest++ = *p1++;
            i++;
        } else {
            for (size_t k = 0; k < el_size; k++)
                *dest++ = *p2++;
            j++;
        }
    }
    free(first);
}

void merge_sort(void *base, size_t num, size_t el_size,
                int (*compar)(const void*, const void*)) {
    if (num > 1) {
        size_t s = (num + 1) / 2;
        char *char_base = base;
        merge_sort(char_base, s, el_size, compar);
        merge_sort(char_base + s * el_size, num - s, el_size, compar);
        merge(char_base, num, el_size, s, compar);
    }
}