J Mei J Mei - 2 months ago 8
C Question

How to merge sort an array of struct

I have an array of struct of

typedef struct BinaryTreeNode {
BSTElementType userID;
BSTElementType count;
struct BinaryTreeNode *left;
struct BinaryTreeNode *right;
}BinaryTreeNode;

typedef BinaryTreeNode BSTNode;


And I wish to merge sort the array in ascending order. However, when I execute the merge, nothing changes. This is the code I used to create the array of struct, and the function call of the
MergeSort
. The
maxUsers
is an int I got from converting the number of nodes from a binary tree, which should be the max amount of the array.

BSTNode *arr = (BSTNode *)malloc(maxUsers * sizeof(BSTNode));

MergeSort(arr, 0, maxUsers);

void Merge(BSTNode *arr, int low, int mid, int high) {
int mergedSize = high - low + 1;
BSTNode *temp = (BSTNode *)malloc(mergedSize * sizeof(BSTNode));
int mergePos = 0;
int leftPos = 0;
int rightPos = 0;

leftPos = low;
rightPos = mid + 1;

//printf("Starting Merge\n");

while (leftPos <= high && rightPos <= mid) {
if (arr[leftPos].count < arr[rightPos].count) {
temp[mergePos].userID = arr[leftPos].userID;
temp[mergePos].count = arr[leftPos].count;
++leftPos;
}
else {
temp[mergePos].userID = arr[rightPos].userID;
temp[mergePos].count = arr[rightPos].count;
++rightPos;
}
++mergePos;
}

while (leftPos <= mid) {
temp[mergePos].userID = arr[leftPos].userID;
temp[mergePos].count = arr[leftPos].count;
++leftPos;
++mergePos;
}

while (rightPos <= high) {
temp[mergePos].userID = arr[rightPos].userID;
temp[mergePos].count = arr[rightPos].count;
++rightPos;
++mergePos;
}

for (mergePos = 0; mergePos < mergedSize; ++mergePos) {
arr[low + mergePos].userID = temp[mergePos].userID;
arr[low + mergePos].count = temp[mergePos].count;
}
}

void MergeSort(BSTNode *arr, int low, int high) {
int mid = 0;

if (low < high) {
mid = (low + high) / 2;

MergeSort(arr, low, mid);
MergeSort(arr, mid + 1, high);

Merge(arr, low, mid, high);
}
}


Any tips or hints will be much appreciated!

Edit: When I tried to write some printf statements, I noticed that the values are coming out as negatives. But the values that are stored in the structs are positives. Any reason for this error?

printf("Left Pos: %d, Right Pos: %d\n", leftPos, rightPos);
while (leftPos <= mid && rightPos <= high) {
printf("Left Pos count: %d, Right Pos count: %d\n", arr[leftPos].count, arr[rightPos].count);
if (arr[leftPos].count < arr[rightPos].count) {
temp[mergePos].userID = arr[leftPos].userID;
temp[mergePos].count = arr[leftPos].count;
++leftPos;
}

Answer

There are two crucial changes that need to be made:

  1. In Merge, you have while (leftPos <= high && rightPos <= mid) but you need while (leftPos <= mid && rightPos <= high) (reversing the left/right comparison values).

  2. In main() — or, at least, the code that calls MergeSort(), you have MergeSort(arr, 0, maxUsers); but you need MergeSort(arr, 0, maxUsers-1); because the values passed must be such that arr[lo] and arr[hi] are both valid entries in the array, and when you pass maxUsers you are telling it that arr[maxUsers] is valid when it isn't.

A third important change is to add free(temp) before exiting MergeSort().

I also elected to replace your multiline assignments with simple one-line structure assignments.

Here's a debug laden version of the code, but the debug printing is all commented out. It was all active when I resolved the problem. While I was debugging, even though I was using rand() to generate the data, I didn't use srand() quite deliberately so that I was working with the same data on each run. When I was confident that the code was clean, then I used the srand(time(0)); and varied the size of the array, but while debugging, stability was important.

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

typedef int BSTElementType;

typedef struct BinaryTreeNode
{
    BSTElementType userID;
    BSTElementType count;
    struct BinaryTreeNode *left;
    struct BinaryTreeNode *right;
} BinaryTreeNode;

typedef BinaryTreeNode BSTNode;

static void PrintArray(const char *tag, int n, BSTNode *array)
{
    printf("%s:\n", tag);
    for (int i = 0; i < n; i++)
        printf("%2d: u = %4d, c = %2d\n", i, array[i].userID, array[i].count);
    putchar('\n');
    fflush(stdout);
}

static
void Merge(BSTNode *arr, int low, int mid, int high)
{
    int mergedSize = high - low + 1;
    BSTNode *temp = (BSTNode *)malloc(mergedSize * sizeof(BSTNode));
    int mergePos = 0;
    int leftPos = low;
    int rightPos = mid + 1;

    //printf("-->> %s: lo = %d, md = %d, hi = %d, sz = %d\n", __func__, low, mid, high, mergedSize);

    //printf("L = %d, M = %d; R = %d, H = %d\n", leftPos, mid, rightPos, high);
    while (leftPos <= mid && rightPos <= high)      // Key change
    {
        //printf("a[%d].c = %d <=> a[%d].c = %d\n", leftPos, arr[leftPos].count,
        //       rightPos, arr[rightPos].count);
        if (arr[leftPos].count < arr[rightPos].count)
        {
            //printf("L1: a[%d].c = %d\n", leftPos, arr[leftPos].count);
            temp[mergePos++] = arr[leftPos++];
        }
        else
        {
            //printf("R1: a[%d].c = %d\n", rightPos, arr[rightPos].count);
            temp[mergePos++] = arr[rightPos++];
        }
        //printf("L = %d, M = %d; R = %d, H = %d\n", leftPos, mid, rightPos, high);
    }

    while (leftPos <= mid)
    {
        //printf("L2: a[%d].c = %d\n", leftPos, arr[leftPos].count);
        temp[mergePos++] = arr[leftPos++];
    }

    while (rightPos <= high)
    {
        //printf("R2: a[%d].c = %d\n", rightPos, arr[rightPos].count);
        temp[mergePos++] = arr[rightPos++];
    }

    assert(mergePos == mergedSize);

    //PrintArray("merged", mergedSize, temp);

    for (mergePos = 0; mergePos < mergedSize; ++mergePos)
        arr[low + mergePos] = temp[mergePos];

    free(temp);         // Key change
    //printf("<<-- %s: lo = %d, md = %d, hi = %d, sz = %d\n", __func__, low, mid, high, mergedSize);
}

static
void MergeSort(BSTNode *arr, int low, int high)
{
    if (low < high)
    {
        int mid = (low + high) / 2;
        //printf("-->> %s: lo = %d, md = %d, hi = %d\n", __func__, low, mid, high);

        MergeSort(arr, low, mid);
        MergeSort(arr, mid + 1, high);

        Merge(arr, low, mid, high);
        //printf("<<-- %s: lo = %d, md = %d, hi = %d\n", __func__, low, mid, high);
    }
}

int main(void)
{
    /* Unstable when testing */
    //srand(time(0));
    //int maxUsers = rand() % 20 + 5;
    /* Stable while debugging */
    int maxUsers = 10;

    BSTNode *arr = (BSTNode *)malloc(maxUsers * sizeof(BSTNode));

    for (int i = 0; i < maxUsers; i++)
    {
        arr[i].userID = rand() % 100 * 100;
        arr[i].count = rand() % 10;
        arr[i].left = 0;
        arr[i].right = 0;
    }

    PrintArray("Before", maxUsers, arr);
    MergeSort(arr, 0, maxUsers - 1);        // Key change
    PrintArray("After", maxUsers, arr);

    free(arr);

    return 0;
}

One of the key debugging observations was that the main loop in Merge() was never entered; that was a strong hint for the first problem. Oddball data was a strong hint for the second problem. The main data is all nicely controlled in range.

Sample output (YMMV because you may not be using the exact same PRNG):

Before:
 0: u = 7900, c =  9
 1: u = 3900, c =  5
 2: u = 5500, c =  3
 3: u = 4300, c =  4
 4: u = 3600, c =  6
 5: u =  900, c =  2
 6: u = 5300, c =  9
 7: u = 6300, c =  1
 8: u = 7900, c =  8
 9: u = 4400, c =  3
10: u = 9400, c =  0

After:
 0: u = 9400, c =  0
 1: u = 6300, c =  1
 2: u =  900, c =  2
 3: u = 4400, c =  3
 4: u = 5500, c =  3
 5: u = 4300, c =  4
 6: u = 3900, c =  5
 7: u = 3600, c =  6
 8: u = 7900, c =  8
 9: u = 5300, c =  9
10: u = 7900, c =  9

Sample run (random amounts of data):

Before:
 0: u = 3300, c =  1
 1: u =  200, c =  8
 2: u = 7500, c =  6
 3: u = 2700, c =  8
 4: u = 8300, c =  3
 5: u = 6900, c =  3
 6: u =  400, c =  3
 7: u = 1100, c =  6
 8: u = 3600, c =  5
 9: u = 2100, c =  7
10: u = 9400, c =  9
11: u = 7300, c =  0
12: u = 1800, c =  4
13: u = 8200, c =  9
14: u = 8400, c =  4
15: u = 9600, c =  0
16: u = 4400, c =  7
17: u = 2900, c =  0
18: u = 4300, c =  4
19: u = 6500, c =  9
20: u = 6800, c =  6
21: u = 8000, c =  2
22: u = 6000, c =  5

After:
 0: u = 2900, c =  0
 1: u = 9600, c =  0
 2: u = 7300, c =  0
 3: u = 3300, c =  1
 4: u = 8000, c =  2
 5: u =  400, c =  3
 6: u = 6900, c =  3
 7: u = 8300, c =  3
 8: u = 4300, c =  4
 9: u = 8400, c =  4
10: u = 1800, c =  4
11: u = 6000, c =  5
12: u = 3600, c =  5
13: u = 6800, c =  6
14: u = 1100, c =  6
15: u = 7500, c =  6
16: u = 4400, c =  7
17: u = 2100, c =  7
18: u = 2700, c =  8
19: u =  200, c =  8
20: u = 6500, c =  9
21: u = 8200, c =  9
22: u = 9400, c =  9

Debug-enabled, fixed-size run:

Before:
 0: u =  700, c =  9
 1: u = 7300, c =  8
 2: u = 3000, c =  2
 3: u = 4400, c =  8
 4: u = 2300, c =  9
 5: u = 4000, c =  5
 6: u = 9200, c =  2
 7: u = 8700, c =  3
 8: u = 2700, c =  9
 9: u = 4000, c =  2

-->> MergeSort: lo = 0, md = 4, hi = 9
-->> MergeSort: lo = 0, md = 2, hi = 4
-->> MergeSort: lo = 0, md = 1, hi = 2
-->> MergeSort: lo = 0, md = 0, hi = 1
-->> Merge: lo = 0, md = 0, hi = 1, sz = 2
L = 0, M = 0; R = 1, H = 1
a[0].c = 9 <=> a[1].c = 8
R1: a[1].c = 8
L = 0, M = 0; R = 2, H = 1
L2: a[0].c = 9
<<-- Merge: lo = 0, md = 0, hi = 1, sz = 2
<<-- MergeSort: lo = 0, md = 0, hi = 1
-->> Merge: lo = 0, md = 1, hi = 2, sz = 3
L = 0, M = 1; R = 2, H = 2
a[0].c = 8 <=> a[2].c = 2
R1: a[2].c = 2
L = 0, M = 1; R = 3, H = 2
L2: a[0].c = 8
L2: a[1].c = 9
<<-- Merge: lo = 0, md = 1, hi = 2, sz = 3
<<-- MergeSort: lo = 0, md = 1, hi = 2
-->> MergeSort: lo = 3, md = 3, hi = 4
-->> Merge: lo = 3, md = 3, hi = 4, sz = 2
L = 3, M = 3; R = 4, H = 4
a[3].c = 8 <=> a[4].c = 9
L1: a[3].c = 8
L = 4, M = 3; R = 4, H = 4
R2: a[4].c = 9
<<-- Merge: lo = 3, md = 3, hi = 4, sz = 2
<<-- MergeSort: lo = 3, md = 3, hi = 4
-->> Merge: lo = 0, md = 2, hi = 4, sz = 5
L = 0, M = 2; R = 3, H = 4
a[0].c = 2 <=> a[3].c = 8
L1: a[0].c = 2
L = 1, M = 2; R = 3, H = 4
a[1].c = 8 <=> a[3].c = 8
R1: a[3].c = 8
L = 1, M = 2; R = 4, H = 4
a[1].c = 8 <=> a[4].c = 9
L1: a[1].c = 8
L = 2, M = 2; R = 4, H = 4
a[2].c = 9 <=> a[4].c = 9
R1: a[4].c = 9
L = 2, M = 2; R = 5, H = 4
L2: a[2].c = 9
<<-- Merge: lo = 0, md = 2, hi = 4, sz = 5
<<-- MergeSort: lo = 0, md = 2, hi = 4
-->> MergeSort: lo = 5, md = 7, hi = 9
-->> MergeSort: lo = 5, md = 6, hi = 7
-->> MergeSort: lo = 5, md = 5, hi = 6
-->> Merge: lo = 5, md = 5, hi = 6, sz = 2
L = 5, M = 5; R = 6, H = 6
a[5].c = 5 <=> a[6].c = 2
R1: a[6].c = 2
L = 5, M = 5; R = 7, H = 6
L2: a[5].c = 5
<<-- Merge: lo = 5, md = 5, hi = 6, sz = 2
<<-- MergeSort: lo = 5, md = 5, hi = 6
-->> Merge: lo = 5, md = 6, hi = 7, sz = 3
L = 5, M = 6; R = 7, H = 7
a[5].c = 2 <=> a[7].c = 3
L1: a[5].c = 2
L = 6, M = 6; R = 7, H = 7
a[6].c = 5 <=> a[7].c = 3
R1: a[7].c = 3
L = 6, M = 6; R = 8, H = 7
L2: a[6].c = 5
<<-- Merge: lo = 5, md = 6, hi = 7, sz = 3
<<-- MergeSort: lo = 5, md = 6, hi = 7
-->> MergeSort: lo = 8, md = 8, hi = 9
-->> Merge: lo = 8, md = 8, hi = 9, sz = 2
L = 8, M = 8; R = 9, H = 9
a[8].c = 9 <=> a[9].c = 2
R1: a[9].c = 2
L = 8, M = 8; R = 10, H = 9
L2: a[8].c = 9
<<-- Merge: lo = 8, md = 8, hi = 9, sz = 2
<<-- MergeSort: lo = 8, md = 8, hi = 9
-->> Merge: lo = 5, md = 7, hi = 9, sz = 5
L = 5, M = 7; R = 8, H = 9
a[5].c = 2 <=> a[8].c = 2
R1: a[8].c = 2
L = 5, M = 7; R = 9, H = 9
a[5].c = 2 <=> a[9].c = 9
L1: a[5].c = 2
L = 6, M = 7; R = 9, H = 9
a[6].c = 3 <=> a[9].c = 9
L1: a[6].c = 3
L = 7, M = 7; R = 9, H = 9
a[7].c = 5 <=> a[9].c = 9
L1: a[7].c = 5
L = 8, M = 7; R = 9, H = 9
R2: a[9].c = 9
<<-- Merge: lo = 5, md = 7, hi = 9, sz = 5
<<-- MergeSort: lo = 5, md = 7, hi = 9
-->> Merge: lo = 0, md = 4, hi = 9, sz = 10
L = 0, M = 4; R = 5, H = 9
a[0].c = 2 <=> a[5].c = 2
R1: a[5].c = 2
L = 0, M = 4; R = 6, H = 9
a[0].c = 2 <=> a[6].c = 2
R1: a[6].c = 2
L = 0, M = 4; R = 7, H = 9
a[0].c = 2 <=> a[7].c = 3
L1: a[0].c = 2
L = 1, M = 4; R = 7, H = 9
a[1].c = 8 <=> a[7].c = 3
R1: a[7].c = 3
L = 1, M = 4; R = 8, H = 9
a[1].c = 8 <=> a[8].c = 5
R1: a[8].c = 5
L = 1, M = 4; R = 9, H = 9
a[1].c = 8 <=> a[9].c = 9
L1: a[1].c = 8
L = 2, M = 4; R = 9, H = 9
a[2].c = 8 <=> a[9].c = 9
L1: a[2].c = 8
L = 3, M = 4; R = 9, H = 9
a[3].c = 9 <=> a[9].c = 9
R1: a[9].c = 9
L = 3, M = 4; R = 10, H = 9
L2: a[3].c = 9
L2: a[4].c = 9
<<-- Merge: lo = 0, md = 4, hi = 9, sz = 10
<<-- MergeSort: lo = 0, md = 4, hi = 9
After:
 0: u = 4000, c =  2
 1: u = 9200, c =  2
 2: u = 3000, c =  2
 3: u = 8700, c =  3
 4: u = 4000, c =  5
 5: u = 4400, c =  8
 6: u = 7300, c =  8
 7: u = 2700, c =  9
 8: u = 2300, c =  9
 9: u =  700, c =  9

The code compiles and runs cleanly under GCC 6.1.0 and Valgrind 3.12-SVN on Mac OS X 10.11.6.