ybakos ybakos - 3 months ago 10
C Question

How to improve this flawed doubly-linked list insert implementation efficiently?

Say you have a simple DLL (doubly-linked list) implementation in C like so:

struct list {
list* prev;
list* next;
}


And you intend to implement elements in a
list
like this:

struct element {
int value;
struct list link;
}


With an
insert
operation like this:

void list_insert(struct list* list, struct list* element) {
element->prev = list;
element->next = list->next;
list->next = element;
element->next->prev = element;
}


And, lastly, you use your implementation like this:

// Initialize empty list
struct list mylist;
mylist.next = &mylist;
mylist.prev = &mylist;

// Initialize element
struct element e;
e.value = 42;

// Add element to list
list_insert(&mylist, &e.link);


This basically works well, but does not seem robust against the possibility of inserting
e
into
mylist
twice, unintentionally. What is a computationally efficient way of handling this, ideally in O(1) time?

Reference: The list design comes from the Wayland project. See wayland-util.h, and wayland-util.c.

Answer

You've combined and/or confused your list and list element structs. Lists typically have head and next elements rather than next and prev. The latter is usually for the element [and not the list].

And, unless you really wanted a "list of lists", the pointers within the list struct need to be pointers to the element and not lists. And, you wouldn't want to initialize the head/tail pointers within that list struct to point back to the same list.

Also, just because a list has two pointers does not make it a doubly linked list. It could be a singly linked list if the element only has a next pointer. To make it a doubly linked list, each element needs a next and prev pointer.

There's a bit more to it. I've created a cleaned up and annotated version of your program. Please note the other changes as well:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// element within list
typedef struct element {
    int value;                          // data value
    struct element *next;               // pointer to next element in list
    struct element *prev;               // pointer to previous element in list
} element_t;

// list of elements
typedef struct list {
    element_t *head;                    // pointer to first element in list
    element_t *tail;                    // pointer to last element in list
} list_t;

// new_element -- get new element
element_t *
new_element(int value)
{
    element_t *newptr;

    newptr = calloc(1,sizeof(element_t));

    if (newptr == NULL) {
        printf("new_element: malloc failure\n");
        exit(1);
    }

    newptr->value = value;

    return newptr;
}

// list_insert -- insert before list head
void
list_insert(list_t *list,element_t *newptr)
{
    element_t *head;

    head = list->head;

    newptr->prev = NULL;
    newptr->next = head;

    // insert at head of empty list
    if (head == NULL)
        list->tail = newptr;

    // insert at head of non-empty list
    else {
        newptr->next = head;
        head->prev = newptr;
    }

    // make new element the head of the list
    list->head = newptr;
}

// list_find_value -- find existing list element by value
element_t *
list_find_value(list_t *list,int value)
{
    element_t *curptr;

    // search for pre-existing element with the same value
    for (curptr = list->head;  curptr != NULL;  curptr = curptr->next) {
        if (curptr->value == value)
            break;
    }

    return curptr;
}

// list_insert_unique -- insert before list head
void
list_insert_unique(list_t *list,element_t *newptr)
{
    element_t *curptr;

    // search for pre-existing element with the same value
    curptr = list_find_value(list,newptr->value);

    // only insert if we did _not_ find one
    if (curptr == NULL)
        list_insert(list,newptr);
    else
        free(newptr);
}

// list_print -- print the list
void
list_print(list_t *list)
{
    element_t *curptr;

    printf("list_print:");

    // search for pre-existing element with the same value
    for (curptr = list->head;  curptr != NULL;  curptr = curptr->next)
        printf(" %d",curptr->value);

    printf("\n");
}

// list_print -- print the list in reverse order
void
list_rprint(list_t *list)
{
    element_t *curptr;

    printf("list_rprint:");

    // search for pre-existing element with the same value
    for (curptr = list->tail;  curptr != NULL;  curptr = curptr->prev)
        printf(" %d",curptr->value);

    printf("\n");
}

// main -- main program
int
main(void)
{
    list_t mylist;
    element_t *e;

    // initialize list
    mylist.head = NULL;
    mylist.tail = NULL;

    // Add unique element to list
    e = new_element(42);
    list_insert_unique(&mylist,e);

    e = new_element(23);
    list_insert_unique(&mylist,e);

    e = new_element(17);
    list_insert_unique(&mylist,e);

    // try to insert a duplicate
    e = new_element(17);
    list_insert_unique(&mylist,e);

    // try to insert a duplicate
    e = new_element(42);
    list_insert_unique(&mylist,e);

    // print list in order [checks the head/next pointers]
    list_print(&mylist);

    // print list in reverse order [checks the tail/prev pointers]
    list_rprint(&mylist);

    return 0;
}

UPDATE:

But the question is asking for an improvement of the insert logic of the given list implementation.

You've been on SO long enough to know not to do a few things you've done in this question.

You posted code that was broken and wouldn't even compile. If it had been bug free, it should have been asked on CodeReview instead.

Because of this, the rest of the code became suspect. Now, after reviewing it, I surmise you wanted a doubly linked circular list. But, using list for the doubly linked list pointers is deceptive. It would have been more clear if this had been called link.

You asked about preventing duplicates [in O(1)]:

does not seem robust against the possibility of inserting e into mylist twice, unintentionally.

This was not completely clear as to the criteria (i.e. pointer match, value match, or both).

You didn't provide any code to check for duplicates, just list_insert that did a insert at the head of the list.

What is a computationally efficient way of handling this, ideally in O(1) time?

Insertion at head of list is O(1), but any check for duplicates requires a full list scan, which is O(n). This seemed to me to be basic. Not necessarily figuring out O(n), but at least knowing it could not be O(1)

So, given all this, you shouldn't be surprised if your code gets refactored.

Even [now] knowing about the circular list of links, I wouldn't recommend doing it the way you did it, so I refactored. With your method, you "lose" access to value, so a scan for duplicate based on value becomes more problematic.

What I did was to embed the links into the element. This makes virtually all the code simpler. With the way you did it, then element_t is a sub-class that "inherits" from link_t (nee list_t). This would be okay for C++, but it makes the code a bit messier in C. I've created a version below that does it this way, just to show how much more complex the code becomes.

Perhaps most egregious of all, you edited your original code to "fix" the broken part. You should know to not do this, but append the fix to the bottom of your post as it makes answers no longer match the question.

Also, the Wayland code/link seems to have little matchup to your posted code. You didn't even try to relate the two in any specific way.


Here is a better list_insert_unique function:

// list_insert_unique -- insert before list head
void
list_insert_unique(list_t *list,element_t *newptr)
{
    int dupflg;
    element_t *curptr;

    // search for pre-existing element
    dupflg = 0;
    for (curptr = list->head;  curptr != NULL;  curptr = curptr->next) {
        // prevent list corruption
        if (curptr == newptr) {
            dupflg = -1;
            break;
        }

        // don't add duplicate values (we must continue to search for a
        // pointer match)
        if (curptr->value == value)
            dupflg = 1;
    }

    do {
        // we're trying to re-add the _same_ node
        if (dupflg < 0)
            break;

        // we're trying to add a different node but with an existing value
        if (dupflg) {
            free(newptr);
            break;
        }

        // only insert if we did _not_ find one
        list_insert(list,newptr);
    } while (0);
}

Here is a variation that preserves more of your original structure. It is non-circular, but that difference is relatively minor. Notice that it requires more "upcast" and/or "downcast" between the super class and the sub-class.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

// element within list
typedef struct link link_t;
struct link {
    link_t *next;                   // pointer to next link in list
    link_t *prev;                   // pointer to previous link in list
};

// list of elements
typedef struct list {
    link_t *head;                   // pointer to first element in list
    link_t *tail;                   // pointer to last element in list
} list_t;

// value types
typedef enum {
    VTYPE_INT = 1,
    VTYPE_FLT = 2
} vtype_t;

// value within list
typedef struct value {
    link_t link;                    // list linkage
    vtype_t type;                   // value type
    union {
        int int_value;              // data value
        double flt_value;           // data value
    };
} element_t;

#define ELEMENTOF(_eptr)    ((element_t *) (_eptr))
#define LINKOF(_vptr)       ((link_t *) (_vptr))

// new_element -- get new element
element_t *
new_element(vtype_t type,...)
{
    va_list ap;
    element_t *newptr;

    newptr = calloc(1,sizeof(element_t));

    if (newptr == NULL) {
        printf("new_element: malloc failure\n");
        exit(1);
    }

    newptr->type = type;

    va_start(ap,type);

    switch (type) {
    case VTYPE_INT:
        newptr->int_value = va_arg(ap,int);
        break;
    case VTYPE_FLT:
        newptr->flt_value = va_arg(ap,double);
        break;
    }

    va_end(ap);

    return newptr;
}

// list_insert -- insert before list head [O(1)]
void
list_insert(list_t *list,link_t *newptr)
{
    link_t *head;

    head = list->head;

    newptr->prev = NULL;
    newptr->next = head;

    // insert at head of empty list
    if (head == NULL)
        list->tail = newptr;

    // insert at head of non-empty list
    else {
        newptr->next = head;
        head->prev = newptr;
    }

    // make new element the head of the list
    list->head = newptr;
}

// list_find_value -- find existing list element by value [O(n)]
link_t *
list_find_value(list_t *list,element_t *newval,int *matchptr)
{
    int dupflg;
    element_t *curval;
    link_t *curptr;

    // search for pre-existing element with the same value
    dupflg = 0;
    for (curptr = list->head;  curptr != NULL;  curptr = curptr->next) {
        curval = ELEMENTOF(curptr);

        // don't allow the list to become corrupted
        if (curval == newval) {
            dupflg = -1;
            break;
        }

        // only compare values of similar type
        if (curval->type != newval->type)
            continue;

        switch (newval->type) {
        case VTYPE_INT:
            if (curval->int_value == newval->int_value)
                dupflg = 1;
            break;
        case VTYPE_FLT:
            if (curval->flt_value == newval->flt_value);
                dupflg = 1;
            break;
        }
    }

    if (matchptr != NULL)
        *matchptr = dupflg;

    return curptr;
}

// list_insert_unique -- insert before list head
void
list_insert_unique(list_t *list,element_t *newval)
{
    link_t *curptr;
    int dupflg;

    // search for pre-existing element with the same value
    curptr = list_find_value(list,newval,&dupflg);

    // only insert if we did _not_ find one
    do {
        // prevent double free
        if (dupflg < 0)
            break;

        if (dupflg) {
            free(newval);
            break;
        }

        list_insert(list,LINKOF(newval));
    } while (0);
}

// value_print -- print a single value
void
value_print(link_t *curptr)
{
    element_t *valptr;

    valptr = ELEMENTOF(curptr);

    switch (valptr->type) {
    case VTYPE_INT:
        printf(" I:%d",valptr->int_value);
        break;
    case VTYPE_FLT:
        printf(" F:%g",valptr->flt_value);
        break;
    }
}

// list_print -- print the list
void
list_print(list_t *list)
{
    link_t *curptr;

    printf("list_print:");

    for (curptr = list->head;  curptr != NULL;  curptr = curptr->next)
        value_print(curptr);

    printf("\n");
}

// list_rprint -- print the list in reverse order
void
list_rprint(list_t *list)
{
    link_t *curptr;

    printf("list_rprint:");

    for (curptr = list->tail;  curptr != NULL;  curptr = curptr->prev)
        value_print(curptr);

    printf("\n");
}

// main -- main program
int
main(void)
{
    list_t mylist;
    element_t *e;

    // initialize list
    mylist.head = NULL;
    mylist.tail = NULL;

    // Add unique element to list
    e = new_element(VTYPE_INT,42);
    list_insert_unique(&mylist,e);

    e = new_element(VTYPE_INT,23);
    list_insert_unique(&mylist,e);

    e = new_element(VTYPE_INT,17);
    list_insert_unique(&mylist,e);

    // try to insert a duplicate
    e = new_element(VTYPE_INT,17);
    list_insert_unique(&mylist,e);

    // try to insert a duplicate
    e = new_element(VTYPE_INT,42);
    list_insert_unique(&mylist,e);

    e = new_element(VTYPE_INT,67);
    list_insert_unique(&mylist,e);

    list_insert_unique(&mylist,e);

    // change the value [which also changes the list's value] and try to
    // insert
    e->int_value = 66;
    list_insert_unique(&mylist,e);

    // this is _not_ a duplicate because the type is different
    e = new_element(VTYPE_FLT,42.0);
    list_insert_unique(&mylist,e);

    // print list in order [checks the head/next pointers]
    list_print(&mylist);

    // print list in reverse order [checks the tail/prev pointers]
    list_rprint(&mylist);

    return 0;
}

UPDATE #2:

Super follow-up, thank you. Yes, I'm aware of the original post's errors and edited them for correctness; and of the flaws of the provided implementation.

Okay, I think I've figured out the best of [our :-)] two worlds.

First, it depends upon what a duplicate means. Originally, I took that to be a duplicate value. My updated list_insert_unique had to do a full scan to the end, even if it found a value match halfway, because it had to finish the scan to find a duplicate pointer match. This forced an O(n)

However, if you didn't/don't mind duplicate values, and merely wanted to check for a new node already being on the list (i.e. allowing duplicate values but preventing list corruption), this can be done in O(1) by using a different detection method.

I've split my updated list_insert_unique into two functions:

list_insert_unique_value which is very similar. It is O(n) worst case, but is O(n/2) on average. This is because the value scan can now stop immediately upon finding a value match.

list_insert_unique_pointer which just checks for the new node already being on the list and is always O(1)

This works because it assumes something like my new_element function that does calloc to set the link pointers to NULL. It would not work in your original main without doing e.next = NULL; e.prev = NULL; before calling list_insert.

Below is my original version with these changes. I still recommend the simplicity of embedding the links into element_t but this can be [easily] adapted to the super-class/derived-class model of my second example [similar to your original].

Side note: I've done quite a few doubly linked implementations over the years, using both styles and I prefer embedding when possible. Actually, I have multiple separate code bases and have used the "inheritance" model in one and the "embed" model in another. I did this to experiment with speed, ease-of-use, detection of list corruption or link corruption, etc. Although the linux kernel uses circular lists, I never have, mainly because I couldn't find a use case for making them circular in my own code--YMMV.

The code below is [still] non-circular, but can also be adapted to circular lists. The for loops would have to change to something like:

for (curptr = list->head->next;  curptr != head;  curptr = curptr->next)

For my own code, I [would] usually hide such ugliness in a CPP macro like:

for (FORALL_FORWARD(list,curptr))

Anyway, here it is:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// element within list
typedef struct element {
    int value;                          // data value
    struct element *next;               // pointer to next element in list
    struct element *prev;               // pointer to previous element in list
} element_t;

// list of elements
typedef struct list {
    element_t *head;                    // pointer to first element in list
    element_t *tail;                    // pointer to last element in list
} list_t;

// new_element -- get new element
element_t *
new_element(int value)
{
    element_t *newptr;

    newptr = calloc(1,sizeof(element_t));

    if (newptr == NULL) {
        printf("new_element: malloc failure\n");
        exit(1);
    }

    newptr->value = value;

    return newptr;
}

// list_insert -- insert before list head
void
list_insert(list_t *list,element_t *newptr)
{
    element_t *head;

    head = list->head;

    newptr->prev = NULL;
    newptr->next = head;

    // insert at head of empty list
    if (head == NULL)
        list->tail = newptr;

    // insert at head of non-empty list
    else {
        newptr->next = head;
        head->prev = newptr;
    }

    // make new element the head of the list
    list->head = newptr;
}

// list_insert_unique_pointer -- insert before list head [O(1)]
void
list_insert_unique_pointer(list_t *list,element_t *newptr)
{
    int dupflg;

    // check for being on the list -- works for both circular/non-circular
    // NOTE: for circular, this can be reduced to:
    //   dupflg = (newptr->prev != NULL)
    dupflg = (newptr == list->head) || (newptr->prev != NULL);
    //dupflg = -dupflg;

    do {
        // we're trying to re-add the _same_ node -- we'd corrupt the list
        if (dupflg)
            break;

        // only insert if we did _not_ find one
        list_insert(list,newptr);
    } while (0);
}

// list_insert_unique_value -- insert before list head [O(n)]
void
list_insert_unique_value(list_t *list,element_t *newptr)
{
    int dupflg;
    element_t *curptr;
    int value;

    curptr = list->head;

    // check for being on the list -- works for both circular/non-circular
    dupflg = (newptr == curptr) || (newptr->prev != NULL);
    dupflg = -dupflg;

    // search for pre-existing element with the same value
    value = newptr->value;
    for (;  curptr != NULL;  curptr = curptr->next) {
        if (dupflg)
            break;
        dupflg = (curptr->value == value);
    }

    do {
        // we're trying to re-add the _same_ node -- we'd corrupt the list
        if (dupflg < 0)
            break;

        // we're trying to add a different node but with an existing value
        if (dupflg) {
            free(newptr);
            break;
        }

        // only insert if we did _not_ find one
        list_insert(list,newptr);
    } while (0);
}

// list_print -- print the list
void
list_print(list_t *list)
{
    element_t *curptr;

    printf("list_print:");

    for (curptr = list->head;  curptr != NULL;  curptr = curptr->next)
        printf(" %d",curptr->value);

    printf("\n");
}

// list_print -- print the list in reverse order
void
list_rprint(list_t *list)
{
    element_t *curptr;

    printf("list_rprint:");

    for (curptr = list->tail;  curptr != NULL;  curptr = curptr->prev)
        printf(" %d",curptr->value);

    printf("\n");
}

// main -- main program
int
main(void)
{
    list_t mylist;
    element_t *e;

    // initialize list
    mylist.head = NULL;
    mylist.tail = NULL;

    // Add unique element to list
    e = new_element(42);
    list_insert_unique_value(&mylist,e);

    // try to corrupt the list
    list_insert_unique_pointer(&mylist,e);

    e = new_element(23);
    list_insert_unique_value(&mylist,e);

    // try to corrupt the list
    list_insert_unique_pointer(&mylist,e);

    e = new_element(17);
    list_insert_unique_value(&mylist,e);

    // try to insert a duplicate
    e = new_element(17);
    list_insert_unique_value(&mylist,e);

    // try to insert a duplicate
    e = new_element(42);
    list_insert_unique_value(&mylist,e);

    // print list in order [checks the head/next pointers]
    list_print(&mylist);

    // print list in reverse order [checks the tail/prev pointers]
    list_rprint(&mylist);

    return 0;
}