Delan Azabani Delan Azabani - 4 months ago 17
Brainfuck Question

Optimisation for a brainfuck interpreter

As an exercise to help me learn about interpreters and optimisation, neither of which I know anything about, I have written a brainfuck interpreter in C. It appears to work flawlessly thus far, though it does not compete well in execution speed compared to other fast interpreters.

What are some ways that I can change this interpreter to improve performance (or otherwise)?

One interesting aspect of my interpreter (though most others probably do this as well) is that I run one loop that reads through the source input and converts each instruction into a

struct { long instruction; long loop; }


The
loop
value is the index of the matching
]
instruction, if the instruction is a
[
, and the index of the matching
[
instruction, if the instruction is a
]
, allowing quick jumping. I'd imagine that this 'parsing' process (which doesn't take long) improves execution times over doing redundant reparsing to find matching square brackets every time they are needed.

An interesting test of brainfuck interpreter speed is this program:

++++++++[->-[->-[->-[-]<]<]<]>++++++++[<++++++++++>-]<[>+>+<<-]>-.>-----.>



  1. the first version of the interpreter

  2. the interpreter after implementing Jerry Coffin's answer, which removes the giant switch in the runtime loop, by making the
    instruction
    struct's
    instruction
    a direct pointer to an operation function - this runs slower than the previous version (function call overhead?)

  3. the interpreter after reversing the previous change and adding an optimisation to 'collapse' multiple consecutive non-loop operations, reducing loop cycles - this runs slightly faster than the original


Answer

I can see a couple of possibilities. I think the way I'd go would be to turn it into a compiler that produced direct-threaded code. I.e., as you read the input, instead of copying most of "instructions" into memory more or less as-is, I'd instead write the code to implement each instruction as a function, and copy a pointer to each function into memory. Then executing the code would consist of calling those functions in order. I'd probably have that function return the index (or perhaps address) of the next instruction to execute, so you'd end up with something like:

typedef int return_type;
typedef return_type (*f)(void);

f *im = malloc(sizeof(f) * ia);

ci = (*(im[ci]))();

I'd also have three separate functions for each instruction, one for each BF_END_* mode, so you'd only have to deal with that during the "compilation" phase. When you execute the code, you'd have a pointer directly to the correct function.

Edit:

I've been playing with the code a bit. I've separated the loop addresses into a separate array, and merged most of the parsing together, so it looks like this:

for (ii = 0; (i = getc(fp)) != EOF; ++ii) {
    if (++in > ia) {
        ia *= 2;
        im = realloc(im, sizeof(*im) * ia);
        loops = realloc(loops, sizeof(*loops) * ia);
    }
    im[in-1] = i;
    switch (i) {
        case BF_OP_LSTART:
            if (ln >= la)
                ls = realloc(ls, sizeof(*ls) * (la *= 2));
            ls[ln++] = ii;
            break;
        case BF_OP_LEND:
            loops[in-1] = ls[--ln];
            loops[ls[ln]] = ii;
            break;
    }
}

That doesn't make any real difference to the speed, but does make the code a lot shorter, and (at least in my opinion) easier to understand.

Edit2:

Okay, I've had a chance to play with this a bit more, and found one (rather strange) optimization that does seem to help at least a bit. Compilers often produce marginally better code for switch statements with dense case values, so I tried converting to that, and got an improvement of around 9-10% (depending a bit on compiler).

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#define BF_END_ERROR    'e'
#define BF_END_IGNORE   'i'
#define BF_END_WRAP     'w'
#define BF_OP_VINC      '+'
#define BF_OP_VDEC      '-'
#define BF_OP_PINC      '>'
#define BF_OP_PDEC      '<'
#define BF_OP_LSTART    '['
#define BF_OP_LEND      ']'
#define BF_OP_IN        ','
#define BF_OP_OUT       '.'

enum { 
    C_OP_VINC, 
    C_OP_VDEC, 
    C_OP_PINC, 
    C_OP_PDEC, 
    C_OP_LSTART, 
    C_OP_LEND, 
    C_OP_IN, 
    C_OP_OUT 
};

typedef struct {
    long instruction;       /* instruction type */
    long loop;              /* 'other' instruction index in a loop */
} instruction;
void die(const char *s, ...) {
    va_list a;
    va_start(a, s);
    fprintf(stderr, "brief: error: ");
    vfprintf(stderr, s, a);
    putchar(10);
    va_end(a);
    exit(1);
}
int main(int argc, char **argv) {
    unsigned instruction_count = 0;
    long
        ci = 0,             /* current cell index */
        cn = 4096,          /* number of cells to allocate */
        cw = BF_END_WRAP,   /* cell wrap behaviour */
        ia = 4096,          /* number of allocated instructions */
        ii = 0,             /* current instruction index */
        in = 0,             /* number of used instructions */
        la = 4096,          /* loop stack allocation */
        ln = 0,             /* loop stack used */
        va = 0,             /* minimum value */
        vb = 255,           /* maximum value */
        vw = BF_END_WRAP    /* value wrap behaviour */
    ;
    instruction *im = malloc(sizeof(instruction) * ia); /* instruction memory */
    long *cm = NULL;        /* cell memory */
    long *ls = malloc(sizeof(long) * la);               /* loop stack */
    FILE *fp = NULL;
    int i;
    while ((i = getopt(argc, argv, "a:b:c:f:hv:w:")) != -1) {
        switch (i) {
            case 'a': va = atol(optarg); break;
            case 'b': vb = atol(optarg); break;
            case 'c': cn = atol(optarg); break;
            case 'f':
                fp = fopen(optarg, "r");
                if (!fp)
                    die("%s: %s", optarg, strerror(errno));
                break;
            case 'h':
                fputs(
                    "brief: a flexible brainfuck interpreter\n"
                    "usage: brief [options]\n\n"
                    "options:\n"
                    "   -a  set minimum cell value (default 0)\n"
                    "   -b  set maximum cell value (default 255)\n"
                    "   -c  set cells to allocate (default 4096)\n"
                    "   -f  source file name (required)\n"
                    "   -h  this help output\n"
                    "   -v  value over/underflow behaviour\n"
                    "   -w  cell pointer over/underflow behaviour\n\n"
                , stderr);
                fputs(
                    "cells are 'long int' values, so do not use -a with a "
                    "value less than -2^31 or -2^63, and do not use -b with a "
                    "value more than 2^31-1 or 2^63-1, depending on your "
                    "architecture's 'long int' size.\n\n"
                    "over/underflow behaviours can be one of:\n"
                    "   e   throw an error and quit upon over/underflow\n"
                    "   i   do nothing when attempting to over/underflow\n"
                    "   w   wrap-around to other end upon over/underflow\n"
                , stderr);
                exit(1);
                break;
            case 'v': vw = optarg[0]; break;
            case 'w': cw = optarg[0]; break;
            default: break;
        }
    }
    if (!fp)
        die("no source file specified; use -f");
    for (ii = 0; (i = getc(fp)) != EOF; ++ii) {
        if (++in > ia) {
            ia *= 2;
            im = realloc(im, sizeof(*im) * ia);
        }
        switch (i) {
            case BF_OP_LSTART:
                if (ln >= la)
                    ls = realloc(ls, sizeof(*ls) * (la *= 2));
                ls[ln++] = ii;
                im[in-1].instruction = C_OP_LSTART;
                break;
            case BF_OP_LEND:
                im[in-1].loop = ls[--ln];
                im[ls[ln]].loop = ii;
                im[in-1].instruction = C_OP_LEND;
                break;
            case BF_OP_VINC:
                im[in-1].instruction = C_OP_VINC;
                break;
            case BF_OP_VDEC:
                im[in-1].instruction = C_OP_VDEC;
                break;
            case BF_OP_PINC:
                im[in-1].instruction = C_OP_PINC;
                break;
            case BF_OP_PDEC:
                im[in-1].instruction = C_OP_PDEC;
                break;
            case BF_OP_IN:
                im[in-1].instruction = C_OP_IN;
                break;
            case BF_OP_OUT:
                im[in-1].instruction = C_OP_OUT;
                break;
        }
    }
    cm = memset(malloc(cn * sizeof(long)), 0, cn * sizeof(long));
    for (ii = 0; ii < in; ii++) {
        ++instruction_count;
        switch (im[ii].instruction) {
            case C_OP_VINC:
                if (cm[ci] == vb)
                    switch (vw) {
                        case BF_END_ERROR:
                            die("value overflow");
                            break;
                        case BF_END_IGNORE: break;
                        case BF_END_WRAP: cm[ci] = 0; break;
                    }
                else ++cm[ci];
                break;
            case C_OP_VDEC:
                if (cm[ci] == 0)
                    switch (vw) {
                        case BF_END_ERROR:
                            die("value underflow");
                            break;
                        case BF_END_IGNORE: break;
                        case BF_END_WRAP: cm[ci] = vb; break;
                    }
                else --cm[ci];
                break;
            case C_OP_PINC:
                if (ci == cn - 1)
                    switch (cw) {
                        case BF_END_ERROR:
                            die("cell index overflow");
                            break;
                        case BF_END_IGNORE: break;
                        case BF_END_WRAP: ci = 0; break;
                    }
                else ++ci;
                break;
            case C_OP_PDEC:
                if (ci == 0)
                    switch (cw) {
                        case BF_END_ERROR:
                            die("cell index underflow");
                            break;
                        case BF_END_IGNORE: break;
                        case BF_END_WRAP: ci = cn - 1; break;
                    }
                else --ci;
                break;
            case C_OP_IN:
                cm[ci] = getchar();
                break;
            case C_OP_OUT:
                putchar(cm[ci]);
                break;
            case C_OP_LSTART:
                if (!cm[ci])
                    ii = im[ii].loop;
                break;
            case C_OP_LEND:
                if (cm[ci])
                    ii = im[ii].loop;
                break;
            default: break;
        }
    }
    fprintf(stderr, "Executed %d instructions\n", instruction_count);
    free(cm);
    return 0;
}
Comments