CPP_NEW CPP_NEW - 1 month ago 13
C Question

faster substring processing C++

I have a program which does some processing on all possible substrings of a certain length. I am trying to make the program as fast as possible, and I am wondering what could be done to the following code to make it faster:

// Input text to scan; the run of dots presumably stands in for much more data.
char str[] = "abcdcddcdcdcdcd....................." // large string
// n is the length of str; m is the length of each substring (window).
int n = strlen(str), m = 20;
// Visit every starting offset i in str. NOTE(review): offsets i > n - m have
// fewer than m characters left, so those windows are shorter than m.
for(int i=0; i<n; i++){
// A fresh m-byte buffer is allocated (and freed below) on every iteration.
char *substr = (char*) malloc(sizeof(char)*m);
// Copy at most m characters starting at str+i. NOTE(review): the buffer has
// no room for a terminating '\0', and strncpy does not write one when m or
// more characters remain in the source.
strncpy(substr, str+i, m);
// do some processing
// NOTE(review): if hd is the std::string overload shown alongside this
// snippet, substr is converted to a std::string here, which expects a
// null-terminated buffer -- confirm.
int h = hd(substr, X) // X is another string of same length
free(substr);
}

// Returns the Hamming distance between s1 and s2: the number of positions at
// which the two strings hold different characters.
//
// Only the first min(s1.size(), s2.size()) positions are compared, so a
// shorter s2 is never read past its end. For equal-length strings (the
// intended use) this is the ordinary Hamming distance.
unsigned int hd(const std::string& s1, const std::string& s2)
{
    // Clamp to the shorter string; walking all of s1 against a shorter s2
    // would run off the end of s2.
    const std::string::size_type len =
        s1.size() < s2.size() ? s1.size() : s2.size();
    const std::string::const_iterator s1_end =
        s1.begin() + static_cast<std::string::difference_type>(len);

    // Sum (std::plus) the per-position "characters differ" results
    // (std::not_equal_to). The 0u seed keeps the accumulator unsigned, matching
    // the return type. std::not_equal_to replaces std::not2(std::equal_to),
    // which is deprecated in C++17 and removed in C++20.
    return std::inner_product(
        s1.begin(), s1_end, s2.begin(),
        0u, std::plus<unsigned int>(),
        std::not_equal_to<std::string::value_type>()
    );
}

Answer

Perhaps like this. It avoids allocating and copying each substring: instead, it passes a pointer to the start of the current substring, together with the length of the string to match.

#include <stdio.h>
#include <string.h>

// Find the Hamming distance between the len-character spans that start at
// str and cmp, i.e. the number of positions at which the two spans differ.
//
// str - start of the first span (typically a pointer into a longer string)
// cmp - start of the second span
// len - number of characters to compare; both spans must hold at least len
//       readable characters. A len of zero or less yields 0.
//
// Neither span is modified, so both are taken as pointers to const; this also
// lets callers pass constant data without a cast.
int hd(const char *str, const char *cmp, int len)
{
    int ind, hamming = 0;
    for(ind=0; ind<len; ind++) {
        // the comparison is 1 for a mismatch and 0 for a match
        hamming += (str[ind] != cmp[ind]);
    }
    return hamming;
}

// Print the Hamming distance between the pattern and every substring of the
// text that has the pattern's length, one value per line.
int main(void)
{
    char text[] = "abcdcddcdcdcdcd";
    char pattern[] = "abc";
    int text_len = strlen(text);
    int pattern_len = strlen(pattern);
    // last offset at which a full-length window still fits inside the text
    int last_start = text_len - pattern_len;
    int start = 0;

    // slide the window across the text one character at a time
    while(start <= last_start) {
        int distance = hd(text + start, pattern, pattern_len);
        printf("%d\n", distance);
        start++;
    }
}
Comments