Smit Patel Smit Patel - 2 months ago 20
C Question

At compile time, my code stopped working in visual studio community 2015

My code is tokenizer should accept a string as a command-line
argument.The string will contain zero or more tokens, where each token is a either a floating-point, constant, or an integer constant in hex, decimal or octal.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct TokenizerT_ {
char* separators; // C string of separation characters
char* tokenString; // C string of unprocessed tokens
size_t separatorStringLength; // length of separator string
size_t tokenStringLength; // length of token string
int cursorPosition; // cursor to token string
};

typedef struct TokenizerT_ TokenizerT;

// Maps escaped letters to control characters
char escapeChar(char c) {
switch (c) {
case 'n':
return '\n';
case 't':
return '\t';
case 'v':
return '\v';
case 'b':
return '\b';
case 'r':
return '\r';
case 'f':
return '\f';
case 'a':
return '\a';
case '\\':
return '\\';
case '\"':
return '\"';
default:
return c;
}
}

// Maps control characters to hex values
void printChar(char c) {
switch (c) {
case '\n':
printf("[0x0a]");
break;
case '\t':
printf("[0x09]");;
break;
case '\v':
printf("[0x0b]");
break;
case '\b':
printf("[0x08]");
break;
case '\r':
printf("[0x0d]");
break;
case '\f':
printf("[0x0c]");
break;
case '\a':
printf("[0x07]");
break;
case '\\':
printf("[0x5c]");
break;
case '\"':
printf("[0x22]");
break;
default:
putchar(c);
break;
}
}

// Function to iterate through strings and replace control sequences using the escapeChar function
char* escapeString(char* str) {

size_t tempLength = strlen(str);
char* temp = (char*)malloc(sizeof(char)*(1 + tempLength));

int i;
int j = 0; // counter to observe differences between string positions

for (i = 0; i + j < tempLength + 1; i++) {
if (str[i + j] != '\\') {
temp[i] = str[i + j];
}
else {
temp[i] = escapeChar(str[i + j + 1]);
j++;
}
}

return temp;
}

TokenizerT *TKCreate(char *separators, char *ts) {

TokenizerT* tokenizer = (TokenizerT*)malloc(sizeof(TokenizerT));

if (tokenizer) {

// Escape the control sequences inside the strings
char* escapedSeps = escapeString(separators);
char* escapedToks = escapeString(ts);

// Copy the values over (eliminates dead space at end of strings)
tokenizer->separatorStringLength = strlen(escapedSeps);
tokenizer->tokenStringLength = strlen(escapedToks);
tokenizer->separators = (char*)malloc(sizeof(char)*(1 + tokenizer->separatorStringLength));
tokenizer->tokenString = (char*)malloc(sizeof(char)*(1 + tokenizer->tokenStringLength));
strcpy(tokenizer->separators, escapedSeps);
strcpy(tokenizer->tokenString, escapedToks);
tokenizer->cursorPosition = 0;

// Free temp strings
free(escapedSeps);
free(escapedToks);
}

// If malloc fails, pointer will be NULL anyway
return tokenizer;
}

void TKDestroy(TokenizerT *tk) {

free(tk->separators);
free(tk->tokenString);
free(tk);
}

// Function to check that a given character is not a token separator
int charNotSeparator(char c, TokenizerT *tk) {

int i;

for (i = 0; i < tk->separatorStringLength; i++) {
if (tk->separators[i] == c) {
return 0;
}
}

return 1; // return true
}

char *TKGetNextToken(TokenizerT *tk) {

char* nextToken;
int tokenLength = 0;
int startPosition = tk->cursorPosition;
int tokenStringLength = tk->tokenStringLength;

// increment tokenLength counter while token is incomplete
while (startPosition + tokenLength < tokenStringLength) {
if (charNotSeparator(tk->tokenString[startPosition + tokenLength], tk)) {
tokenLength++;
}
else {
break;
}
}
/*
* If tokenLength is 0, that character is a separator, and bypass it
* unless it's the end, in which case exit the program
* If the token length is nonzero, copy that token and return it
*/
if (tokenLength == 0 && (startPosition + tokenLength < tokenStringLength)) {

tk->cursorPosition++;
nextToken = (char*)malloc(sizeof(char) * 2);
strcpy(nextToken, "");
}
else if (tokenLength > 0) {
nextToken = (char*)malloc(sizeof(char)*(1 + tokenLength));
strncpy(nextToken, &(tk->tokenString[startPosition]), (size_t)tokenLength);
nextToken[tokenLength + 1] = '\0'; // strncpy doesn't automatically add '\0'
tk->cursorPosition += tokenLength + 1;
}
else {
return 0;
}

return nextToken;
}


int main(int argc, char **argv) {

char* separatorString;
char* tokenString;
char* tokenResultString;
TokenizerT* tokenizer;

separatorString = argv[1];
tokenString = argv[2];

tokenizer = TKCreate(separatorString, tokenString);

if (tokenizer) {

tokenResultString = TKGetNextToken(tokenizer);

while (tokenResultString != 0) {

if (strcmp("", tokenResultString) == 0) {
printf(tokenResultString);
}
else {
int i;
size_t length = strlen(tokenResultString);
for (i = 0; i < length; i++) {
printChar(tokenResultString[i]);
}
putchar('\n');
free(tokenResultString);
}

tokenResultString = TKGetNextToken(tokenizer);
}

TKDestroy(tokenizer);
}

return 0;
}

Answer

You have an off-by-one. The line

nextToken[tokenLength + 1] = '\0'; // strncpy doesn't automatically add '\0'

writes the \0 at two places after the string. Just do a

nextToken[tokenLength] = '\0'; // strncpy doesn't automatically add '\0'
Comments