S Hristoskov S Hristoskov - 2 months ago 11
C Question

Function that extracts words from text (an array of chars) and puts them in a 2-dimensional array

I'm learning C and am struggling with an exercise. I have to write a program that receives a text (max 80 chars), puts the words from the text into a char words[80][80] (every word must appear only once in this array; it is also defined as a global), and stores the number of times each word occurs in the text in an int count[] (the index must be the same as the word's index in words[][]).
The function is called int extract_and_count(char *source,int *count).
I wrote some code, but I'm not sure how exactly to implement this function. Can someone help me?
I'm also new to Stack Overflow, so sorry if I have made any mistakes.

That's some of the code, but it's not finished:

/*
 * Unfinished draft from the question: splits `source` on single spaces,
 * copies each token into a local table and prints it. The counting pass
 * that follows is incomplete; `count` is never written and the function
 * always returns 1.
 *
 * source: text to tokenize; modified in place by strtok().
 * count:  intended to receive per-word occurrence counts; unused here.
 */
int extract_and_count(char *source,int *count){
char token[80][80];
char *p;
int i = 0;
/* First pass: store every space-separated token in token[] and echo it. */
p = strtok(source, " ");
while( p != NULL ){
strcpy(token[i],p);
printf("%s\n",*(token+i));
i++;
p = strtok(NULL , " ");
}
/* NOTE(review): `word` is a single char, yet it is assigned a row of
   token[] and passed to strcmp(); it would have to be a char *. */
char word;
int value = 0, j;
/* NOTE(review): both loops always run to 80, regardless of how many
   tokens the first pass stored, so unfilled rows of token[] are read. */
for(i = 0 ; i < 80 ; i++){
word = token[i];
for(j = 0 ; j < 80 ; j++){
/* NOTE(review): this compares token[i] with itself; token[j] was
   presumably intended. `value` is incremented but never stored. */
if(strcmp(word,token[i])==0){
value++;
}
}

}
return 1;
}

Answer

You need to check if a word has been found already. If so, just increment the global counter. Otherwise, copy the new word to the global array of strings.

Something like:

#include <stdio.h>
#include <string.h>

// Global result tables filled by extract_and_count():
// word[k] holds the k-th distinct word, count[k] how often it occurs.
char word[80][81];
int  count[80] = { 0 };

/*
 * Split `source` on spaces, store each distinct word once in word[] and
 * the number of times it occurs in count[] at the same index.
 *
 * source      - NUL-terminated text; modified in place by strtok().
 * strings_cnt - out: number of distinct words stored in word[]/count[].
 *
 * Returns 1 when every word was recorded. Returns 0 when an argument is
 * NULL, or when at least one word had to be skipped because it does not
 * fit in a row of word[] or because the table is already full.
 */
int extract_and_count(char *source,int *strings_cnt){
  // Capacities are derived from the arrays so they cannot drift apart.
  const int max_words = (int)(sizeof word / sizeof word[0]);
  const size_t max_len = sizeof word[0] - 1;
  int ok = 1;

  if (strings_cnt == NULL) {
    return 0;
  }
  *strings_cnt = 0;
  if (source == NULL) {
    return 0;
  }

  // Handle each token as it is found; no intermediate token table needed.
  for (char *p = strtok(source, " "); p != NULL; p = strtok(NULL, " ")) {
    int k;

    // Look for the token among the words recorded so far.
    for (k = 0; k < *strings_cnt; k++) {
      if (strcmp(word[k], p) == 0) {
        break;
      }
    }

    if (k < *strings_cnt) {
      // The word already exists - increment its count.
      count[k]++;
      continue;
    }

    // New word: refuse it rather than write past the end of the tables.
    if (*strings_cnt >= max_words || strlen(p) > max_len) {
      ok = 0;
      continue;
    }

    strcpy(word[*strings_cnt], p);  // length checked above
    count[*strings_cnt] = 1;
    (*strings_cnt)++;
  }

  return ok;
}

/*
 * Demo driver: runs extract_and_count() on a sample sentence and prints
 * each distinct word together with the number of times it was seen.
 */
int main(void)
{
  char text[] = "c language is difficult c is also fun";
  int distinct;

  printf("Searching: %s\n", text);

  extract_and_count(text, &distinct);

  printf("Found %d different words\n", distinct);

  // Walk the global result tables in the order the words were discovered.
  int idx = 0;
  while (idx < distinct) {
    printf("%d times: %s\n", count[idx], word[idx]);
    idx++;
  }

  return 0;
}

Output:

Searching: c language is difficult c is also fun
Found 6 different words
2 times: c
1 times: language
2 times: is
1 times: difficult
1 times: also
1 times: fun

Above I tried to follow your code's style, but I'd like to add these comments:

1) You don't really need the token array. The first loop can be changed so that it updates the final result directly.

2) Don't use global variables

3) The code can't handle normal separators like , . : and so on

4) You should put the word and the count into a struct.

Taking comments 1, 2 and 4 into consideration, the code could be:

#include <stdio.h>
#include <string.h>

// One distinct word together with the number of times it occurs
struct WordStat
{
  char word[81];
  int count;
};


/*
 * Split `source` on spaces and record every distinct word and its number
 * of occurrences in the caller-supplied table `ws`.
 *
 * source      - NUL-terminated text; modified in place by strtok().
 * strings_cnt - out: number of entries of `ws` that were filled.
 * ws          - table receiving the results; must have room for `max`
 *               entries.
 * max         - capacity of `ws`; no entry at index >= max is written.
 *
 * Returns 1 when every word was recorded. Returns 0 when an argument is
 * NULL, or when at least one word had to be skipped because it does not
 * fit in WordStat.word or because `ws` is already full.
 */
int extract_and_count(char *source,int *strings_cnt, struct WordStat *ws, int max){
  int ok = 1;

  if (strings_cnt == NULL) {
    return 0;
  }
  *strings_cnt = 0;
  if (source == NULL || ws == NULL) {
    return 0;
  }

  // Find all words in the input string
  for (char *p = strtok(source, " "); p != NULL; p = strtok(NULL, " ")) {
    int k;

    // Check if the word has already been recorded
    for (k = 0; k < *strings_cnt; k++) {
      if (strcmp(ws[k].word, p) == 0) {
        break;
      }
    }

    if (k < *strings_cnt) {
      // The word already exists - increment count
      ws[k].count++;
      continue;
    }

    // New word: refuse it rather than write outside the caller's table
    // or past the end of the fixed-size word buffer.
    if (*strings_cnt >= max || strlen(p) >= sizeof ws[0].word) {
      ok = 0;
      continue;
    }

    strcpy(ws[*strings_cnt].word, p);  // length checked above
    ws[*strings_cnt].count = 1;
    (*strings_cnt)++;
  }

  return ok;
}

#define MAX_WORDS 80

/*
 * Demo driver: tokenizes a sample sentence into a local WordStat table
 * and prints each distinct word with its occurrence count.
 */
int main(void)
{
  struct WordStat stats[MAX_WORDS];
  char text[] = "c language is difficult c is also fun";
  int distinct;

  printf("Searching: %s\n", text);

  extract_and_count(text, &distinct, stats, MAX_WORDS);

  printf("Found %d different words\n", distinct);

  // Entries appear in the order the words were first encountered.
  int idx = 0;
  while (idx < distinct) {
    printf("%d times: %s\n", stats[idx].count, stats[idx].word);
    idx++;
  }

  return 0;
}
Comments