Morilon Morilon - 7 days ago 6
C Question

C Program to count comment lines (// and /* */)

I need a program that counts the lines of a .txt or .c file and reports the following:

File:

Simple Comment: N lines

Multiline Comment: N lines

Total Lines: N lines


I have this:

if (pFile != NULL)
{
do {
c = fgetc(pFile);

if (c == '\n') n++;

} while (c != EOF);


And I don't know how to implement the rest of it.

I also tried the strstr() function, but I couldn't get that to work either.

Answer

You can write a state machine that should handle most cases.

As you scan the file, you'll be in one of the following states:

  1. TEXT - regular (non-commented) text; this is the state you'll start in. Any newline seen in this state will cause the total-lines counter to be incremented.
  2. SAW_SLASH - You've seen a single /, which may be the start of a single- or multi-line comment. If the next character is a /, you'll go into the SINGLE_COMMENT state. If the next character is a *, you'll go into the MULTI_COMMENT state. For any other character, you go back into the TEXT state.
  3. SINGLE_COMMENT - you've seen the // token; you will stay in this state until you see a newline character; once you see the newline character you'll increment the number of single-line comments as well as total lines, and go back to the TEXT state.
  4. MULTI_COMMENT - you've seen the /* token; you will stay in this state until you see the next */ token. Any newline you see in this state will cause the multi-comment line counter to be incremented along with the total lines.
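  5. SAW_STAR - While in the MULTI_COMMENT state, you've seen a single *. If the next character is /, the comment is closed: the multi-comment line counter is incremented and you go back to the TEXT state. If the next character is *, you'll stay in the SAW_STAR state. Otherwise you'll go back to the MULTI_COMMENT state; if that character is a newline, it is counted exactly as it would be in the MULTI_COMMENT state.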

There are edge cases that I'm not dealing with (such as encountering an EOF while in a comment state), but the following should be a reasonable example of how you can do stuff like this.

Note that nested comments won't be counted; i.e., if a //-delimited comment appears within a /* */-delimited comment, only the multi-comment counter will be updated.

You will probably want to factor the counting logic into its own function; just trying to keep the example as straightforward as I can.

#include <stdio.h>
#include <stdlib.h>

/**
 * Count up the number of total lines, single-comment lines,
 * and multi-comment lines in a file.
 *
 * Usage: <program> <filename>
 *
 * The file is read one character at a time and fed through a five-state
 * scanner (TEXT, SAW_SLASH, SAW_STAR, SINGLE_COMMENT, MULTI_COMMENT).
 * Counting rules, as implemented below:
 *   - every newline adds one to the total line count;
 *   - the newline that ends a // comment adds one to the single-comment count;
 *   - every newline inside a block comment, and the closing star-slash
 *     itself, adds one to the multi-comment count.
 *
 * Lines are counted by their terminating newline, so a final line with no
 * newline (or a comment still open at end of file) is not counted.
 *
 * Returns 0 on success.  Terminates with EXIT_FAILURE when no filename is
 * given or the file cannot be opened, so callers can detect the failure.
 */
int main(int argc, char **argv)
{
  FILE *fp;
  int c;
  unsigned int total  = 0;
  unsigned int multi  = 0;
  unsigned int single = 0;

  enum states { TEXT, 
                SAW_SLASH, 
                SAW_STAR, 
                SINGLE_COMMENT, 
                MULTI_COMMENT } state = TEXT;

  if ( argc < 2 )
  {
    fprintf(stderr, "USAGE: %s <filename>\n", argv[0]);
    exit(EXIT_FAILURE);   // a usage error must not look like success
  }

  fp = fopen( argv[1], "r" );
  if ( !fp )
  {
    fprintf(stderr, "Cannot open file %s\n", argv[1] );
    exit(EXIT_FAILURE);   // likewise for an unreadable input file
  }

  while ( (c = fgetc( fp )) != EOF )
  {
    switch( state )
    {
      // Ordinary text: only '/' (possible comment start) and newline matter.
      case TEXT :
        switch( c )
        {
          case '/'  : state = SAW_SLASH; break;
          case '\n' : total++; // fall-through
          default   : break;
        }
        break;

      // One '/' seen: the next character decides whether a comment starts.
      case SAW_SLASH :
        switch( c )
        {
          case '/'  : state = SINGLE_COMMENT; break;
          case '*'  : state = MULTI_COMMENT; break;
          case '\n' : total++; // fall through
          default   : state = TEXT; break;
        }
        break;

      // Inside a block comment and the previous character was '*'.
      case SAW_STAR :
        switch( c )
        {
          case '/'  : state = TEXT; multi++; break;  // comment closed; count its last line
          case '*'  : break;                         // still a candidate for the closing token
          case '\n' : total++; multi++; // fall through
          default   : state = MULTI_COMMENT; break;
        }
        break;

      // Inside a // comment: it runs until the end of the line.
      case SINGLE_COMMENT :
        switch( c )
        {
          case '\n' : state = TEXT; total++; single++; // fall through
          default   : break;
        }
        break;

      // Inside a block comment: watch for '*' and count line breaks.
      case MULTI_COMMENT :
        switch( c )
        {
          case '*'  : state = SAW_STAR; break;
          case '\n' : total++; multi++; // fall through
          default   : break;
        }
        break;

      default: // NOT REACHABLE
        break;
    }
  }

  fclose(fp);

  printf( "File                 : %s\n", argv[1] );
  printf( "Total lines          : %8u\n", total );
  printf( "Single-comment lines : %8u\n", single );
  printf( "Multi-comment lines  : %8u\n", multi );
  return 0;
}

EDIT

Here's a table-driven equivalent to the program above. I create a state table to control state transitions and an action table to control what happens when I change state.

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

/**
 * Using preprocessor macros instead of enums, per request; normally
 * I would use enums, since they obey scoping rules and
 * show up in debuggers.
 *
 * Scanner states.  TEXT is deliberately 0 so that a zero-filled
 * transition table entry means "go to (or stay in) TEXT".
 */
#define TEXT           0
#define SAW_SLASH      1
#define SAW_STAR       2
#define SINGLE_COMMENT 3
#define MULTI_COMMENT  4

#define TOTAL_STATES   5

/**
 * Action flags.  Each is a distinct bit so several can be OR'd together
 * in one table entry; NO_ACTION is 0 so a zero-filled entry does nothing.
 */
#define NO_ACTION      0
#define INC_TOTAL      1
#define INC_SINGLE     2
#define INC_MULTI      4

/**
 * Number of columns in the state and action tables.  The tables are
 * indexed directly by the value returned from fgetc(), which is the
 * byte read converted from unsigned char to int, i.e. anything from
 * 0 to 255 on 8-bit-byte platforms.  Sizing the tables for only the
 * 128 ASCII codes made any byte >= 128 (UTF-8, Latin-1, ...) index
 * past the end of a row, so cover every possible byte value instead.
 */
#define ENCODINGS    256

/**
 * Two lookup tables drive the scanner, both indexed as
 * [current state][input character]:
 *   state  - the state to move to;
 *   action - the counter updates (bit flags) to perform on that move.
 *
 * Every entry starts out as 0, which is the "default" value defined
 * above for both tables; the init functions only overwrite the
 * entries that differ from it.
 */
static int state[TOTAL_STATES][ENCODINGS]  = { { 0 } };
static int action[TOTAL_STATES][ENCODINGS] = { { 0 } };

/**
 * Populate the transition table.
 *
 * The TEXT and SAW_SLASH rows are only touched for the characters that
 * matter; every other entry in those rows keeps the value it already had
 * (0, i.e. TEXT, for the zero-initialized file-scope table).  The three
 * comment-related rows are filled completely and then patched.
 */
void initState( int (*state)[ENCODINGS] )
{
  int *fromText   = state[TEXT];
  int *fromSlash  = state[SAW_SLASH];
  int *fromStar   = state[SAW_STAR];
  int *fromSingle = state[SINGLE_COMMENT];
  int *fromMulti  = state[MULTI_COMMENT];

  // In plain text only a '/' is interesting: it may open a comment.
  fromText['/'] = SAW_SLASH;

  // After one '/', the next character picks the comment kind (if any);
  // a newline sends us back to plain text.
  fromSlash['/']  = SINGLE_COMMENT;
  fromSlash['*']  = MULTI_COMMENT;
  fromSlash['\n'] = TEXT;

  // Row-wide defaults: a '*' that is not followed by '/' drops back into
  // the block comment, and both comment states hold until patched below.
  for ( size_t ch = 0; ch < ENCODINGS; ++ch )
  {
    fromStar[ch]   = MULTI_COMMENT;
    fromSingle[ch] = SINGLE_COMMENT;
    fromMulti[ch]  = MULTI_COMMENT;
  }

  // "*/" closes the block comment; a run of '*' keeps waiting for the '/'.
  fromStar['/'] = TEXT;
  fromStar['*'] = SAW_STAR;

  // A newline ends a // comment; a '*' inside a block comment may start
  // its closing token.
  fromSingle['\n'] = TEXT;
  fromMulti['*']   = SAW_STAR;
}

/**
 * Initialize our action table.
 *
 * Only the (state, character) pairs that must update a counter are set
 * here; all other entries keep their existing value, which is NO_ACTION
 * for the zero-initialized file-scope table this is called with.
 * Multiple actions for one entry are bitwise-OR'd together.
 */
void initAction( int (*action)[ENCODINGS] )
{
  // Newlines outside any comment (including right after a lone '/')
  // count toward the total only.
  action[TEXT]['\n'] = INC_TOTAL;
  action[SAW_SLASH]['\n'] = INC_TOTAL;

  // The newline that ends a // comment counts that comment line.
  action[SINGLE_COMMENT]['\n'] = INC_SINGLE | INC_TOTAL;

  // Newlines inside a block comment count toward both tallies...
  action[MULTI_COMMENT]['\n'] = INC_MULTI | INC_TOTAL;

  // ...and that includes a newline that directly follows a '*' (for
  // example the end of a "/**" line).  We are still inside the comment
  // in the SAW_STAR state, so without this entry such lines would be
  // missing from both the total and the multi-comment count.
  action[SAW_STAR]['\n'] = INC_MULTI | INC_TOTAL;

  // Closing a block comment counts its final line.
  action[SAW_STAR]['/'] = INC_MULTI;
}

/**
 * Run the table-driven scanner over a stream and tally its lines.
 *
 * stream     - input to read with fgetc() until EOF
 * totalLines - receives the number of newlines seen
 * single     - receives the number of // comment lines
 * multi      - receives the number of block-comment lines
 *
 * All three counters are reset to zero before scanning starts.  Each
 * character read is used directly as the column index into the
 * file-scope state and action tables.
 */
void countComments( FILE *stream, size_t *totalLines, size_t *single, size_t *multi )
{
  int where = TEXT;

  *multi = 0;
  *single = 0;
  *totalLines = 0;

  for ( ;; )
  {
    const int ch = fgetc( stream );
    if ( ch == EOF )
      break;

    // Look up the action with the OLD state, then advance the state.
    const int todo = action[where][ch];
    where = state[where][ch];

    // Apply whichever counter updates the action flags request.
    if ( ( todo & INC_TOTAL ) != 0 )
      ++*totalLines;

    if ( ( todo & INC_SINGLE ) != 0 )
      ++*single;

    if ( ( todo & INC_MULTI ) != 0 )
      ++*multi;
  }
}

/**
 * Program entry point: count the lines and comment lines of the file
 * named by the first command-line argument and print a short report.
 *
 * Returns EXIT_SUCCESS after printing the report, or EXIT_FAILURE when
 * no filename was supplied or the file could not be opened.
 */
int main( int argc, char **argv )
{
  size_t totalLines, single, multi;
  FILE *fp;

  // A filename argument is required.
  if ( argc < 2 )
  {
    fprintf( stderr, "USAGE: %s <filename>\n", argv[0] );
    return EXIT_FAILURE;
  }

  // Try to open the input for reading.
  fp = fopen( argv[1], "r" );
  if ( fp == NULL )
  {
    fprintf( stderr, "Cannot open file %s\n", argv[1] );
    return EXIT_FAILURE;
  }

  // The tables are only needed once we know there is something to scan.
  initState( state );
  initAction( action );

  // Scan the whole file, then release it before reporting.
  countComments( fp, &totalLines, &single, &multi );
  fclose( fp );

  printf( "File                 : %s\n", argv[1] );
  printf( "Total lines          : %zu\n", totalLines );
  printf( "Single-comment lines : %zu\n", single );
  printf( "Multi-comment lines  : %zu\n", multi );

  return EXIT_SUCCESS;
}

Running the file on itself gives us

$ ./comment_counter comment_counter.c
File                 : comment_counter.c
Total lines          : 150
Single-comment lines : 7
Multi-comment lines  : 42

which I think is right. This has all the same weaknesses as the first version, just in a different form.

Comments