piepi piepi - 2 months ago 23
C Question

Counting variables, arrays in lex/flex

I have started learning lex recently and tried a few examples.
I am trying to count the number of variables starting with 'a' and ending with a digit and the number of 1D arrays.

%{
/*
 * Scanner specification with two rules; the action of the first increments
 * count1 and the action of the second increments count2.
 *
 * NOTE(review): in flex patterns `|` binds more loosely than concatenation,
 * so each rule below reads as the bare keywords OR one longer final branch,
 * and a comma inside [...] is a literal comma -- confirm against the flex
 * manual before relying on these patterns.
 */
/* Replace any existing yywrap with a macro that always evaluates to 1. */
#undef yywrap
#define yywrap() 1
#include<stdio.h>
/* Incremented by the first rule's action. */
int count1;
/* Incremented by the second rule's action. */
int count2;
%}
%option noyywrap
%%

int|char|bool|float" "a[a-z,A-Z,0-9]*[0-9] {count1++;}
int|char|float|bool" "[a-z,A-Z]+[0-9,a-z,A-Z]*"["[0-9]+"]" {count2++;}

%%

/*
 * Entry point. When exactly one command-line argument is given and that file
 * can be opened for reading, it is assigned to yyin; then the two counters
 * are printed and yylex() is called.
 */
void main(int argc,char** argv){
FILE *fh;
if (argc == 2 && (fh = fopen(argv[1], "r")))
yyin = fh;
/* The counters are printed here, before yylex() has been called. */
printf("%d %d",count1,count2);
yylex();
}


I am trying to count (1) the number of variables starting with 'a' and ending with a digit and (2) the number of 1D arrays. The input is from a "f.c" file.

//f.c

#include<stdio.h>
void main(){
char a;
char b;
char c;
int ab[5];
int bc[2];
int ca[7];
int ds[4];

}


Both the counts are showing zero and the output is:

0 0#include<stdio.h>
void main(){
a;
b;
c;
ab[5];
bc[2];
ca[7];
ds[4];

}


Also, how do I count those variables which fall in both of the categories?

Answer

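You have the order wrong in your main: the counts are printed before `yylex()` has run, so they are still zero. Call `yylex()` first and print afterwards. You can also use macros (named definitions) to make long regexes more readable; flex wraps each expanded definition in parentheses, so `{TYPE}` treats `int|char|bool|float` as one group instead of letting `|` split the whole rule into separate alternatives.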

%{
/*
 * Counts two kinds of declarations in the scanned input:
 *   count1 - "<type> <name>" where <name> starts with 'a' and ends in a digit
 *   count2 - "<type> <name>[<digits>]", i.e. one-dimensional arrays
 * Every match is also printed with a "111" or "222" prefix.  Input that
 * matches neither rule is handled by flex's default rule, which copies it
 * to the output unchanged.
 *
 * yywrap is not defined by hand here: "%option noyywrap" below already makes
 * the scanner stop at end of input.
 */
#include <stdio.h>
  int count1 = 0;
  int count2 = 0;
%}
%option noyywrap
TYPE int|char|bool|float
DIGIT [0-9]
ID [A-Za-z_][A-Za-z0-9_]*
SPACE [ \t]+

%%

{TYPE}{SPACE}a[A-Za-z0-9_]*{DIGIT}  {
                                     /* variable named a...<digit> */
                                     printf("111 %s\n",yytext);
                                     count1++;
                                   }
{TYPE}{SPACE}{ID}"["{DIGIT}+"]"      {
                                     /* one-dimensional array declaration */
                                     printf("222 %s\n",yytext);
                                     count2++;
                                   }
%%
/*
 * Scan the file named by the single command-line argument; with any other
 * argument count yyin is left untouched and the scanner reads its default
 * input.  After scanning, print both counters.
 *
 * Returns 0 on success and 1 when the named file cannot be opened.
 */
int main(int argc, char **argv)
{
  FILE *fh = NULL;

  if (argc == 2) {
    fh = fopen(argv[1], "r");
    if (fh == NULL) {
      /* Report the failure instead of silently scanning another stream. */
      perror(argv[1]);
      return 1;
    }
    yyin = fh;
  }

  /* Scan first: the rule actions are what update the counters. */
  yylex();
  printf("%d %d\n", count1, count2);

  if (fh != NULL) {
    fclose(fh);
  }
  return 0;
}

Run with the file

//f.c

#include<stdio.h>
void main(){
    char a123;
    char a;
    char b123;
    char c;
    int ab[5];
    int bc[2];
    int ca[7];
    int ds[4];

}

Results in the output

//f.c

#include<stdio.h>
void main(){
    111 char a123
;
    char a;
    char b123;
    char c;
    222 int ab[5]
;
    222 int bc[2]
;
    222 int ca[7]
;
    222 int ds[4]
;

}
1 4

If you want to restrict the output to the matched tokens only, add catch-all rules that discard everything else. Newlines need their own rule because `.` does not match them:

%{
/*
 * Counts two kinds of declarations in the scanned input:
 *   count1 - "<type> <name>" where <name> starts with 'a' and ends in a digit
 *   count2 - "<type> <name>[<digits>]", i.e. one-dimensional arrays
 * Every match is printed with a "111" or "222" prefix; all other input is
 * discarded by the final rule, so only the matches appear in the output.
 *
 * yywrap is not defined by hand here: "%option noyywrap" below already makes
 * the scanner stop at end of input.
 */
#include <stdio.h>
  int count1 = 0;
  int count2 = 0;
%}
%option noyywrap
TYPE int|char|bool|float
DIGIT [0-9]
ID [A-Za-z_][A-Za-z0-9_]*
SPACE [ \t]+

%%

{TYPE}{SPACE}a[A-Za-z0-9_]*{DIGIT}  {
                                     /* variable named a...<digit> */
                                     printf("111 %s\n",yytext);
                                     count1++;
                                   }
{TYPE}{SPACE}{ID}"["{DIGIT}+"]"      {
                                     /* one-dimensional array declaration */
                                     printf("222 %s\n",yytext);
                                     count2++;
                                   }
.|\n                               { /* discard: "." alone does not match a newline */ }
%%
/*
 * Scan the file named by the single command-line argument; with any other
 * argument count yyin is left untouched and the scanner reads its default
 * input.  After scanning, print both counters.
 *
 * Returns 0 on success and 1 when the named file cannot be opened.
 */
int main(int argc, char **argv)
{
  FILE *fh = NULL;

  if (argc == 2) {
    fh = fopen(argv[1], "r");
    if (fh == NULL) {
      /* Report the failure instead of silently scanning another stream. */
      perror(argv[1]);
      return 1;
    }
    yyin = fh;
  }

  /* Scan first: the rule actions are what update the counters. */
  yylex();
  printf("%d %d\n", count1, count2);

  if (fh != NULL) {
    fclose(fh);
  }
  return 0;
}

Results in the output

111 char a123
222 int ab[5]
222 int bc[2]
222 int ca[7]
222 int ds[4]
1 4