piepi piepi - 10 months ago 60
C Question

Counting variables, arrays in lex/flex

I have started learning lex recently and tried a few examples.
I am trying to count the number of variables starting with 'a' and ending with a digit and the number of 1D arrays.

%{
/*
 * Scanner from the question: intended to count
 *   count1 - variables that start with 'a' and end with a digit
 *   count2 - one-dimensional arrays
 *
 * NOTE(review): in the two rules below '|' binds more loosely than
 * concatenation, so the `" "a...` / `" "[a-z,A-Z]+...` suffix belongs only
 * to the last alternative; the bare keywords int, char, bool and float
 * match on their own (the sample output shows them being consumed).
 * NOTE(review): the commas inside the character classes are literal, so
 * [a-z,A-Z,0-9] also accepts ','.
 */
#undef yywrap
#define yywrap() 1
#include<stdio.h>
int count1;
int count2;
%}
%option noyywrap
%%

int|char|bool|float" "a[a-z,A-Z,0-9]*[0-9] {count1++;}
int|char|float|bool" "[a-z,A-Z]+[0-9,a-z,A-Z]*"["[0-9]+"]" {count2++;}

%%

/*
 * Scans argv[1] when exactly one argument is given and the file can be
 * opened; otherwise yyin is left untouched.
 */
void main(int argc,char** argv){
FILE *fh;
if (argc == 2 && (fh = fopen(argv[1], "r")))
yyin = fh;
/* NOTE(review): the counters are printed before yylex() has scanned
   anything, so this always prints "0 0". */
printf("%d %d",count1,count2);
yylex();
}


I am trying to count (1) the number of variables starting with 'a' and ending with a digit and (2) the number of 1D arrays. The input is from a "f.c" file.

//f.c

#include<stdio.h>
void main(){
char a;
char b;
char c;
int ab[5];
int bc[2];
int ca[7];
int ds[4];

}


Both the counts are showing zero and the output is:

0 0#include<stdio.h>
void main(){
a;
b;
c;
ab[5];
bc[2];
ca[7];
ds[4];

}


Also, how do I count those variables which fall in both of the categories?

Answer Source

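You have the order wrong in your main: the counts are printed before `yylex()` has scanned anything, so they are always zero. Call `yylex()` first and print afterwards. Your patterns also do not mean what you intended: `|` has the lowest precedence, so `int|char|bool|float" "a...` matches a bare `int`, `char` or `bool`, or `float` followed by the rest, which is why the type names disappear from your output. Either parenthesize the alternation or use macros, which flex expands inside parentheses and which also make long regexes more readable.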

%{
/*
 * Counts two kinds of declarations in the scanned input:
 *   count1 - scalars whose name starts with 'a' and ends with a digit
 *   count2 - one-dimensional arrays with a numeric size
 * Input that matches neither rule is echoed to stdout by the default rule.
 *
 * yywrap() is supplied by "%option noyywrap" below, so no manual
 * #define of yywrap is needed.
 */
#include <stdio.h>
  int count1 = 0;
  int count2 = 0;
%}
TYPE int|char|bool|float
DIGIT [0-9]
ID [a-z][a-z0-9A-Z]*
SPACE " "
%option noyywrap

%%

{TYPE}{SPACE}a[a-z0-9A-Z]*{DIGIT}  {
                                     /* {TYPE} expands as a parenthesized group,
                                        so the whole alternation is followed by
                                        the space and the name, e.g. "char a123". */
                                     printf("111 %s\n",yytext);
                                     count1++;
                                   }
{TYPE}{SPACE}{ID}"["{DIGIT}+"]"      {
                                     /* e.g. "int ab[5]" */
                                     printf("222 %s\n",yytext);
                                     count2++;
                                   }
%%
/*
 * Scans argv[1] when exactly one argument is given, otherwise the default
 * input (stdin), then prints both counters.
 * Returns 0 on success, 1 if the named file cannot be opened.
 */
int main(int argc, char **argv)
{
  FILE *fh = NULL;

  if (argc == 2) {
    fh = fopen(argv[1], "r");
    if (fh == NULL) {
      /* Do not silently fall back to stdin when a file was requested. */
      perror(argv[1]);
      return 1;
    }
    yyin = fh;
  }

  yylex();
  /* The counters are only final once yylex() has consumed all input. */
  printf("%d %d\n", count1, count2);

  if (fh != NULL) {
    fclose(fh);
  }
  return 0;
}

Run with the file

//f.c

#include<stdio.h>
void main(){
    char a123;
    char a;
    char b123;
    char c;
    int ab[5];
    int bc[2];
    int ca[7];
    int ds[4];

}

Results in the output

//f.c

#include<stdio.h>
void main(){
    111 char a123
;
    char a;
    char b123;
    char c;
    222 int ab[5]
;
    222 int bc[2]
;
    222 int ca[7]
;
    222 int ds[4]
;

}
1 4

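If you want the output to contain only the matched tokens, you also have to discard everything else. By default flex echoes unmatched input, so add rules that match any other character (`.`) and newlines (`\n`, which `.` does not match) and do nothing: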

%{
/*
 * Counts two kinds of declarations in the scanned input and prints only
 * the matched declarations:
 *   count1 - scalars whose name starts with 'a' and ends with a digit
 *   count2 - one-dimensional arrays with a numeric size
 * All other input is discarded by the catch-all rule.
 *
 * yywrap() is supplied by "%option noyywrap" below, so no manual
 * #define of yywrap is needed.
 */
#include <stdio.h>
  int count1 = 0;
  int count2 = 0;
%}
TYPE int|char|bool|float
DIGIT [0-9]
ID [a-z][a-z0-9A-Z]*
SPACE " "
%option noyywrap

%%

{TYPE}{SPACE}a[a-z0-9A-Z]*{DIGIT}  {
                                     /* {TYPE} expands as a parenthesized group,
                                        so the whole alternation is followed by
                                        the space and the name, e.g. "char a123". */
                                     printf("111 %s\n",yytext);
                                     count1++;
                                   }
{TYPE}{SPACE}{ID}"["{DIGIT}+"]"      {
                                     /* e.g. "int ab[5]" */
                                     printf("222 %s\n",yytext);
                                     count2++;
                                   }
.|\n                               { /* discard: '.' does not match newline, so both are listed */ }
%%
/*
 * Scans argv[1] when exactly one argument is given, otherwise the default
 * input (stdin), then prints both counters.
 * Returns 0 on success, 1 if the named file cannot be opened.
 */
int main(int argc, char **argv)
{
  FILE *fh = NULL;

  if (argc == 2) {
    fh = fopen(argv[1], "r");
    if (fh == NULL) {
      /* Do not silently fall back to stdin when a file was requested. */
      perror(argv[1]);
      return 1;
    }
    yyin = fh;
  }

  yylex();
  /* The counters are only final once yylex() has consumed all input. */
  printf("%d %d\n", count1, count2);

  if (fh != NULL) {
    fclose(fh);
  }
  return 0;
}

Results in the output

111 char a123
222 int ab[5]
222 int bc[2]
222 int ca[7]
222 int ds[4]
1 4