piepi - 21 days ago 10
C Question

# Counting variables, arrays in lex/flex

I have started learning lex recently and tried a few examples.
I am trying to count the number of variables starting with 'a' and ending with a digit and the number of 1D arrays.

``````%{
/* Supply yywrap() as a macro that always returns 1. */
#undef yywrap
#define yywrap() 1
#include<stdio.h>
/* Bumped by the first rule (intended: variables starting with 'a' and ending in a digit). */
int count1;
/* Bumped by the second rule (intended: 1D array declarations). */
int count2;
%}
%option noyywrap
%%

int|char|bool|float" "a[a-z,A-Z,0-9]*[0-9] {count1++;}
int|char|float|bool" "[a-z,A-Z]+[0-9,a-z,A-Z]*"["[0-9]+"]" {count2++;}

%%

/* Entry point: optionally points the scanner at the file named in argv[1],
 * prints the two counters, then runs the scanner. */
void main(int argc,char** argv){
FILE *fh;
/* Use the named file as scanner input only when exactly one argument is given and it opens. */
if (argc == 2 && (fh = fopen(argv[1], "r")))
yyin = fh;
/* NOTE(review): the counters are printed here, before yylex() has scanned any input. */
printf("%d %d",count1,count2);
yylex();
}
``````

I am trying to count (1) the number of variables starting with 'a' and ending with a digit and (2) the number of 1D arrays. The input is from a "f.c" file.

``````//f.c

#include<stdio.h>
void main(){
char a;
char b;
char c;
int ab[5];
int bc[2];
int ca[7];
int ds[4];

}
``````

Both the counts are showing zero and the output is:

``````0 0#include<stdio.h>
void main(){
a;
b;
c;
ab[5];
bc[2];
ca[7];
ds[4];

}
``````

Also, how do I count those variables which fall in both of the categories?

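You have the order wrong in your `main`: the counts are printed before `yylex()` has run, so they are still zero. The patterns are also not grouped the way you intend. `|` has the lowest precedence, so `int|char|bool|float" "a...` means `int`, or `char`, or `bool`, or `float a...`; only the last alternative carries the rest of the pattern. Commas inside a character class such as `[a-z,A-Z,0-9]` are matched literally, so they should be dropped. You can also use macros (named definitions) to make long regexes more readable; flex wraps each expanded definition in parentheses, which fixes the grouping as well.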

``````%{
/* Supply yywrap() as a macro that always returns 1. */
#undef yywrap
#define yywrap() 1
#include<stdio.h>
/* Number of matches of the first rule (names starting with 'a' and ending in a digit). */
int count1 = 0;
/* Number of matches of the second rule (array declarations). */
int count2 = 0;
%}
/* Named sub-patterns, referenced as {NAME} in the rules below. */
TYPE int|char|bool|float
DIGIT [0-9]
ID [a-z][a-z0-9A-Z]*
SPACE " "
%option noyywrap

%%

{TYPE}{SPACE}a[a-z0-9A-Z]*{DIGIT}  {
/* A type keyword, one space, then a name that starts with 'a' and ends in a digit. */
printf("111 %s\n",yytext);
count1++;
}
{TYPE}{SPACE}{ID}"["{DIGIT}+"]"      {
/* A type keyword, one space, an identifier, then a bracketed run of digits. */
printf("222 %s\n",yytext);
count2++;
}
%%
/*
 * Entry point: scans the file named on the command line (or the scanner's
 * default input when no single file argument is given) and then prints the
 * two match counters.
 *
 * Returns 0 on success, 1 if the named file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fh = NULL;

    if (argc == 2) {
        fh = fopen(argv[1], "r");
        if (fh == NULL) {
            /* Report the failure instead of silently scanning another stream. */
            perror(argv[1]);
            return 1;
        }
        yyin = fh;
    }

    /* The counters are only meaningful once the whole input has been scanned. */
    yylex();
    printf("%d %d\n", count1, count2);

    if (fh != NULL) {
        fclose(fh);
    }
    return 0;
}
``````

Run with the file

``````//f.c

#include<stdio.h>
void main(){
char a123;
char a;
char b123;
char c;
int ab[5];
int bc[2];
int ca[7];
int ds[4];

}
``````

Results in the output

``````//f.c

#include<stdio.h>
void main(){
111 char a123
;
char a;
char b123;
char c;
222 int ab[5]
;
222 int bc[2]
;
222 int ca[7]
;
222 int ds[4]
;

}
1 4
``````

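If you want to restrict the output to the matched tokens only, you also need rules that swallow every other character, including newlines (otherwise unmatched input is echoed to the output):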

``````%{
/* Supply yywrap() as a macro that always returns 1. */
#undef yywrap
#define yywrap() 1
#include<stdio.h>
/* Number of matches of the first rule (names starting with 'a' and ending in a digit). */
int count1 = 0;
/* Number of matches of the second rule (array declarations). */
int count2 = 0;
%}
/* Named sub-patterns, referenced as {NAME} in the rules below. */
TYPE int|char|bool|float
DIGIT [0-9]
ID [a-z][a-z0-9A-Z]*
SPACE " "
%option noyywrap

%%

{TYPE}{SPACE}a[a-z0-9A-Z]*{DIGIT}  {
/* A type keyword, one space, then a name that starts with 'a' and ends in a digit. */
printf("111 %s\n",yytext);
count1++;
}
{TYPE}{SPACE}{ID}"["{DIGIT}+"]"      {
/* A type keyword, one space, an identifier, then a bracketed run of digits. */
printf("222 %s\n",yytext);
count2++;
}
    /* Catch-all rules with no action: any other character, and newlines, are discarded instead of echoed. */
.
\n
%%
/*
 * Entry point: scans the file named on the command line (or the scanner's
 * default input when no single file argument is given) and then prints the
 * two match counters.
 *
 * Returns 0 on success, 1 if the named file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fh = NULL;

    if (argc == 2) {
        fh = fopen(argv[1], "r");
        if (fh == NULL) {
            /* Report the failure instead of silently scanning another stream. */
            perror(argv[1]);
            return 1;
        }
        yyin = fh;
    }

    /* The counters are only meaningful once the whole input has been scanned. */
    yylex();
    printf("%d %d\n", count1, count2);

    if (fh != NULL) {
        fclose(fh);
    }
    return 0;
}
``````

Results in the output

``````111 char a123
222 int ab[5]
222 int bc[2]
222 int ca[7]
222 int ds[4]
1 4
``````
Source (Stackoverflow)