C program to detect tokens in a C program
Lexical analysis is the first phase of a compiler; the component that performs it is also known as a scanner. It converts the input program into a sequence of tokens.
A C program consists of various tokens and a token is either a keyword, an identifier, a constant, a string literal, or a symbol.
For Example:
1) Keywords: for example, for, while, if, etc. 2) Identifiers: for example, variable names, function names, etc. 3) Operators: for example, '+', '++', '-', etc. 4) Separators: for example, ',', ';', etc.
Below is a C program that prints all the keywords, operators, valid identifiers, invalid identifiers, integers, and real numbers found in a given piece of C source text:
C
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Returns 'true' if the character is a DELIMITER.
// A delimiter ends the token currently being scanned. The set is:
// whitespace (space, tab, newline, carriage return), the single-character
// operators + - * / > < =, and the punctuation , ; ( ) [ ] { }.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '\t': case '\n': case '\r':
    case '+': case '-': case '*': case '/':
    case ',': case ';':
    case '>': case '<': case '=':
    case '(': case ')':
    case '[': case ']':
    case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the character is an OPERATOR.
// Only the single-character operators + - * / > < = are recognised.
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the string is a VALID IDENTIFIER.
// The string must be non-empty, must not start with a digit, and every
// character must be an ASCII letter, a digit or an underscore.
// Keywords are NOT rejected here; callers that care must test
// isKeyword() first.
bool validIdentifier(const char *str)
{
    if (str == NULL || str[0] == '\0')
        return false;

    // An identifier may contain digits, but not as its first character.
    if (str[0] >= '0' && str[0] <= '9')
        return false;

    for (size_t i = 0; str[i] != '\0'; i++) {
        char c = str[i];
        bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        bool isDigit = (c >= '0' && c <= '9');

        if (!isLetter && !isDigit && c != '_')
            return false;
    }
    return true;
}

// Returns 'true' if the string is a KEYWORD.
// The comparison is case-sensitive and exact; a NULL pointer is never
// a keyword.
bool isKeyword(const char *str)
{
    static const char *const keywords[] = {
        "auto",     "break",    "case",     "char",   "const",
        "continue", "default",  "do",       "double", "else",
        "enum",     "extern",   "float",    "for",    "goto",
        "if",       "inline",   "int",      "long",   "register",
        "restrict", "return",   "short",    "signed", "sizeof",
        "static",   "struct",   "switch",   "typedef", "union",
        "unsigned", "void",     "volatile", "while"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    if (str == NULL)
        return false;

    for (size_t i = 0; i < count; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return true;
    }
    return false;
}

// Returns 'true' if the string is an INTEGER.
// isInteger: true when the string is non-empty and consists solely of
// the decimal digits 0-9. A sign is not accepted, and a NULL pointer
// yields false.
bool isInteger(const char *str)
{
    if (str == NULL || str[0] == '\0')
        return false;

    for (size_t i = 0; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9')
            return false;
    }
    return true;
}

// Returns 'true' if the string is a REAL NUMBER.
// A real number here is a run of decimal digits containing exactly one
// '.', with at least one digit somewhere ("1.5", "1." and ".5" qualify;
// ".", "1.2.3" and "15" do not). Signs and exponents are not accepted,
// and a NULL pointer yields false.
bool isRealNumber(const char *str)
{
    if (str == NULL || str[0] == '\0')
        return false;

    int dots = 0;
    int digits = 0;

    for (size_t i = 0; str[i] != '\0'; i++) {
        if (str[i] == '.')
            dots++;
        else if (str[i] >= '0' && str[i] <= '9')
            digits++;
        else
            return false;
    }
    // A second '.' or a lone "." is not a number.
    return dots == 1 && digits > 0;
}

// Extracts the SUBSTRING.
// Returns a newly allocated, NUL-terminated copy of str[left..right]
// (both indices inclusive). The caller owns the result and must free()
// it. An empty range (right < left) yields an empty string. Returns NULL
// if str is NULL, left is negative, or the allocation fails. The caller
// must ensure that left..right lies within the string.
char *subString(const char *str, int left, int right)
{
    if (str == NULL || left < 0)
        return NULL;

    size_t count = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char *subStr = malloc(count + 1);

    if (subStr == NULL)
        return NULL;

    memcpy(subStr, str + left, count);
    subStr[count] = '\0';
    return subStr;
}

// Parsing the input STRING.
// parse: splits `str` into tokens and prints one classification line per
// token to stdout.
//
// Characters for which isDelimiter() is true separate tokens. A delimiter
// that is also an operator is reported as an operator; every other
// delimiter is skipped silently. Each run of non-delimiter characters is
// copied with subString() and classified, in this order, as a keyword,
// an integer, a real number, a valid identifier, or an invalid identifier.
//
// A NULL `str` is ignored. If a token cannot be allocated, a message is
// written to stderr and parsing stops.
void parse(char *str)
{
    if (str == NULL)
        return;

    int len = (int)strlen(str);
    int left = 0;

    while (left < len) {
        // Delimiters are consumed one character at a time.
        if (isDelimiter(str[left])) {
            if (isOperator(str[left]))
                printf("'%c' IS AN OPERATOR\n", str[left]);
            left++;
            continue;
        }

        // Find the end of the token without ever indexing past the
        // terminating '\0'.
        int right = left;
        while (right < len && !isDelimiter(str[right]))
            right++;

        char *subStr = subString(str, left, right - 1);
        if (subStr == NULL) {
            fprintf(stderr, "parse: out of memory\n");
            return;
        }

        if (isKeyword(subStr))
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr))
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr))
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr))
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);

        // subString() hands ownership of the buffer to us.
        free(subStr);
        left = right;
    }
}

// DRIVER FUNCTION
int main(void)
{
    // maximum length of string is 100 here
    char str[100] = "int a = b + 1c; ";

    parse(str); // calling the parse function
    return 0;
}
Output:
'int' IS A KEYWORD
'a' IS A VALID IDENTIFIER
'=' IS AN OPERATOR
'b' IS A VALID IDENTIFIER
'+' IS AN OPERATOR
'1c' IS NOT A VALID IDENTIFIER
Contact Us