?
Solved

HI NEED help with lexical anaylsis using C

Posted on 2003-03-19
1
Medium Priority
?
259 Views
Last Modified: 2010-04-15
Would it be possible for you to combine the lexical analyser and the LR parser so that the output of the lexical analyser can be used by the parser. It may be possible to do this by physically combining the lexical analyser and the LR parser into one file, or else stdout to read/pipe tokens, read by the analyser into the parser.

Here are the lex and LR files for your reference:

--------------------------------------------------------------------------------------------
/* A simple implementation of an LR parser
 *
 * -- current tables process a simple expression grammar

#include <stdio.h>
#include <stdlib.h>

/* The particular grammar accepted by an LR parser is determined
 * by the entries in its action and goto tables.

 * Define some constants to name the states and possible actions.
 */

/* note: ERROR_STATE does not form its own row in the tables,
 * and so is not counted for num_states */
enum states { STATE_0, STATE_1, STATE_2, STATE_3, STATE_4, STATE_5, STATE_6,
            STATE_7, STATE_8, STATE_9, STATE_10, STATE_11, ERROR_STATE };
#define num_states 12

enum terminals { PLUS, TIMES, L_PAREN, R_PAREN, ID, NUM, END_CHAR };
#define num_terminals 9

enum nonterminals { NON_EE, NON_E, NON_T, NON_F };
#define num_nonterminals 4

/* The action_table contains actions which are structures
 * each structure will holds its name and an argument
 * if the action is SHIFT_ACTION, then argument will hold the new state
 * if the action is REDUCE_ACTION, then argument will hold the rule to reduce with
 * (the ERROR_ACTION and ACCEPT_ACTION ignore the argument, so give it any value)
 */
enum action_name { SHIFT_ACTION, REDUCE_ACTION, ERROR_ACTION, ACCEPT_ACTION };

struct action {
  int name;
  int argument;
} ;

struct action action_table[num_states][num_terminals];
int goto_table[num_states][num_nonterminals];

void run_lr_parser(void);
void setup_action_table(void);
void setup_goto_table(void);

struct action make_action (int, int);

/* two arrays with info about the rules
 * Note, we only need to store the number of items in the right-hand-side
 * and the non-terminal token on the left-hand-side */
int right_side_size[7] = { 1, 3, 1, 3, 1, 3, 1 };
int left_side[7] = { NON_EE, NON_E, NON_E, NON_T, NON_T, NON_F, NON_F };


/* An LR parser needs a stack to hold states and symbols.
 * We define a simple stack of integers, which will hold
 * instances from the enumerated types of states and symbols. */

/* define the stack variables and functions */
#define MAX_STACK 100        /* the maximum size of our stack */
int lr_stack[MAX_STACK];     /* the stack is an array of integers */
int stack_pointer = 0;       /* points to the next free element in the stack */

void stack_clear (void);          /* clear the stack */
void stack_push (int );           /* adds an item to the stack */
int stack_pop (void);             /* removes the top item from the stack */
int stack_peek (void);            /* looks at the top item of the stack */
void stack_overflow_error(void);  /* report an error if too many items are pushed */
void stack_underflow_error(void); /* report an error if too many items are popped */

/* global array to hold values of the input tokens */
#define MAX_SIZE 100        /* NB: Increase if you want more than 100 tokens */
int input_array[MAX_SIZE];


int main ()
{
  /* load the example tokens into the input array */
  input_array[0] = ID;
  input_array[1] = TIMES;
  input_array[2] = ID;
  input_array[3] = PLUS;
  input_array[4] = ID;
  input_array[5] = END_CHAR;
  /* put an explicit end marker, in case of bad errors in action table */
  input_array[6] = -1;

  run_lr_parser();

  return 0;
}

/* the lr parser processes the tokens in the input_array */
void run_lr_parser()
{
  int ip = 0; /* point to start of input_array */
  /* some place holders for variables in the routine */
  int current_state;
  int current_symbol;
  int next_state;
  int next_symbol;
  int i;

  struct action current_action;

  setup_action_table();
  setup_goto_table();
  stack_push(STATE_0);  /* start with state_0 on the stack */
   
  do { /* this do loop repeats until an error or accept state is reached */

    current_state = stack_peek();
    current_symbol = input_array[ip];

    if (current_symbol == -1)
      {
      printf("LR Parsing error -- out of input tokens \n");
      exit(EXIT_FAILURE);
      }

    /* retrieve the current action from the action_table */
    current_action = action_table[current_state][current_symbol];
    printf("Current action %d %d\n", current_action.name, current_action.argument);
   
    if (current_action.name == SHIFT_ACTION)
      { /* a shift action simply places the symbol and
       * the action's argument onto the stack,
       * before stepping the program pointer on one */
      printf("Shift %d\n", current_action.argument);
      stack_push(current_symbol);
      stack_push(current_action.argument);
      ip = ip+1;
      }
    else if (current_action.name == REDUCE_ACTION)
      { /* a reduce action removes symbols from the stack,
       * the number to be removed is twice the size of the rule's right-hand-side
       */
      for (i=0; i< (2*right_side_size[current_action.argument]); i++)
        {
          stack_pop();
        }
      next_state = stack_peek();
      /* the next symbol to be placed on the stack is the non-terminal
       * on the left of the rule */
      next_symbol = left_side[current_action.argument];
      stack_push(next_symbol);
      /* you then look at the goto table to retrieve the next stack number */
      if (goto_table[next_state][next_symbol] == ERROR_STATE)
        {
          printf("LR Parsing Error\n");
          exit(EXIT_FAILURE);
        }
      else
        stack_push(goto_table[next_state][next_symbol]);

      printf("Reduce %d\n", current_action.argument);
      }
    else if (current_action.name == ERROR_ACTION)
      {
      printf("LR Parsing Error\n");
      exit(EXIT_FAILURE);
      }
    else /* if (current_action.name == ACCEPT_ACTION) */
      {
      printf("LR Parser accepted input\n");
      break;
      }

  } while (1==1); /* repeats forever */

  /* you should break out to here if successful */
}

/* ********** handle the action and goto tables ********** */
void setup_action_table ()
{
  int i, j;

  /* clear the action table */
  for (i=0; i<num_states; i++)
    for (j=0; j<num_terminals; j++)
      action_table[i][j] = make_action( ERROR_ACTION, ERROR_STATE );

  /* state 0 */
  action_table[STATE_0][ID] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_0][NUM] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_0][L_PAREN] = make_action ( SHIFT_ACTION, STATE_4 );
  /* state 1 */
  action_table[STATE_1][PLUS] = make_action ( SHIFT_ACTION, STATE_6 );
  action_table[STATE_1][END_CHAR] = make_action ( ACCEPT_ACTION, STATE_0);
  /* state 2 */
  action_table[STATE_2][PLUS] = make_action ( REDUCE_ACTION, 2 );
  action_table[STATE_2][TIMES] = make_action ( SHIFT_ACTION, STATE_7 );
  action_table[STATE_2][R_PAREN] = make_action ( REDUCE_ACTION, 2 );
  action_table[STATE_2][END_CHAR] = make_action ( REDUCE_ACTION, 2 );
  /* state 3 */
  action_table[STATE_3][PLUS] = make_action ( REDUCE_ACTION, 4 );
  action_table[STATE_3][TIMES] = make_action ( REDUCE_ACTION, 4 );
  action_table[STATE_3][R_PAREN] = make_action ( REDUCE_ACTION, 4);
  action_table[STATE_3][END_CHAR] = make_action ( REDUCE_ACTION, 4);
  /* state 4 */
  action_table[STATE_4][L_PAREN] = make_action ( SHIFT_ACTION, STATE_4 );
  action_table[STATE_4][ID] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_4][NUM] = make_action ( SHIFT_ACTION, STATE_5 );
  /* state 5 */
  action_table[STATE_5][PLUS] = make_action ( REDUCE_ACTION, 6 );
  action_table[STATE_5][TIMES] = make_action ( REDUCE_ACTION, 6 );
  action_table[STATE_5][R_PAREN] = make_action (REDUCE_ACTION, 6);
  action_table[STATE_5][END_CHAR] = make_action (REDUCE_ACTION, 6);
  /* state 6 */
  action_table[STATE_6][L_PAREN] = make_action ( SHIFT_ACTION, STATE_4 );
  action_table[STATE_6][ID] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_6][NUM] = make_action ( SHIFT_ACTION, STATE_5 );
  /* state 7 */
  action_table[STATE_7][L_PAREN] = make_action ( SHIFT_ACTION, STATE_4 );
  action_table[STATE_7][ID] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_7][NUM] = make_action ( SHIFT_ACTION, STATE_5 );
  /* state 8 */
  action_table[STATE_8][PLUS] = make_action ( SHIFT_ACTION, STATE_6 );
  action_table[STATE_8][R_PAREN] = make_action ( SHIFT_ACTION, STATE_11 );
  /* state 9 */
  action_table[STATE_9][PLUS] = make_action ( REDUCE_ACTION, 1 );
  action_table[STATE_9][TIMES] = make_action (SHIFT_ACTION, 7 );
  action_table[STATE_9][R_PAREN] = make_action (REDUCE_ACTION, 1 );
  action_table[STATE_9][END_CHAR] = make_action (REDUCE_ACTION, 1 );
  /* state 10 */
  action_table[STATE_10][PLUS] = make_action ( REDUCE_ACTION, 3 );
  action_table[STATE_10][TIMES] = make_action ( REDUCE_ACTION, 3 );
  action_table[STATE_10][R_PAREN] = make_action (REDUCE_ACTION, 3);
  action_table[STATE_10][END_CHAR] = make_action (REDUCE_ACTION, 3);
  /* state 11 */
  action_table[STATE_11][PLUS] = make_action ( REDUCE_ACTION, 5 );
  action_table[STATE_11][TIMES] = make_action (REDUCE_ACTION, 5 );
  action_table[STATE_11][R_PAREN] = make_action (REDUCE_ACTION, 5);
  action_table[STATE_11][END_CHAR] = make_action (REDUCE_ACTION, 5);
}

void setup_goto_table ()
{
  int i, j;

  /* clear the goto table */
  for (i=0; i<num_states; i++)
    for (j=0; j<num_nonterminals; j++)
      goto_table[i][j] = ERROR_STATE;

  /* entries for the goto table */
  goto_table[STATE_0][NON_E] = STATE_1;
  goto_table[STATE_0][NON_T] = STATE_2;
  goto_table[STATE_0][NON_F] = STATE_3;
  goto_table[STATE_4][NON_E] = STATE_8;
  goto_table[STATE_4][NON_T] = STATE_2;
  goto_table[STATE_4][NON_F] = STATE_3;
  goto_table[STATE_6][NON_T] = STATE_9;
  goto_table[STATE_6][NON_F] = STATE_3;
  goto_table[STATE_7][NON_F] = STATE_10;  

}

/* construct an action structure given its two arguments */
struct action make_action (int name, int argument)
{
  struct action new_action;

  new_action.name = name;
  new_action.argument = argument;
  return new_action;
}

/* ********** functions for the stack ********** */
void stack_clear ()
{
  stack_pointer = 0;
}

void stack_push (int new_item)
{
  if (stack_pointer < MAX_STACK)
    {
      /* add item to the stack, and advance the stack pointer */
      lr_stack[stack_pointer] = new_item;
      stack_pointer = stack_pointer + 1;
    }
  else
    {
      stack_overflow_error();
    }
}

int stack_pop ()
{
  if (stack_pointer == 0)
    {
      stack_underflow_error();
    }

  stack_pointer = stack_pointer - 1;
  return lr_stack[stack_pointer];
     
}

int stack_peek ()
{
  if (stack_pointer == 0)
    {
      stack_underflow_error();
    }

  return lr_stack[stack_pointer-1];
}

void stack_overflow_error ()
{
  printf("Error: stack overflow\n");
  exit(0);
}

void stack_underflow_error ()
{
  printf("Error: stack underflow\n");
  exit(0);
}

----------------------------------------------------------------------------------------------
/* lexical analyser

 * outputs tokens for + * ( ) id num
*/

#include<stdio.h>
#include<string.h>
#include<ctype.h>

/* define the functions */
int get_next_input(void);
void setup_state_table(void);
void setup_output_table(void);

/* define the character classes and states */
enum chars { END_CHAR, PLUS, TIMES, L_PAREN, R_PAREN,
           CHAR, DIGIT, WHITESPACE, OTHER_CHAR };
#define num_chars 9

enum states { STATE_A, STATE_B, STATE_C, STATE_D, STATE_E, STATE_F, STATE_G,
            END_STATE, ERROR_STATE };
#define num_states 9

/* provide 2-dimensional arrays to store the transition and output tables */
int state_table[num_states][num_chars];
char * output_table[num_states][num_chars];

int main()
{
  int state = STATE_A;   /* the start state */
  int input;

  setup_state_table();
  setup_output_table();

  do {
    input = get_next_input();
    /* output the message for this state, if present */
    if (strcmp(output_table[state][input], ""))
      printf("%s\n", output_table[state][input]);

    state = state_table[state][input];
  } while ( (state != END_STATE) &&
          (state != ERROR_STATE) ); /* loop until we reach an end state */

  /* provide feedback on which state the machine has been left in */
  if (state == END_STATE) printf("\nVALID sequence\n");
  else if (state == ERROR_STATE) printf("\nLexical error\n");

  return 0;
}

/* read the next character from the input stream and return its class */
int get_next_input()
{
  int next_char = getchar();

  if (next_char == EOF)
    return END_CHAR;
  else if ( isdigit(next_char) )
    return DIGIT;
  else if ( next_char == '+' )
    return PLUS;
  else if ( next_char == '*' )
    return TIMES;
  else if ( next_char == '(' )
    return L_PAREN;
  else if ( next_char == ')' )
    return R_PAREN;
  else if ( isalpha(next_char) )
    return CHAR;
  else if ( isdigit(next_char) )
    return DIGIT;
  else if ( isspace(next_char) )
    return WHITESPACE;
  else
    return OTHER_CHAR;
}

/* create the state table for mapping the current state and input character
 * to a new state */
void setup_state_table()
{
  int i;

  /* transitions for states A to I are (almost) identical */
  for (i=STATE_A; i<=STATE_G; i++)
    {
      state_table[i][END_CHAR]   = END_STATE;
      state_table[i][PLUS]       = STATE_B;
      state_table[i][TIMES]      = STATE_C;
      state_table[i][L_PAREN]    = STATE_D;
      state_table[i][R_PAREN]    = STATE_E;
      state_table[i][CHAR]       = STATE_F;
      state_table[i][DIGIT]      = STATE_G;
      state_table[i][WHITESPACE] = STATE_A;
      state_table[i][OTHER_CHAR] = ERROR_STATE;
    }

  /* for states B, C, we return a lexical error
   * if you get two operators in succession */
  for (i=STATE_B; i<=STATE_C; i++)
    {
      state_table[i][PLUS]       = ERROR_STATE;
      state_table[i][TIMES]      = ERROR_STATE;
    }

  /* state F handles identifiers: digits form part of same identifier */
  state_table[STATE_F][DIGIT]      = STATE_F;

  /* state G handles numbers: contiguous characters are not allowed */
  state_table[STATE_G][CHAR]       = ERROR_STATE;

}

/* the output table stores the strings output on a successfully
 * completed state */
void setup_output_table()
{
  /* clear out all the strings first */
  int i,j;

  for (i=0; i<num_states; i++)
    for (j=0; j<num_chars; j++)
      output_table[i][j] = "";

  /* the operators + - * / can all be legitimately ended with any symbol,
   * except for another operator */

  /* outputs for state B - the plus symbol */
  output_table[STATE_B][END_CHAR]   = "plus";
  output_table[STATE_B][L_PAREN]    = "plus";
  output_table[STATE_B][R_PAREN]    = "plus";
  output_table[STATE_B][CHAR]       = "plus";
  output_table[STATE_B][DIGIT]      = "plus";
  output_table[STATE_B][WHITESPACE] = "plus";
  output_table[STATE_B][OTHER_CHAR] = "plus";
  /* outputs for state C - the times symbol */
  output_table[STATE_C][END_CHAR]   = "times";
  output_table[STATE_C][L_PAREN]    = "times";
  output_table[STATE_C][R_PAREN]    = "times";
  output_table[STATE_C][CHAR]       = "times";
  output_table[STATE_C][DIGIT]      = "times";
  output_table[STATE_C][WHITESPACE] = "times";
  output_table[STATE_C][OTHER_CHAR] = "times";

  /* the ( ) symbols can both be ended by any input */
  /* outputs for state D - the open bracket symbol */
  output_table[STATE_D][END_CHAR]   = "l_paren";
  output_table[STATE_D][PLUS]       = "l_paren";
  output_table[STATE_D][TIMES]      = "l_paren";
  output_table[STATE_D][L_PAREN]    = "l_paren";
  output_table[STATE_D][R_PAREN]    = "l_paren";
  output_table[STATE_D][CHAR]       = "l_paren";
  output_table[STATE_D][DIGIT]      = "l_paren";
  output_table[STATE_D][WHITESPACE] = "l_paren";
  output_table[STATE_D][OTHER_CHAR] = "l_paren";
  /* outputs for state E - the close bracket symbol */
  output_table[STATE_E][END_CHAR]   = "r_paren";
  output_table[STATE_E][PLUS]       = "r_paren";
  output_table[STATE_E][TIMES]      = "r_paren";
  output_table[STATE_E][L_PAREN]    = "r_paren";
  output_table[STATE_E][R_PAREN]    = "r_paren";
  output_table[STATE_E][CHAR]       = "r_paren";
  output_table[STATE_E][DIGIT]      = "r_paren";
  output_table[STATE_E][WHITESPACE] = "r_paren";
  output_table[STATE_E][OTHER_CHAR] = "r_paren";

  /* an identifier can be finished by any input except an id or digit */
  /* outputs for state F - the identifier symbol */
  output_table[STATE_F][END_CHAR]   = "identifier";
  output_table[STATE_F][PLUS]       = "identifier";
  output_table[STATE_F][TIMES]      = "identifier";
  output_table[STATE_F][L_PAREN]    = "identifier";
  output_table[STATE_F][R_PAREN]    = "identifier";
  output_table[STATE_F][WHITESPACE] = "identifier";
  output_table[STATE_F][OTHER_CHAR] = "identifier";

  /* a digit can be finished by any input except a further digit */
  /* outputs for state G - the identifier symbol */
  output_table[STATE_G][END_CHAR]   = "num";
  output_table[STATE_G][PLUS]       = "num";
  output_table[STATE_G][TIMES]      = "num";
  output_table[STATE_G][L_PAREN]    = "num";
  output_table[STATE_G][R_PAREN]    = "num";
  output_table[STATE_G][CHAR]       = "num";
  output_table[STATE_G][WHITESPACE] = "num";
  output_table[STATE_G][OTHER_CHAR] = "num";
}



0
Comment
Question by:007singh
1 Comment
 
LVL 11

Accepted Solution

by:
cup earned 300 total points
ID: 8179183
1) Create a routine for get_current_input.  This is the result of get_next_input

2) In the parser, replace

current_symbol = input_array[ip];

with current_symbol = get_current_input.


3) Where you increment the input pointer, ip = ip + 1;, use current_symbol = get_next_input ();
0

Featured Post

Concerto Cloud for Software Providers & ISVs

Can Concerto Cloud Services help you focus on evolving your application offerings, while delivering the best cloud experience to your customers? From DevOps to revenue models and customer support, the answer is yes!

Learn how Concerto can help you.

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

Preface I don't like visual development tools that are supposed to write a program for me. Even if it is Xcode and I can use Interface Builder. Yes, it is a perfect tool and has helped me a lot, mainly, in the beginning, when my programs were small…
Examines three attack vectors, specifically, the different types of malware used in malicious attacks, web application attacks, and finally, network based attacks.  Concludes by examining the means of securing and protecting critical systems and inf…
The goal of this video is to provide viewers with basic examples to understand recursion in the C programming language.
Video by: Grant
The goal of this video is to provide viewers with basic examples to understand and use while-loops in the C programming language.

621 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question