?
Solved

HI NEED help with lexical anaylsis using C

Posted on 2003-03-19
1
Medium Priority
?
258 Views
Last Modified: 2010-04-15
Would it be possible for you to combine the lexical analyser and the LR parser so that the output of the lexical analyser can be used by the parser. It may be possible to do this by physically combining the lexical analyser and the LR parser into one file, or else stdout to read/pipe tokens, read by the analyser into the parser.

Here are the lex and LR files for your reference:

--------------------------------------------------------------------------------------------
/* A simple implementation of an LR parser
 *
 * -- current tables process a simple expression grammar

#include <stdio.h>
#include <stdlib.h>

/* The particular grammar accepted by an LR parser is determined
 * by the entries in its action and goto tables.

 * Define some constants to name the states and possible actions.
 */

/* note: ERROR_STATE does not form its own row in the tables,
 * and so is not counted for num_states */
enum states { STATE_0, STATE_1, STATE_2, STATE_3, STATE_4, STATE_5, STATE_6,
            STATE_7, STATE_8, STATE_9, STATE_10, STATE_11, ERROR_STATE };
#define num_states 12

enum terminals { PLUS, TIMES, L_PAREN, R_PAREN, ID, NUM, END_CHAR };
#define num_terminals 9

enum nonterminals { NON_EE, NON_E, NON_T, NON_F };
#define num_nonterminals 4

/* The action_table contains actions which are structures
 * each structure will holds its name and an argument
 * if the action is SHIFT_ACTION, then argument will hold the new state
 * if the action is REDUCE_ACTION, then argument will hold the rule to reduce with
 * (the ERROR_ACTION and ACCEPT_ACTION ignore the argument, so give it any value)
 */
enum action_name { SHIFT_ACTION, REDUCE_ACTION, ERROR_ACTION, ACCEPT_ACTION };

struct action {
  int name;
  int argument;
} ;

struct action action_table[num_states][num_terminals];
int goto_table[num_states][num_nonterminals];

void run_lr_parser(void);
void setup_action_table(void);
void setup_goto_table(void);

struct action make_action (int, int);

/* two arrays with info about the rules
 * Note, we only need to store the number of items in the right-hand-side
 * and the non-terminal token on the left-hand-side */
int right_side_size[7] = { 1, 3, 1, 3, 1, 3, 1 };
int left_side[7] = { NON_EE, NON_E, NON_E, NON_T, NON_T, NON_F, NON_F };


/* An LR parser needs a stack to hold states and symbols.
 * We define a simple stack of integers, which will hold
 * instances from the enumerated types of states and symbols. */

/* define the stack variables and functions */
#define MAX_STACK 100        /* the maximum size of our stack */
int lr_stack[MAX_STACK];     /* the stack is an array of integers */
int stack_pointer = 0;       /* points to the next free element in the stack */

void stack_clear (void);          /* clear the stack */
void stack_push (int );           /* adds an item to the stack */
int stack_pop (void);             /* removes the top item from the stack */
int stack_peek (void);            /* looks at the top item of the stack */
void stack_overflow_error(void);  /* report an error if too many items are pushed */
void stack_underflow_error(void); /* report an error if too many items are popped */

/* global array to hold values of the input tokens */
#define MAX_SIZE 100        /* NB: Increase if you want more than 100 tokens */
int input_array[MAX_SIZE];


int main ()
{
  /* load the example tokens into the input array */
  input_array[0] = ID;
  input_array[1] = TIMES;
  input_array[2] = ID;
  input_array[3] = PLUS;
  input_array[4] = ID;
  input_array[5] = END_CHAR;
  /* put an explicit end marker, in case of bad errors in action table */
  input_array[6] = -1;

  run_lr_parser();

  return 0;
}

/* the lr parser processes the tokens in the input_array */
void run_lr_parser()
{
  int ip = 0; /* point to start of input_array */
  /* some place holders for variables in the routine */
  int current_state;
  int current_symbol;
  int next_state;
  int next_symbol;
  int i;

  struct action current_action;

  setup_action_table();
  setup_goto_table();
  stack_push(STATE_0);  /* start with state_0 on the stack */
   
  do { /* this do loop repeats until an error or accept state is reached */

    current_state = stack_peek();
    current_symbol = input_array[ip];

    if (current_symbol == -1)
      {
      printf("LR Parsing error -- out of input tokens \n");
      exit(EXIT_FAILURE);
      }

    /* retrieve the current action from the action_table */
    current_action = action_table[current_state][current_symbol];
    printf("Current action %d %d\n", current_action.name, current_action.argument);
   
    if (current_action.name == SHIFT_ACTION)
      { /* a shift action simply places the symbol and
       * the action's argument onto the stack,
       * before stepping the program pointer on one */
      printf("Shift %d\n", current_action.argument);
      stack_push(current_symbol);
      stack_push(current_action.argument);
      ip = ip+1;
      }
    else if (current_action.name == REDUCE_ACTION)
      { /* a reduce action removes symbols from the stack,
       * the number to be removed is twice the size of the rule's right-hand-side
       */
      for (i=0; i< (2*right_side_size[current_action.argument]); i++)
        {
          stack_pop();
        }
      next_state = stack_peek();
      /* the next symbol to be placed on the stack is the non-terminal
       * on the left of the rule */
      next_symbol = left_side[current_action.argument];
      stack_push(next_symbol);
      /* you then look at the goto table to retrieve the next stack number */
      if (goto_table[next_state][next_symbol] == ERROR_STATE)
        {
          printf("LR Parsing Error\n");
          exit(EXIT_FAILURE);
        }
      else
        stack_push(goto_table[next_state][next_symbol]);

      printf("Reduce %d\n", current_action.argument);
      }
    else if (current_action.name == ERROR_ACTION)
      {
      printf("LR Parsing Error\n");
      exit(EXIT_FAILURE);
      }
    else /* if (current_action.name == ACCEPT_ACTION) */
      {
      printf("LR Parser accepted input\n");
      break;
      }

  } while (1==1); /* repeats forever */

  /* you should break out to here if successful */
}

/* ********** handle the action and goto tables ********** */
void setup_action_table ()
{
  int i, j;

  /* clear the action table */
  for (i=0; i<num_states; i++)
    for (j=0; j<num_terminals; j++)
      action_table[i][j] = make_action( ERROR_ACTION, ERROR_STATE );

  /* state 0 */
  action_table[STATE_0][ID] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_0][NUM] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_0][L_PAREN] = make_action ( SHIFT_ACTION, STATE_4 );
  /* state 1 */
  action_table[STATE_1][PLUS] = make_action ( SHIFT_ACTION, STATE_6 );
  action_table[STATE_1][END_CHAR] = make_action ( ACCEPT_ACTION, STATE_0);
  /* state 2 */
  action_table[STATE_2][PLUS] = make_action ( REDUCE_ACTION, 2 );
  action_table[STATE_2][TIMES] = make_action ( SHIFT_ACTION, STATE_7 );
  action_table[STATE_2][R_PAREN] = make_action ( REDUCE_ACTION, 2 );
  action_table[STATE_2][END_CHAR] = make_action ( REDUCE_ACTION, 2 );
  /* state 3 */
  action_table[STATE_3][PLUS] = make_action ( REDUCE_ACTION, 4 );
  action_table[STATE_3][TIMES] = make_action ( REDUCE_ACTION, 4 );
  action_table[STATE_3][R_PAREN] = make_action ( REDUCE_ACTION, 4);
  action_table[STATE_3][END_CHAR] = make_action ( REDUCE_ACTION, 4);
  /* state 4 */
  action_table[STATE_4][L_PAREN] = make_action ( SHIFT_ACTION, STATE_4 );
  action_table[STATE_4][ID] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_4][NUM] = make_action ( SHIFT_ACTION, STATE_5 );
  /* state 5 */
  action_table[STATE_5][PLUS] = make_action ( REDUCE_ACTION, 6 );
  action_table[STATE_5][TIMES] = make_action ( REDUCE_ACTION, 6 );
  action_table[STATE_5][R_PAREN] = make_action (REDUCE_ACTION, 6);
  action_table[STATE_5][END_CHAR] = make_action (REDUCE_ACTION, 6);
  /* state 6 */
  action_table[STATE_6][L_PAREN] = make_action ( SHIFT_ACTION, STATE_4 );
  action_table[STATE_6][ID] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_6][NUM] = make_action ( SHIFT_ACTION, STATE_5 );
  /* state 7 */
  action_table[STATE_7][L_PAREN] = make_action ( SHIFT_ACTION, STATE_4 );
  action_table[STATE_7][ID] = make_action ( SHIFT_ACTION, STATE_5 );
  action_table[STATE_7][NUM] = make_action ( SHIFT_ACTION, STATE_5 );
  /* state 8 */
  action_table[STATE_8][PLUS] = make_action ( SHIFT_ACTION, STATE_6 );
  action_table[STATE_8][R_PAREN] = make_action ( SHIFT_ACTION, STATE_11 );
  /* state 9 */
  action_table[STATE_9][PLUS] = make_action ( REDUCE_ACTION, 1 );
  action_table[STATE_9][TIMES] = make_action (SHIFT_ACTION, 7 );
  action_table[STATE_9][R_PAREN] = make_action (REDUCE_ACTION, 1 );
  action_table[STATE_9][END_CHAR] = make_action (REDUCE_ACTION, 1 );
  /* state 10 */
  action_table[STATE_10][PLUS] = make_action ( REDUCE_ACTION, 3 );
  action_table[STATE_10][TIMES] = make_action ( REDUCE_ACTION, 3 );
  action_table[STATE_10][R_PAREN] = make_action (REDUCE_ACTION, 3);
  action_table[STATE_10][END_CHAR] = make_action (REDUCE_ACTION, 3);
  /* state 11 */
  action_table[STATE_11][PLUS] = make_action ( REDUCE_ACTION, 5 );
  action_table[STATE_11][TIMES] = make_action (REDUCE_ACTION, 5 );
  action_table[STATE_11][R_PAREN] = make_action (REDUCE_ACTION, 5);
  action_table[STATE_11][END_CHAR] = make_action (REDUCE_ACTION, 5);
}

void setup_goto_table ()
{
  int i, j;

  /* clear the goto table */
  for (i=0; i<num_states; i++)
    for (j=0; j<num_nonterminals; j++)
      goto_table[i][j] = ERROR_STATE;

  /* entries for the goto table */
  goto_table[STATE_0][NON_E] = STATE_1;
  goto_table[STATE_0][NON_T] = STATE_2;
  goto_table[STATE_0][NON_F] = STATE_3;
  goto_table[STATE_4][NON_E] = STATE_8;
  goto_table[STATE_4][NON_T] = STATE_2;
  goto_table[STATE_4][NON_F] = STATE_3;
  goto_table[STATE_6][NON_T] = STATE_9;
  goto_table[STATE_6][NON_F] = STATE_3;
  goto_table[STATE_7][NON_F] = STATE_10;  

}

/* construct an action structure given its two arguments */
struct action make_action (int name, int argument)
{
  struct action new_action;

  new_action.name = name;
  new_action.argument = argument;
  return new_action;
}

/* ********** functions for the stack ********** */
void stack_clear ()
{
  stack_pointer = 0;
}

void stack_push (int new_item)
{
  if (stack_pointer < MAX_STACK)
    {
      /* add item to the stack, and advance the stack pointer */
      lr_stack[stack_pointer] = new_item;
      stack_pointer = stack_pointer + 1;
    }
  else
    {
      stack_overflow_error();
    }
}

int stack_pop ()
{
  if (stack_pointer == 0)
    {
      stack_underflow_error();
    }

  stack_pointer = stack_pointer - 1;
  return lr_stack[stack_pointer];
     
}

int stack_peek ()
{
  if (stack_pointer == 0)
    {
      stack_underflow_error();
    }

  return lr_stack[stack_pointer-1];
}

void stack_overflow_error ()
{
  printf("Error: stack overflow\n");
  exit(0);
}

void stack_underflow_error ()
{
  printf("Error: stack underflow\n");
  exit(0);
}

----------------------------------------------------------------------------------------------
/* lexical analyser

 * outputs tokens for + * ( ) id num
*/

#include<stdio.h>
#include<string.h>
#include<ctype.h>

/* define the functions */
int get_next_input(void);
void setup_state_table(void);
void setup_output_table(void);

/* define the character classes and states */
enum chars { END_CHAR, PLUS, TIMES, L_PAREN, R_PAREN,
           CHAR, DIGIT, WHITESPACE, OTHER_CHAR };
#define num_chars 9

enum states { STATE_A, STATE_B, STATE_C, STATE_D, STATE_E, STATE_F, STATE_G,
            END_STATE, ERROR_STATE };
#define num_states 9

/* provide 2-dimensional arrays to store the transition and output tables */
int state_table[num_states][num_chars];
char * output_table[num_states][num_chars];

int main()
{
  int state = STATE_A;   /* the start state */
  int input;

  setup_state_table();
  setup_output_table();

  do {
    input = get_next_input();
    /* output the message for this state, if present */
    if (strcmp(output_table[state][input], ""))
      printf("%s\n", output_table[state][input]);

    state = state_table[state][input];
  } while ( (state != END_STATE) &&
          (state != ERROR_STATE) ); /* loop until we reach an end state */

  /* provide feedback on which state the machine has been left in */
  if (state == END_STATE) printf("\nVALID sequence\n");
  else if (state == ERROR_STATE) printf("\nLexical error\n");

  return 0;
}

/* read the next character from the input stream and return its class */
int get_next_input()
{
  int next_char = getchar();

  if (next_char == EOF)
    return END_CHAR;
  else if ( isdigit(next_char) )
    return DIGIT;
  else if ( next_char == '+' )
    return PLUS;
  else if ( next_char == '*' )
    return TIMES;
  else if ( next_char == '(' )
    return L_PAREN;
  else if ( next_char == ')' )
    return R_PAREN;
  else if ( isalpha(next_char) )
    return CHAR;
  else if ( isdigit(next_char) )
    return DIGIT;
  else if ( isspace(next_char) )
    return WHITESPACE;
  else
    return OTHER_CHAR;
}

/* create the state table for mapping the current state and input character
 * to a new state */
void setup_state_table()
{
  int i;

  /* transitions for states A to I are (almost) identical */
  for (i=STATE_A; i<=STATE_G; i++)
    {
      state_table[i][END_CHAR]   = END_STATE;
      state_table[i][PLUS]       = STATE_B;
      state_table[i][TIMES]      = STATE_C;
      state_table[i][L_PAREN]    = STATE_D;
      state_table[i][R_PAREN]    = STATE_E;
      state_table[i][CHAR]       = STATE_F;
      state_table[i][DIGIT]      = STATE_G;
      state_table[i][WHITESPACE] = STATE_A;
      state_table[i][OTHER_CHAR] = ERROR_STATE;
    }

  /* for states B, C, we return a lexical error
   * if you get two operators in succession */
  for (i=STATE_B; i<=STATE_C; i++)
    {
      state_table[i][PLUS]       = ERROR_STATE;
      state_table[i][TIMES]      = ERROR_STATE;
    }

  /* state F handles identifiers: digits form part of same identifier */
  state_table[STATE_F][DIGIT]      = STATE_F;

  /* state G handles numbers: contiguous characters are not allowed */
  state_table[STATE_G][CHAR]       = ERROR_STATE;

}

/* the output table stores the strings output on a successfully
 * completed state */
void setup_output_table()
{
  /* clear out all the strings first */
  int i,j;

  for (i=0; i<num_states; i++)
    for (j=0; j<num_chars; j++)
      output_table[i][j] = "";

  /* the operators + - * / can all be legitimately ended with any symbol,
   * except for another operator */

  /* outputs for state B - the plus symbol */
  output_table[STATE_B][END_CHAR]   = "plus";
  output_table[STATE_B][L_PAREN]    = "plus";
  output_table[STATE_B][R_PAREN]    = "plus";
  output_table[STATE_B][CHAR]       = "plus";
  output_table[STATE_B][DIGIT]      = "plus";
  output_table[STATE_B][WHITESPACE] = "plus";
  output_table[STATE_B][OTHER_CHAR] = "plus";
  /* outputs for state C - the times symbol */
  output_table[STATE_C][END_CHAR]   = "times";
  output_table[STATE_C][L_PAREN]    = "times";
  output_table[STATE_C][R_PAREN]    = "times";
  output_table[STATE_C][CHAR]       = "times";
  output_table[STATE_C][DIGIT]      = "times";
  output_table[STATE_C][WHITESPACE] = "times";
  output_table[STATE_C][OTHER_CHAR] = "times";

  /* the ( ) symbols can both be ended by any input */
  /* outputs for state D - the open bracket symbol */
  output_table[STATE_D][END_CHAR]   = "l_paren";
  output_table[STATE_D][PLUS]       = "l_paren";
  output_table[STATE_D][TIMES]      = "l_paren";
  output_table[STATE_D][L_PAREN]    = "l_paren";
  output_table[STATE_D][R_PAREN]    = "l_paren";
  output_table[STATE_D][CHAR]       = "l_paren";
  output_table[STATE_D][DIGIT]      = "l_paren";
  output_table[STATE_D][WHITESPACE] = "l_paren";
  output_table[STATE_D][OTHER_CHAR] = "l_paren";
  /* outputs for state E - the close bracket symbol */
  output_table[STATE_E][END_CHAR]   = "r_paren";
  output_table[STATE_E][PLUS]       = "r_paren";
  output_table[STATE_E][TIMES]      = "r_paren";
  output_table[STATE_E][L_PAREN]    = "r_paren";
  output_table[STATE_E][R_PAREN]    = "r_paren";
  output_table[STATE_E][CHAR]       = "r_paren";
  output_table[STATE_E][DIGIT]      = "r_paren";
  output_table[STATE_E][WHITESPACE] = "r_paren";
  output_table[STATE_E][OTHER_CHAR] = "r_paren";

  /* an identifier can be finished by any input except an id or digit */
  /* outputs for state F - the identifier symbol */
  output_table[STATE_F][END_CHAR]   = "identifier";
  output_table[STATE_F][PLUS]       = "identifier";
  output_table[STATE_F][TIMES]      = "identifier";
  output_table[STATE_F][L_PAREN]    = "identifier";
  output_table[STATE_F][R_PAREN]    = "identifier";
  output_table[STATE_F][WHITESPACE] = "identifier";
  output_table[STATE_F][OTHER_CHAR] = "identifier";

  /* a digit can be finished by any input except a further digit */
  /* outputs for state G - the identifier symbol */
  output_table[STATE_G][END_CHAR]   = "num";
  output_table[STATE_G][PLUS]       = "num";
  output_table[STATE_G][TIMES]      = "num";
  output_table[STATE_G][L_PAREN]    = "num";
  output_table[STATE_G][R_PAREN]    = "num";
  output_table[STATE_G][CHAR]       = "num";
  output_table[STATE_G][WHITESPACE] = "num";
  output_table[STATE_G][OTHER_CHAR] = "num";
}



0
Comment
Question by:007singh
[X]
Welcome to Experts Exchange

Add your voice to the tech community where 5M+ people just like you are talking about what matters.

  • Help others & share knowledge
  • Earn cash & points
  • Learn & ask questions
1 Comment
 
LVL 11

Accepted Solution

by:
cup earned 300 total points
ID: 8179183
1) Create a routine for get_current_input.  This is the result of get_next_input

2) In the parser, replace

current_symbol = input_array[ip];

with current_symbol = get_current_input.


3) Where you increment the input pointer, ip = ip + 1;, use current_symbol = get_next_input ();
0

Featured Post

VIDEO: THE CONCERTO CLOUD FOR HEALTHCARE

Modern healthcare requires a modern cloud. View this brief video to understand how the Concerto Cloud for Healthcare can help your organization.

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

Preface I don't like visual development tools that are supposed to write a program for me. Even if it is Xcode and I can use Interface Builder. Yes, it is a perfect tool and has helped me a lot, mainly, in the beginning, when my programs were small…
This tutorial is posted by Aaron Wojnowski, administrator at SDKExpert.net.  To view more iPhone tutorials, visit www.sdkexpert.net. This is a very simple tutorial on finding the user's current location easily. In this tutorial, you will learn ho…
Video by: Grant
The goal of this video is to provide viewers with basic examples to understand and use nested-loops in the C programming language.
Video by: Grant
The goal of this video is to provide viewers with basic examples to understand and use while-loops in the C programming language.
Suggested Courses

752 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question