Difference between revisions of "Recursive descent parsing"

From CodeCodex

 
(Implementations)
Line 126: Line 126:
 
     expect(period);
 
     expect(period);
 
}
 
}
 +
</pre>
 +
 +
===Java===
 +
This program illustrates a simple recursive descent parser which reads simple integer expressions in +, -, *, /, and ( ... ) and calculates the answer. The program is broken down as follows:
 +
 +
* A recursive descent parser.
 +
* The main driver; reads from the console (not a file).
 +
* The scanner.
 +
* A class to define the allowable tokens.
 +
 +
Original Source:[http://www.cs.wm.edu/~noonan/java/parsing/]
 +
====Parser.java====
 +
<pre>
 +
import java.io.*;
 +
 +
 +
/* This program illustrates recursive descent parsing using a
 +
  pure procedural approach.
 +
 +
  The grammar:
 +
 +
  statement = { expression  ";" } "." 
 +
  expression = term { ( "+" | "-" ) term }
 +
  term      = factor { ( "*" | "/" ) factor }
 +
  factor    = number | "(" expression ")"
 +
   
 +
*/
 +
 +
public class Parser {
 +
 +
    private Scanner scanner;
 +
 +
 +
public Parser(Scanner scanner) {
 +
    this.scanner = scanner;
 +
} // Parser
 +
 +
 +
public void run ( ) {
 +
    scanner.getToken( );
 +
    statement( );
 +
} // run
 +
 +
 +
private void statement ( ) {
 +
    //  statement = { expression  ";" } "." 
 +
    while(scanner.token != Token.period) {
 +
int value = expression( );
 +
System.out.println("=> " + value);
 +
scanner.getToken( );  // flush ";"
 +
    } // while
 +
} // statement
 +
 +
 +
private int expression ( ) {
 +
    //    expression = term { ( "+" | "-" ) term }
 +
    int left = term( );
 +
    while (scanner.token == Token.plusop ||
 +
  scanner.token == Token.minusop) {
 +
int saveToken = scanner.token;
 +
scanner.getToken( );
 +
switch (saveToken) {
 +
    case Token.plusop:
 +
left += term( );
 +
break;
 +
    case Token.minusop:
 +
left -= term( );
 +
break;
 +
} // switch
 +
    } // while
 +
    return left;
 +
} // expression
 +
 +
 +
private int term ( ) {
 +
    //    term = factor { ( "*" | "/" ) factor }
 +
    int left = factor( );
 +
    while (scanner.token == Token.timesop ||
 +
  scanner.token == Token.divideop) {
 +
int saveToken = scanner.token;
 +
scanner.getToken( );
 +
switch (saveToken) {
 +
    case Token.timesop:
 +
left *= factor( );
 +
break;
 +
    case Token.divideop:
 +
left /= factor( );
 +
break;
 +
} // switch
 +
    } // while
 +
    return left;
 +
} // term
 +
 +
 +
private int factor ( ) {
 +
    //    factor    = number | "(" expression ")"
 +
    int value = 0;
 +
    switch (scanner.token) {
 +
case Token.number:
 +
    value = scanner.number( );
 +
    scanner.getToken( );  // flush number
 +
    break;
 +
case Token.lparen:
 +
    scanner.getToken( );
 +
    value = expression( );
 +
    if (scanner.token != Token.rparen)
 +
scanner.error("Missing ')'");
 +
    scanner.getToken( );  // flush ")"
 +
    break;
 +
default:
 +
    scanner.error("Expecting number or (");
 +
    break;
 +
    } // switch
 +
    return value;
 +
} // factor
 +
 +
} // class Parser
 +
</pre>
 +
 +
====Scanner.java====
 +
<pre>
 +
import java.io.*;
 +
 +
public class Scanner {
 +
 +
private char ch = ' ';
 +
 +
private char ident = ' ';
 +
 +
private int intValue = 0;
 +
 +
private Buffer buffer;
 +
 +
public int token;
 +
 +
 +
public Scanner (DataInputStream in) {
 +
    buffer = new Buffer(in);
 +
    token = Token.semicolon;
 +
} // Scanner
 +
 +
 +
public int getToken ( ) {
 +
    while (Character.isWhitespace(ch))
 +
ch = buffer.get( );
 +
    if (Character.isLetter(ch)) {
 +
ident = Character.toLowerCase(ch);
 +
ch = buffer.get( );
 +
token = Token.letter;
 +
    }
 +
    else if (Character.isDigit(ch)) {
 +
intValue = getNumber( );
 +
token = Token.number;
 +
    }
 +
    else {
 +
switch (ch) {
 +
    case ';' : ch = buffer.get( );
 +
token = Token.semicolon;
 +
break;
 +
 +
    case '.' : ch = buffer.get( );
 +
token = Token.period;
 +
break;
 +
 +
    case '+' : ch = buffer.get( );
 +
token = Token.plusop;
 +
break;
 +
 +
    case '-' : ch = buffer.get( );
 +
token = Token.minusop;
 +
break;
 +
 +
    case '*' : ch = buffer.get( );
 +
token = Token.timesop;
 +
break;
 +
 +
    case '/' : ch = buffer.get( );
 +
token = Token.divideop;
 +
break;
 +
 +
    case '=' : ch = buffer.get( );
 +
token = Token.assignop;
 +
break;
 +
 +
    case '(' : ch = buffer.get( );
 +
token = Token.lparen;
 +
break;
 +
 +
    case ')' : ch = buffer.get( );
 +
token = Token.rparen;
 +
break;
 +
 +
    default : error ("Illegal character " + ch );
 +
break;
 +
} // switch
 +
    } // if
 +
    return token;
 +
} // getToken
 +
 +
 +
public int number ( ) {
 +
    return intValue;
 +
} // number
 +
 +
 +
public char letter ( ) {
 +
    return ident;
 +
} // letter
 +
 +
 +
public void match (int which) {
 +
    token = getToken( );
 +
    if (token != which) {
 +
error("Invalid token " + Token.toString(token) +
 +
      "-- expecting " + Token.toString(which));
 +
System.exit(1);
 +
    } // if
 +
} // match
 +
 +
 +
public void error (String msg) {
 +
    System.err.println(msg);
 +
    System.exit(1);
 +
} // error
 +
 +
 +
private int getNumber ( ) {
 +
    int rslt = 0;
 +
    do {
 +
rslt = rslt * 10 + Character.digit(ch, 10);
 +
ch = buffer.get( );
 +
    } while (Character.isDigit(ch));
 +
    return rslt;
 +
} // getNumber
 +
 +
} // Scanner
 +
 +
 +
 +
class Buffer {
 +
 +
private String line = "";
 +
 +
private int column = 0;
 +
 +
private int lineNo = 0;
 +
 +
private DataInputStream  in;
 +
 +
 +
public Buffer (DataInputStream in) {
 +
    this.in = in;
 +
} // Buffer
 +
 +
 +
public char get ( ) {
 +
    column++;
 +
    if (column >= line.length()) {
 +
try {
 +
    line = in.readLine( );
 +
} catch (Exception e) {
 +
    System.err.println("Invalid read operation");
 +
    System.exit(1);
 +
} // try
 +
if (line == null)
 +
    System.exit(0);
 +
column = 0;
 +
lineNo++;
 +
System.out.println(line);
 +
line = line + "\n";
 +
    } // if column
 +
    return line.charAt(column);
 +
} // get
 +
 +
} // class Buffer
 +
</pre>
 +
 +
====Token.java====
 +
<pre>
 +
class Token {
 +
 +
public static final int semicolon = 0;
 +
public static final int period    = 1;
 +
public static final int plusop    = 2;
 +
public static final int minusop  = 3;
 +
public static final int timesop  = 4;
 +
public static final int divideop  = 5;
 +
public static final int assignop  = 6;
 +
public static final int lparen    = 7;
 +
public static final int rparen    = 8;
 +
public static final int letter    = 9;
 +
public static final int number    = 10;
 +
 +
private static String[] spelling = {
 +
    ";", ".", "+", "-", "*", "/", "=", "(", ")",
 +
    "letter", "number"};
 +
 +
public static String toString (int i) {
 +
    if (i < 0 || i > number)
 +
return "";
 +
    return spelling[i];
 +
} // toString
 +
 +
} // Token
 +
</pre>
 +
 +
====Test.java====
 +
<pre>
 +
import java.io.*;
 +
 +
public class Test {
 +
 +
public static void main(String args[]) {
 +
    // command args ignored
 +
    Parser parser = new Parser(new Scanner(
 +
new DataInputStream(System.in)));
 +
    parser.run( );
 +
    System.out.println("done");
 +
} // main
 +
   
 +
} // class Test
 
</pre>
 
</pre>
 
[[Category:C]]
 
[[Category:C]]
 +
[[Category:Java]]

Revision as of 18:14, 16 May 2006

Implementations

C

typedef enum {ident, number, lparen, rparen, times, slash, plus,
    minus, eql, neq, lss, leq, gtr, geq, callsym, beginsym, semicolon,
    endsym, ifsym, whilesym, becomes, thensym, dosym, constsym, comma,
    varsym, procsym, period, oddsym} Symbol;

Symbol sym;
void getsym(void);
void error(const char msg[]);
void expression(void);

int accept(Symbol s) {
    if (sym == s) {
        getsym();
        return 1;
    }
    return 0;
}

int expect(Symbol s) {
    if (accept(s))
        return 1;
    error("expect: unexpected symbol");
    return 0;
}

void factor(void) {
    if (accept(ident)) {
        ;
    } else if (accept(number)) {
        ;
    } else if (accept(lparen)) {
        expression();
        expect(rparen);
    } else {
        error("factor: syntax error");
        getsym();
    }
}

void term(void) {
    factor();
    while (sym == times || sym == slash) {
        getsym();
        factor();
    }
}

void expression(void) {
    if (sym == plus || sym == minus)
        getsym();
    term();
    while (sym == plus || sym == minus) {
        getsym();
        term();
    }
}

void condition(void) {
    if (accept(oddsym)) {
        expression();
    } else {
        expression();
        if (sym == eql || sym == neq || sym == lss ||
            sym == leq || sym == gtr || sym == geq) {
            getsym();
            expression();
        } else {
            error("condition: invalid operator");
            getsym();
        }
    }
}

void statement(void) {
    if (accept(ident)) {
        expect(becomes);
        expression();
    } else if (accept(callsym)) {
        expect(ident);
    } else if (accept(beginsym)) {
        do {
            statement();
        } while (accept(semicolon));
        expect(endsym);
    } else if (accept(ifsym)) {
        condition();
        expect(thensym);
        statement();
    } else if (accept(whilesym)) {
        condition();
        expect(dosym);
        statement();
    }
}

void block(void) {
    if (accept(constsym)) {
        do {
            expect(ident);
            expect(eql);
            expect(number);
        } while (accept(comma));
        expect(semicolon);
    }
    if (accept(varsym)) {
        do {
            expect(ident);
        } while (accept(comma));
        expect(semicolon);
    }
    while (accept(procsym)) {
        expect(ident);
        expect(semicolon);
        block();
        expect(semicolon);
    }
    statement();
}

void program(void) {
    getsym();
    block();
    expect(period);
}

Java

This program illustrates a simple recursive descent parser which reads simple integer expressions in +, -, *, /, and ( ... ) and calculates the answer. The program is broken down as follows:

  • A recursive descent parser.
  • The main driver; reads from the console (not a file).
  • The scanner.
  • A class to define the allowable tokens.

Original Source:[1]

Parser.java

import java.io.*;


/* This program illustrates recursive descent parsing using a
   pure procedural approach.

   The grammar:

   statement = { expression  ";" } "."  
   expression = term { ( "+" | "-" ) term }
   term      = factor { ( "*" | "/" ) factor }
   factor    = number | "(" expression ")"
    
*/

public class Parser {

    private Scanner scanner;


public Parser(Scanner scanner) {
    this.scanner = scanner;
} // Parser


public void run ( ) {
    scanner.getToken( );
    statement( );
} // run


private void statement ( ) {
    //   statement = { expression  ";" } "."  
    while(scanner.token != Token.period) {
	int value = expression( );
	System.out.println("=> " + value);
	scanner.getToken( );  // flush ";"
    } // while
} // statement


private int expression ( ) {
    //    expression = term { ( "+" | "-" ) term }
    int left = term( );
    while (scanner.token == Token.plusop || 
	   scanner.token == Token.minusop) {
	int saveToken = scanner.token;
	scanner.getToken( );
	switch (saveToken) {
	    case Token.plusop:
		left += term( );
		break;
	    case Token.minusop:
		left -= term( );
		break;
	} // switch
    } // while
    return left;
} // expression


private int term ( ) {
    //    term = factor { ( "*" | "/" ) factor }
    int left = factor( );
    while (scanner.token == Token.timesop || 
	   scanner.token == Token.divideop) {
	int saveToken = scanner.token;
	scanner.getToken( );
	switch (saveToken) {
	    case Token.timesop:
		left *= factor( );
		break;
	    case Token.divideop:
		left /= factor( );
		break;
	} // switch
    } // while
    return left;
} // term


private int factor ( ) {
    //    factor    = number | "(" expression ")"
    int value = 0;
    switch (scanner.token) {
	case Token.number:
	    value = scanner.number( );
	    scanner.getToken( );  // flush number
	    break;
	case Token.lparen:
	    scanner.getToken( );
	    value = expression( );
	    if (scanner.token != Token.rparen)
		scanner.error("Missing ')'");
	    scanner.getToken( );  // flush ")"
	    break;
	default:
	    scanner.error("Expecting number or (");
	    break;
    } // switch
    return value;
} // factor

} // class Parser

Scanner.java

import java.io.*;

public class Scanner {

private char ch = ' ';

private char ident = ' ';

private int intValue = 0;

private Buffer buffer;

public int token;


public Scanner (DataInputStream in) {
    buffer = new Buffer(in);
    token = Token.semicolon;
} // Scanner


public int getToken ( ) {
    while (Character.isWhitespace(ch))
	ch = buffer.get( );
    if (Character.isLetter(ch)) {
	ident = Character.toLowerCase(ch);
	ch = buffer.get( );
	token = Token.letter;
    }
    else if (Character.isDigit(ch)) {
	intValue = getNumber( );
	token = Token.number;
    } 
    else {
	switch (ch) {
	    case ';' : ch = buffer.get( );
		token = Token.semicolon;
		break;

	    case '.' : ch = buffer.get( );
		token = Token.period;
		break;

	    case '+' : ch = buffer.get( );
		token = Token.plusop;
		break;

	    case '-' : ch = buffer.get( );
		token = Token.minusop;
		break;

	    case '*' : ch = buffer.get( );
		token = Token.timesop;
		break;

	    case '/' : ch = buffer.get( );
		token = Token.divideop;
		break;

	    case '=' : ch = buffer.get( );
		token = Token.assignop;
		break;

	    case '(' : ch = buffer.get( );
		token = Token.lparen;
		break;

	    case ')' : ch = buffer.get( );
		token = Token.rparen;
		break;

	    default : error ("Illegal character " + ch );
		break;
	} // switch
    } // if
    return token;
} // getToken


public int number ( ) {
    return intValue;
} // number


public char letter ( ) {
    return ident;
} // letter


public void match (int which) {
    token = getToken( );
    if (token != which) {
	error("Invalid token " + Token.toString(token) +
	      "-- expecting " + Token.toString(which));
	System.exit(1);
    } // if
} // match


public void error (String msg) {
    System.err.println(msg);
    System.exit(1);
} // error


private int getNumber ( ) {
    int rslt = 0;
    do {
	rslt = rslt * 10 + Character.digit(ch, 10);
	ch = buffer.get( );
    } while (Character.isDigit(ch));
    return rslt;
} // getNumber

} // Scanner



class Buffer {

private String line = "";

private int column = 0;

private int lineNo = 0;

private DataInputStream  in;


public Buffer (DataInputStream in) {
    this.in = in;
} // Buffer


public char get ( ) {
    column++;
    if (column >= line.length()) {
	try {
	    line = in.readLine( );
	} catch (Exception e) {
	    System.err.println("Invalid read operation");
	    System.exit(1);
	} // try
	if (line == null) 
	    System.exit(0);
	column = 0;
	lineNo++;
	System.out.println(line);
	line = line + "\n";
    } // if column
    return line.charAt(column);
} // get

} // class Buffer

Token.java

class Token {

public static final int semicolon = 0;
public static final int period    = 1;
public static final int plusop    = 2;
public static final int minusop   = 3;
public static final int timesop   = 4;
public static final int divideop  = 5;
public static final int assignop  = 6;
public static final int lparen    = 7;
public static final int rparen    = 8;
public static final int letter    = 9;
public static final int number    = 10;

private static String[] spelling = {
    ";", ".", "+", "-", "*", "/", "=", "(", ")",
    "letter", "number"};

public static String toString (int i) {
    if (i < 0 || i > number)
	return "";
    return spelling[i];
} // toString

} // Token

Test.java

import java.io.*;

public class Test {

public static void main(String args[]) {
    // command args ignored
    Parser parser = new Parser(new Scanner(
	new DataInputStream(System.in)));
    parser.run( );
    System.out.println("done");
} // main
    
} // class Test