Recursive descent parsing
From CodeCodex
Contents |
[edit] Implementations
[edit] C
This source was copied from: http://en.wikipedia.org/wiki/Recursive_descent_parser
typedef enum {ident, number, lparen, rparen, times, slash, plus,
minus, eql, neq, lss, leq, gtr, geq, callsym, beginsym, semicolon,
endsym, ifsym, whilesym, becomes, thensym, dosym, constsym, comma,
varsym, procsym, period, oddsym} Symbol;
Symbol sym;
void getsym(void);
void error(const char msg[]);
void expression(void);
int accept(Symbol s) {
if (sym == s) {
getsym();
return 1;
}
return 0;
}
int expect(Symbol s) {
if (accept(s))
return 1;
error("expect: unexpected symbol");
return 0;
}
void factor(void) {
if (accept(ident)) {
;
} else if (accept(number)) {
;
} else if (accept(lparen)) {
expression();
expect(rparen);
} else {
error("factor: syntax error");
getsym();
}
}
void term(void) {
factor();
while (sym == times || sym == slash) {
getsym();
factor();
}
}
void expression(void) {
if (sym == plus || sym == minus)
getsym();
term();
while (sym == plus || sym == minus) {
getsym();
term();
}
}
void condition(void) {
if (accept(oddsym)) {
expression();
} else {
expression();
if (sym == eql || sym == neq || sym == lss ||
sym == leq || sym == gtr || sym == geq) {
getsym();
expression();
} else {
error("condition: invalid operator");
getsym();
}
}
}
void statement(void) {
if (accept(ident)) {
expect(becomes);
expression();
} else if (accept(callsym)) {
expect(ident);
} else if (accept(beginsym)) {
do {
statement();
} while (accept(semicolon));
expect(endsym);
} else if (accept(ifsym)) {
condition();
expect(thensym);
statement();
} else if (accept(whilesym)) {
condition();
expect(dosym);
statement();
}
}
void block(void) {
if (accept(constsym)) {
do {
expect(ident);
expect(eql);
expect(number);
} while (accept(comma));
expect(semicolon);
}
if (accept(varsym)) {
do {
expect(ident);
} while (accept(comma));
expect(semicolon);
}
while (accept(procsym)) {
expect(ident);
expect(semicolon);
block();
expect(semicolon);
}
statement();
}
void program(void) {
getsym();
block();
expect(period);
}
[edit] Java
This program illustrates a simple recursive descent parser which reads simple integer expressions in +, -, *, /, and ( ... ) and calculates the answer. The program is broken down as follows:
- A recursive descent parser.
- The main driver; reads from the console (not a file).
- The scanner.
- A class to define the allowable tokens.
Original Source:[1]
[edit] Parser.java
import java.io.*;
/* This program illustrates recursive descent parsing using a
pure procedural approach.
The grammar:
statement = { expression ";" } "."
expression = term { ( "+" | "-" ) term }
term = factor { ( "*" | "/" ) factor }
factor = number | "(" expression ")"
*/
public class Parser {
private Scanner scanner;
public Parser(Scanner scanner) {
this.scanner = scanner;
} // Parser
public void run ( ) {
scanner.getToken( );
statement( );
} // run
private void statement ( ) {
// statement = { expression ";" } "."
while(scanner.token != Token.period) {
int value = expression( );
System.out.println("=> " + value);
scanner.getToken( ); // flush ";"
} // while
} // statement
private int expression ( ) {
// expression = term { ( "+" | "-" ) term }
int left = term( );
while (scanner.token == Token.plusop ||
scanner.token == Token.minusop) {
int saveToken = scanner.token;
scanner.getToken( );
switch (saveToken) {
case Token.plusop:
left += term( );
break;
case Token.minusop:
left -= term( );
break;
} // switch
} // while
return left;
} // expression
private int term ( ) {
// term = factor { ( "*" | "/" ) factor }
int left = factor( );
while (scanner.token == Token.timesop ||
scanner.token == Token.divideop) {
int saveToken = scanner.token;
scanner.getToken( );
switch (saveToken) {
case Token.timesop:
left *= factor( );
break;
case Token.divideop:
left /= factor( );
break;
} // switch
} // while
return left;
} // term
private int factor ( ) {
// factor = number | "(" expression ")"
int value = 0;
switch (scanner.token) {
case Token.number:
value = scanner.number( );
scanner.getToken( ); // flush number
break;
case Token.lparen:
scanner.getToken( );
value = expression( );
if (scanner.token != Token.rparen)
scanner.error("Missing ')'");
scanner.getToken( ); // flush ")"
break;
default:
scanner.error("Expecting number or (");
break;
} // switch
return value;
} // factor
} // class Parser
[edit] Scanner.java
import java.io.*;
public class Scanner {
private char ch = ' ';
private char ident = ' ';
private int intValue = 0;
private Buffer buffer;
public int token;
public Scanner (DataInputStream in) {
buffer = new Buffer(in);
token = Token.semicolon;
} // Scanner
public int getToken ( ) {
while (Character.isWhitespace(ch))
ch = buffer.get( );
if (Character.isLetter(ch)) {
ident = Character.toLowerCase(ch);
ch = buffer.get( );
token = Token.letter;
}
else if (Character.isDigit(ch)) {
intValue = getNumber( );
token = Token.number;
}
else {
switch (ch) {
case ';' : ch = buffer.get( );
token = Token.semicolon;
break;
case '.' : ch = buffer.get( );
token = Token.period;
break;
case '+' : ch = buffer.get( );
token = Token.plusop;
break;
case '-' : ch = buffer.get( );
token = Token.minusop;
break;
case '*' : ch = buffer.get( );
token = Token.timesop;
break;
case '/' : ch = buffer.get( );
token = Token.divideop;
break;
case '=' : ch = buffer.get( );
token = Token.assignop;
break;
case '(' : ch = buffer.get( );
token = Token.lparen;
break;
case ')' : ch = buffer.get( );
token = Token.rparen;
break;
default : error ("Illegal character " + ch );
break;
} // switch
} // if
return token;
} // getToken
public int number ( ) {
return intValue;
} // number
public char letter ( ) {
return ident;
} // letter
public void match (int which) {
token = getToken( );
if (token != which) {
error("Invalid token " + Token.toString(token) +
"-- expecting " + Token.toString(which));
System.exit(1);
} // if
} // match
public void error (String msg) {
System.err.println(msg);
System.exit(1);
} // error
private int getNumber ( ) {
int rslt = 0;
do {
rslt = rslt * 10 + Character.digit(ch, 10);
ch = buffer.get( );
} while (Character.isDigit(ch));
return rslt;
} // getNumber
} // Scanner
class Buffer {
private String line = "";
private int column = 0;
private int lineNo = 0;
private DataInputStream in;
public Buffer (DataInputStream in) {
this.in = in;
} // Buffer
public char get ( ) {
column++;
if (column >= line.length()) {
try {
line = in.readLine( );
} catch (Exception e) {
System.err.println("Invalid read operation");
System.exit(1);
} // try
if (line == null)
System.exit(0);
column = 0;
lineNo++;
System.out.println(line);
line = line + "\n";
} // if column
return line.charAt(column);
} // get
} // class Buffer
[edit] Token.java
class Token {
public static final int semicolon = 0;
public static final int period = 1;
public static final int plusop = 2;
public static final int minusop = 3;
public static final int timesop = 4;
public static final int divideop = 5;
public static final int assignop = 6;
public static final int lparen = 7;
public static final int rparen = 8;
public static final int letter = 9;
public static final int number = 10;
private static String[] spelling = {
";", ".", "+", "-", "*", "/", "=", "(", ")",
"letter", "number"};
public static String toString (int i) {
if (i < 0 || i > number)
return "";
return spelling[i];
} // toString
} // Token
[edit] Test.java
import java.io.*;
public class Test {
public static void main(String args[]) {
// command args ignored
Parser parser = new Parser(new Scanner(
new DataInputStream(System.in)));
parser.run( );
System.out.println("done");
} // main
} // class Test
[edit] OCaml
Recursive-descent parsers can be written in OCaml using the camlp4 stream-parsing syntax extension:
let rec factor = parser
| [< ''('; e = expr; '')' >] -> e
| [< 'c >] -> int_of_string (String.make 1 c)
and term = parser
| [< e1 = factor; e = term_aux e1 >] -> e
and term_aux e1 = parser
| [< ''*'; e2 = term >] -> e1 * e2
| [< ''/'; e2 = term >] -> e1 / e2
| [< >] -> e1
and expr = parser
| [< e1 = term; e = expr_aux e1 >] -> e
and expr_aux e1 = parser
| [< ''+'; e2 = expr >] -> e1 + e2
| [< ''-'; e2 = expr >] -> e1 - e2
| [< >] -> e1
let rec statement = parser
| [< e = expr; '';'; ''.'; stream >] ->
Printf.printf "= %d\n%!" e;
statement stream
let rec read() = match input_char stdin with
| ' ' | '\t' | '\n' -> [< read() >]
| c -> [< 'c; read() >]
let () = statement (read())
For example:
$ ocamlc -g -dtypes -pp camlp4o parser.ml -o parser $ ./parser 1+2+3;. = 6 1+2*3;. = 7 1+2*3+4;. = 11

