#!/local/bin/perl # # Recursive-descent parser for an RFC 2234 ABNF grammar # It's probably silly to do this in Perl, but wottehell.... # use English; use ParserIO; inputfile(); if (rulelist() && EoF()) { print "OK\n"; print "Maximum jumpback: $maxback\n"; # Symbol table - check for undefinded rules for $name (sort(keys(%used))) { if (!$defined{$name}) { print "$name: Used $used{$name} times, but not defined\n"; } } for $name (sort(keys(%defined))) { if ($used{$name} == 1) { print "$name: Defined but not used\n"; } } } else { print "Not OK\n"; print "Syntax error: "; PrintTrailer(); } # General: Return 1 for OK, 0 for failure. sub rulelist { my $point; my $stack = enter(rulelist); $stat = 1; while ($stat == 1) { $point = mark(); if (rule()) { # OK - rule eaten } else { rollback($point); star_c_wsp(); $stat = c_nl(); } } leave($stack); } sub rule { my $stack = enter(rule); my $startpos = $ParserIO::pos; my $name = rulename(); # much more elegant, but handles no symbols: # my $stat = rulename() && defined_as() && elements() && c_nl(); if ($name && defined_as()) { $definition = elements(); if ($definition && c_nl()) { ++ $defined{$name}; $definition{$name} = $definition; $stat = 1; print "Rule: ", @ParserIO::buffer[$startpos..$ParserIO::pos-1]; # note that rules are constrained to end with an NL.... } else { $stat = 0; } } else { $stat = 0; } leave($stack); return $stat; } sub rulename { my $stack = enter(rulename); my $first = mark(); ALPHA() || return 0; while (ALPHA() || DIGIT() || Character("-")) {} # hacky.... my $rulename = join("", @ParserIO::buffer[$first..$ParserIO::pos-1]); #print "Rulename: ", $rulename, "\n"; ++ $used{$rulename}; # note - not rolled back on parse rollback leave($stack); return $rulename; } sub defined_as { my $stack = enter(defined_as); star_c_wsp(); Character("=") || return 0; Character("/"); # and ignore error code my $status = star_c_wsp(); leave($stack); return $status; } sub elements { my $stack = enter(elements); alternation() || return 0; star_c_wsp(); return 1; } sub star_c_wsp { my $stack = enter(star_c_wsp); while (c_wsp()){}; leave($stack); } sub c_wsp { my $stack = enter(c_wsp); if (WSP()) { # note: WSP eats nothing on failure, so needs no rollback leave($stack); return 1; } else { my $pos = mark(); if (c_nl() && WSP()) { leave($stack); return 1; } else { rollback($pos); leave($stack); return 0; } } } sub c_nl { my $stack = enter(c_nl); my $pos = mark(); if (comment()) { return 1; } else { rollback($pos); return CRLF(); } } sub comment { my $stack = enter(comment); Character(";") || return 0; while (WSP() || VCHAR()){} CRLF() || return 0; return 1; } sub alternation { my $stack = enter(alternation); concatenation() || return 0; while (star_c_wsp() && Character("/") && star_c_wsp() && concatenation()){} return 1; } sub concatenation { my $stack = enter(concatenation); repetition() || return 0; while (c_wsp() && star_c_wsp() && repetition()){}; return 1; } sub repetition { my $stack = enter(repetition); repeat(); # and ignore result - optional my $stat = element(); leave($stack); return $stat; } sub repeat { my $stack = enter(repeat); my $pos = mark(); # First try the *digit * *digit while (DIGIT()){}; if (Character("*")) { while (DIGIT()){}; } else { rollback($pos); DIGIT() || return 0; while (DIGIT()){}; } } sub element { my $stack = enter(element); my $pos = mark(); if (rulename()) { return 1; } elsif (rollback($pos) && group()) { return 1; } elsif (rollback($pos) && option()) { return 1; } elsif (rollback($pos) && char_val()) { return 1; } elsif (rollback($pos) && num_val()) { return 1; } elsif (rollback($pos) && prose_val()) { return 1; } } sub group { my $stack = enter(group); return Character("(") && star_c_wsp() && alternation() && star_c_wsp && Character(")"); } sub option { my $stack = enter(option); return Character("[") && star_c_wsp() && alternation() && star_c_wsp && Character("]"); } sub char_val { my $stack = enter(char_val); DQUOTE() || return 0; while (CharRange(0x20, 0x21) || CharRange(0x23, 0x7E)) {} DQUOTE() || return 0; } sub num_val { Character("%") || return 0; my $pos = mark(); if (bin_val()) { return 1; } elsif (rollback($pos) && dec_val()) { return 1; } elsif (rollback($pos) && hex_val()) { return 1; } else { rollback($pos); return 0; } } sub bin_val { Character("b") || return 0; BIT() || return 0; while (BIT()){} opt: { my $pos = mark(); if (Character(".")) { BIT() || (rollback($pos) , last opt); while (BIT()){} } elsif (Character("-")) { BIT() || (rollback($pos), last opt); while (BIT()){} } } return 1; } sub dec_val { Character("d") || return 0; DIGIT() || return 0; while (DIGIT()){} opt: { my $pos = mark(); if (Character(".")) { DIGIT() || (rollback($pos), last opt); while (DIGIT()){} } elsif (Character("-")) { DIGIT() || (rollback($pos), last opt); while (DIGIT()){} } } return 1; } sub hex_val { Character("x") || return 0; HEXDIG() || return 0; while (HEXDIG()){} opt: { my $pos = mark(); if (Character(".")) { HEXDIG() || (rollback($pos), last opt); while (HEXDIG()){} } elsif (Character("-")) { HEXDIG() || (rollback($pos), last opt); while (HEXDIG()){} } } return 1; } sub prose_val { my $pos = mark(); Character("<") || return 0; while (CharRange(0x20, 0x3d) || CharRange(0x3f, 0x7e)) {}; Character(">") || (rollback($pos), return 0); } sub ALPHA { return (CharRange(0x41, 0x5A) || CharRange(0x61, 0x7A)); } sub BIT { return(Character("0") || Character("1")); } sub CHAR { return (CharRange(0x01, 0x7f)); } sub CR { return(Character("\n")); } sub CRLF { #return (CR() && LF()); return LF(); # bowing to Unix reality.... } sub CTL { return (CharRange(0, 0x1F) || CharRange(0x7f, 0x7f)); } sub DIGIT { return(CharRange(0x30, 0x39)); } sub DQUOTE { return(CharRange(0x22, 0x22)); } sub HEXDIG { return(DIGIT() || Character("A") || Character("B") || Character("C") || Character("D") || Character("E") || Character("F")); } sub HTAB { return(CharRange(0x09, 0x09)); } sub LF { return(CharRange(0x0A, 0x0A)); } sub LWSP { while (WSP() || (CRLF() && WSP())) {}; } sub OCTET { return(CharRange(0x00, 0xFF)); } sub SP { return(CharRange(0x20, 0x20)); } sub VCHAR { return(CharRange(0x21, 0x7E)); } sub WSP { return(SP() || HTAB()); } #----------------------------- # Special-rules #----------------------------- sub Character { my $Ch = shift; if (nextchar() eq $Ch) { eatchar(); return 1; } else { return 0; } } sub CharRange { my ($lower, $upper) = @_; my $nc = ord(nextchar()); if ($nc >= $lower && $nc <= $upper) { eatchar(); return 1; } else { return 0; } }