#!/usr/bin/perl use strict; use warnings; use 5.006; use Pod::Usage; use vars qw/$VERSION/; $VERSION = '1.00'; =pod =head1 NAME chomsky - determine Chomsky type of a grammar =head1 AUTHOR Steffen Mueller, mail at steffen-mueller dot net =head1 COPYRIGHT Copyright (c) 2002 Steffen Mueller. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Please see the Perl Artistic License. =head1 VERSION The currently documented version is 1.00. =head1 DESCRIPTION See website http://steffen-mueller.net/chomsky/ If it is not there any more, you are out of luck. This program comes with no warranty whatsoever. It's not guaranteed to work as you expect it to. What piece of software is, after all? =cut use Data::Dumper; my $rules_file = shift; my $self_path = $0; $self_path =~ s/^.*(?:\/|\\)//; print("Usage: $self_path RULESFILENAME\n\n"), pod2usage( verbose => 2 ) if not defined $rules_file or not -f $rules_file; open my $rf, '<', $rules_file or die "Could not open rules file ($rules_file): $!"; my($term_ref, $nonterm_ref) = get_defs( $rf ); my %terminals = %$term_ref; my %nonterminals = %$nonterm_ref; my @prods = get_productions($rf, $term_ref, $nonterm_ref); # print Dumper \@prods; my $xterm = join ('|', map {quotemeta $_} keys %terminals ); my $xnon_term = join ('|', map {quotemeta $_} keys %nonterminals ); my $xalphabet = qr/(?:$xterm|$xnon_term)/; # Match the alphabet. $xterm = qr/(?:$xterm)/; # Match any terminal. $xnon_term = qr/(?:$xnon_term)/; # Match any non-terminal. my $arrow = "->"; my $lowest = 3; my @types; my $l_r = ''; my $not_3 = 0; PRODS: foreach my $prod (@prods) { my $ch_type = 0; # print $types[-1],"\n" if @types; # print Dumper $prod; my $term_left = scalar( grep { exists $terminals{$_} } @{ $prod->{left} } ); my $nonterm_left = scalar( grep { exists $nonterminals{$_} } @{ $prod->{left} } ); # context sensitive? my $result = check_CH1( $prod, $xterm, $xnon_term, $xalphabet, $arrow ); unless ($result) { $lowest = 0; push @types, "Production does not comply with CH-1."; next PRODS; } # context free? if ( $term_left > 0 or $nonterm_left > 1 ) { $lowest = ($lowest < 1 ? $lowest : 1); push @types, "Production does not comply with CH-2."; next PRODS; } # regular? push(@types, "Production complies with CH-3."), next if @{$prod->{right}} == 0 or ( grep { exists $nonterminals{$_} } @{ $prod->{right} } ) == 0; my $not_ok = sub { $not_3 = 1; push @types, "Whole grammar does not comply with CH-3."; $lowest = ($lowest < 2 ? $lowest : 2); }; $not_ok->(), next PRODS if @{ $prod->{right} } > 2 or ( @{ $prod->{right} } == 1 and exists $nonterminals{$prod->{right}[0]} ); if ( $l_r eq 'left' ) { $not_ok->(), next PRODS if exists $terminals{ $prod->{right}[0] } or exists $nonterminals{ $prod->{right}[1] }; } elsif ( $l_r eq 'right' ) { $not_ok->(), next PRODS if exists $nonterminals{ $prod->{right}[0] } or exists $terminals{ $prod->{right}[1] }; } else { if ( exists $terminals{ $prod->{right}[0] } ) { $not_ok->(), next PRODS if not exists $nonterminals{ $prod->{right}[1] }; $l_r = 'right'; } else { $not_ok->(), next PRODS if not exists $terminals{ $prod->{right}[1] }; $l_r = 'left'; } } push @types, "Production complies with CH-3."; } print Dumper \@types; print "\n The grammar is of type CH-$lowest.\n"; print "The grammar is $l_r-linear." if $l_r and $lowest == 3; sub get_productions { my $rf = shift; my $terms = shift; my $nonterms = shift; my @productions; while (1) { my $prods = _extract_productions($rf, $terms, $nonterms); last if not defined $prods or @$prods == 0; push @productions, @$prods; } die "No valid production definitions found." if @productions == 0; return @productions; } sub _extract_productions { my $rf = shift; my $terms = shift; my $nonterms = shift; my $prod; my $line; my $okay = 0; while ( $line = <$rf> ) { remove_comments($line); if ( $line =~ /^\s*(?:P|Pr|Pro|Prod|Produ|Produc|Product|Producti|Producio|Production):/ ) { $line =~ s/^\s*(?:P|Pr|Pro|Prod|Produ|Produc|Product|Producti|Producio|Production)://; $okay++; last; } } return undef if not $okay; unless ($line =~ /;/) { while (<$rf>) { remove_comments($_); $line .= $_; last if /;/; } } $line =~ s/;.*//s; $line =~ s/^(.*?)\-\>// or die "Malformed production definition."; my $left = $1; $left =~s/^\s+//; $left =~s/\s+$//; my @left = split /\s+/, $left; foreach (@left) { die "Invalid production definition (left side: '" . (defined $_ ? $_ : '') . "')." if not exists $terms-> {$_} and not exists $nonterms->{$_}; } $line =~ s/^\s+//s; $line =~ s/\s+$//s; my @prods = split /\s*\|\s*/, $line; @prods or die "Invalid production definition"; my @productions; foreach my $right_side (@prods) { my $production = { left => \@left }; my @right = split /\s+/, $right_side; foreach (@right) { die "Invalid production definition (right side)." unless exists $terms-> {$_} or exists $nonterms->{$_}; } $production->{right} = \@right; push @productions, $production; } return \@productions; } sub get_defs { my $rf = shift; my $term; my $first_type; while ( $term = <$rf> ) { $first_type = $1, last if $term =~ /^ \s* (E|N) \s* = \s*/x; } $first_type or die "Invalid (non-)terminal definition."; my $first = _extract_defs($rf, $term, $first_type); my($second_type) = grep {$_ ne $first_type} ('E', 'N'); $term = undef; while ( $term = <$rf> ) { last if $term =~ /^ \s* \Q$second_type\E \s* = \s*/x; } my $second = _extract_defs($rf, $term, $second_type); return ($first_type eq 'E' ? ($first, $second) : ($second, $first)); } sub _extract_defs { my $rf = shift; my $term = shift; my $type = shift; remove_comments($term); unless ($term =~ /\}/) { while (<$rf>) { remove_comments($_); $term .= $_; last if /\}/; } } $term =~ s/\}.*//s; $term =~ /^ \s* \Q$type\E \s* = \s* { \s* ( (?: \S+ \s* )+ ) $/xs or die "Invalid (non-)terminal definition. (Should be $type={a b c d ...}.)"; my %terms = map {($_, undef)} split /\s*/, $1; return \%terms; } sub remove_comments { my $trailing = chomp $_[0]; $_[0] =~ s/#.*//; $_[0] .= "\n" if $trailing; return $_[0]; } # with the generous help from Kripa Sundar, Kripa.Sundar@synopsys.com. # I got the help from the Fun with Perl mailing list: fwp@perl.org # (The whole idea to use Perl RE's backtracking abilities in this case # plus an example implementation! # I originally tried to think of an # algorithm that worked on the arrays $prod->{left} and $prod->{right}, # but that turned out to be very hard. After all, RE engines are hard # to implement even if they are only meant to check compliance with # a simple condition. ) sub check_CH1 { my $prod = shift; my $xterm = shift; my $xnterm = shift; my $xalpha = shift; my $arrow = shift; my $rule = join("", @{$prod->{left}}, $arrow, @{$prod->{right}}); return 0 if $rule !~ m{^ ($xalpha*) $xnterm ($xalpha*) # LHS \Q$arrow\E \1 $xalpha+ \2 # RHS }x; return 1; }