#!/usr/bin/perl

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;

our $VERSION = '1.05';

# Run the command-line driver only when this file is executed directly, so
# that the subroutines below can also be loaded (e.g. via "require") and
# exercised without triggering option parsing.
exit main() unless caller;

# main()
#
# Command-line entry point: parses the options, loads and compiles the rule
# file, then runs the rules against the given string, printing progress to
# STDOUT. Returns 0; dies on an unreadable rule file or an invalid rule, and
# leaves through pod2usage() on missing/invalid options.
sub main {
    my $rules_file;
    my $string;
    my $verbose = 1;
    my $help;
    my $manual;

    GetOptions(
        'rules=s'  => \$rules_file,
        'string=s' => \$string,
        'help'     => \$help,
        'manual'   => \$manual,
        'verbose!' => \$verbose,
    );

    $manual and pod2usage( verbose => 2 );
    $help   and pod2usage( verbose => 0 );

    defined $rules_file
        or pod2usage( message => 'No file name specified.', verbose => 0 );
    -f $rules_file
        or pod2usage( message => 'Invalid file name specified.', verbose => 0 );
    defined $string
        or pod2usage( message => 'No string specifed.', verbose => 0 );

    my @rule_texts = read_rule_lines($rules_file);

    print "Compiling rules.\n" if $verbose;

    # Rules are numbered from 1 in the order they appear in the file
    # (counting only the lines that contain "->").
    my @rules;
    my $rule_no = 0;
    foreach my $text (@rule_texts) {
        $rule_no++;
        my $sub = compile_rule( $text, $rule_no );
        die "Invalid rule definition. Rule $rule_no." if not defined $sub;
        push @rules, $sub;
    }

    print "Interpreting rules.\n" if $verbose;

    # The manual states that whitespace is ignored in the string as well as
    # in the rules; rules have all whitespace stripped when compiled, so do
    # the same for the input string.
    $string =~ s/\s+//g;
    print "Source string: '$string'\n";

    interpret( $string, \@rules, $verbose );

    return 0;
}

# read_rule_lines($rules_file)
#
# Returns the chomped lines of $rules_file that contain "->"; every other
# line is skipped. Dies if the file cannot be opened.
sub read_rule_lines {
    my ($rules_file) = @_;

    open my $fh, '<', $rules_file
        or die "Could not open rules file: $!";

    my @lines;
    while ( my $line = <$fh> ) {
        chomp $line;
        push @lines, $line if $line =~ /->/;
    }
    close $fh;

    return @lines;
}

# interpret($string, \@rules, $verbose)
#
# Repeatedly applies the first matching rule to $string. Each step scans the
# rules in order and restarts from the first rule after a successful
# substitution. Stops when a ".correct" or ".fail" rule matches, or when no
# rule matches at all. Intermediate steps are printed only if $verbose is
# true; the final result line is always printed.
sub interpret {
    my ( $string, $rules, $verbose ) = @_;

    my $step = 0;
    STEP: while (1) {
        $step++;
        my $before  = $string;
        my $rule_no = 0;

        foreach my $rule ( @{$rules} ) {
            $rule_no++;
            my ( $str, $done ) = $rule->($string);
            next if not defined $str;    # rule did not match

            if ($done) {
                print "Step $step: Succeeded in rule $rule_no. Result is '$str'\n";
                last STEP;
            }
            elsif ( defined $done ) {
                print "Step $step: Failed in rule $rule_no. Result is '$str'\n";
                last STEP;
            }

            $string = $str;
            print "Step $step: '$before' -> '$string' (Rule $rule_no).\n"
                if $verbose;
            next STEP;
        }

        # No rule matched: $rule_no is the number of the last rule tried.
        print "Step $step: Failed in rule $rule_no. Result is '$string'\n";
        last STEP;
    }

    return;
}

# compile_rule($rule_text, $rule_no)
#
# Compiles one textual rule of the form "LEFT -> RIGHT" into a closure.
# All whitespace in the rule is removed first. "\e" on either side stands
# for the empty string. $rule_no is only used in error messages.
#
# The returned closure takes a string and returns a two-element list:
#   (undef, undef)   the rule did not match
#   ($new, undef)    the first occurrence of LEFT was replaced by RIGHT
#   ($new, 1)        RIGHT contains ".correct"; the matched text was removed
#   ($new, 0)        RIGHT contains ".fail";    the matched text was removed
# Note that for ".correct"/".fail" rules anything else on the right side is
# not inserted into the string.
#
# Dies if either side is missing or if the rule contains "-", "\" or "."
# outside of their special meanings.
sub compile_rule {
    my $str     = shift;
    my $rule_no = shift;

    $str =~ s/\s+//g;
    my ( $left, $right ) = split /->/, $str, 2;

    die "Invalid rule definition. Rule $rule_no."
        if not( defined $left and defined $right )
        or $left eq ''
        or $right eq '';

    $left  = '' if $left eq "\\e";
    $right = '' if $right eq "\\e";

    # Every alternative captures the offending character so that $1 is
    # always defined when the message is built.
    if (   $left =~ /([-\\.])/
        or $right =~ /([-\\])/
        or $right =~ /(\.)(?!correct|fail)/ )
    {
        die "Invalid character in rule ($1). Rule $rule_no.";
    }

    # An empty pattern has a special meaning in Perl's match operators
    # (reuse of the last successful pattern), so spell "match the empty
    # string" explicitly for an epsilon left side.
    my $regex = $left eq '' ? qr/(?:)/ : qr/\Q$left\E/;

    # One specialised closure per rule kind keeps the per-step work minimal.
    if ( $right =~ /\.correct/ ) {
        return sub {
            my $target = shift;
            $target =~ s/$regex// or return ( undef, undef );
            return ( $target, 1 );
        };
    }

    if ( $right =~ /\.fail/ ) {
        return sub {
            my $target = shift;
            $target =~ s/$regex// or return ( undef, undef );
            return ( $target, 0 );
        };
    }

    return sub {
        my $target = shift;
        $target =~ s/$regex/$right/ or return ( undef, undef );
        return ( $target, undef );
    };
}

__END__

=pod

=head1 NAME

markov - run Markov Algorithms

=head1 AUTHOR

Steffen Mueller, mail at steffen-mueller dot net

=head1 COPYRIGHT

Copyright (c) 2002 Steffen Mueller. All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

Please see the Perl Artistic License.

=head1 VERSION

The currently documented version is 1.05.

=head1 SYNOPSIS

markov -rules FILENAME -string 'STRING' [-help -manual]

  -help             displays this synopsis.
  -manual           displays the full manual.
  -rules FILENAME   specifies a text file containing rule definitions.
  -string 'STRING'  specifies a string to apply the rules to.
                    Note that the quotation marks should be double
                    quotes (") in Windows environments.
  -verbose          turns on verbose output (default).
  -noverbose        turns it off.

Please also see the full manual for important details. I suppose you won't
be able to use this program without having skimmed the manual beforehand.

=head1 DESCRIPTION

Whitespace in both rule file and string is B<ignored>!

The formal epsilon (empty string) is symbolized by C<\e>.
C<.correct> and C<.fail> symbolize their formal counterparts.

".", "-" and "\" are illegal characters in both rules and strings unless
they are used in their respective special meaning. (You can't assume I'm
doing any extra fancy parsing work!)

C<\e> must be the only token on its side.
C<.correct> and C<.fail> may only appear on the right side.
If you introduce characters that were not used in the original string, it
is your task to remove them from the resulting string. I chose not to have
them removed from the resulting string because they can be of great help
when debugging your algorithm. Removing them, however, is trivial. I suggest
you just use capital letters in the input and lower case letters as tokens
introduced in the rules.

=head1 EXAMPLE

Given a file I<rules.txt> that looks like this:

  BAa -> AaB
  aBB -> BaB
  aAA -> AaA
  aAB -> AaB
  aBA -> AaB
  a -> .correct
  \e -> a

You may run markov like this:

  markov -rules rules.txt -string BAAABBA -verbose

You will get this output:

  Compiling rules.
  Interpreting rules.
  Source string: 'BAAABBA'
  Step 1: 'BAAABBA' -> 'aBAAABBA' (Rule 7).
  Step 2: 'aBAAABBA' -> 'AaBAABBA' (Rule 5).
  Step 3: 'AaBAABBA' -> 'AAaBABBA' (Rule 5).
  Step 4: 'AAaBABBA' -> 'AAAaBBBA' (Rule 5).
  Step 5: 'AAAaBBBA' -> 'AAABaBBA' (Rule 2).
  Step 6: 'AAABaBBA' -> 'AAABBaBA' (Rule 2).
  Step 7: 'AAABBaBA' -> 'AAABBAaB' (Rule 5).
  Step 8: 'AAABBAaB' -> 'AAABAaBB' (Rule 1).
  Step 9: 'AAABAaBB' -> 'AAAAaBBB' (Rule 1).
  Step 10: 'AAAAaBBB' -> 'AAAABaBB' (Rule 2).
  Step 11: 'AAAABaBB' -> 'AAAABBaB' (Rule 2).
  Step 12: Succeeded in rule 6. Result is 'AAAABBB'

'Nuff said.

=cut