#!perl -w use strict; use Bigrams; if (@ARGV < 1) { die "usage: perl biapprox.pl textfile (threshold length examples)\n"; } my $a = BiApprox->new($ARGV[0]); my $flag = 1; if (@ARGV == 4) { for (my $i = 0; $i < $ARGV[3]; $i++) { print $a->make($ARGV[1],$ARGV[2]), "\n"; } } else { while ($flag) { print "Frequency threshold: "; my $thresh = ; chomp $thresh; if (length $thresh < 1 or $thresh < 0 or $thresh > 1) { $flag = 0; next; } print "length: "; my $len = ; chomp $len; if (length $len > 0) { print $a->make($thresh,$len), "\n"; } else { $flag = 0; } } } package BiApprox; sub new { my $class = shift; my $self = {}; bless $self, $class; my $textfile = shift; $self->_initialize($textfile); return $self; } sub make { my $self = shift; my $thresh = shift; my $len = shift; my $allbigrams = $self->{_lm}->{bigrams}; my @bigrams; foreach my $key1 (keys %$allbigrams) { foreach my $key2 (keys %{$allbigrams->{$key1}}) { if ($allbigrams->{$key1}->{$key2} > $thresh) { push @bigrams, "$key1 $key2"; } } } if (@bigrams < 1) { return "Threshold too high."; } my @result; my (@initbigrams,@medialbigrams,@finalbigrams); #bigrams by position foreach my $bigram (@bigrams) { if ($bigram =~ /^/) { push @initbigrams, $bigram; } elsif ($bigram =~ /<\/s>$/) { push @finalbigrams, $bigram; } else { push @medialbigrams, $bigram; } } #at least one of each if (@initbigrams < 1 or @finalbigrams < 1 or @medialbigrams < 1) { return "Threshold too high."; } push @result, ""; #do initial bigram my $index = rand @initbigrams; my $word = $initbigrams[$index]; $word =~ s/^ (.*)$/$1/; push @result, $word; #medial bigrams $len--; my @temp; for (my $i = 1; $i < $len; $i++) { my $lastword = $result[$#result]; @temp = 0; foreach my $bigram (@medialbigrams) { if ($bigram =~ /^$lastword /) { $word = $bigram; $word =~ s/^$lastword (.*)$/$1/; push @temp, $word; } } my $index = rand @temp; push @result, $temp[$index]; } #final bigrams my $lastword = $result[$#result]; foreach my $bigram (@finalbigrams) { if ($bigram =~ /^$lastword /) { push @result, ""; last; } } return join ' ', @result; } sub _initialize { my $self = shift; my $file = shift; my $lm = Bigrams->new($file); $self->{_lm} = $lm; } 1; =head1 NAME biapprox.pl - Generate sentences according to bigram language model =head1 SYNOPSIS perl biapprox.pl textfile perl biapprox.pl textfile threshold length examples =head1 DESCRIPTION This program constructs a bigram language model (using F). It then allows the user to enter a loop wherein a frequency threshold is set and a word length is given. The program then returns a I where each bigram is above this threshold. The threshold, length, and number of examples can also be given on the command-line. =head1 AUTHOR Michael Hammond, F =cut