#!perl -w

# uniapprox.pl - print random word sequences drawn from the unigram table of
# a language model built by the project-local Bigrams module.
#
# With four arguments (textfile threshold length examples) the requested
# number of sequences is printed and the program exits.  Otherwise the user
# is prompted repeatedly for a threshold and a length until an empty or
# out-of-range answer (or end of input) is given.

use strict;
use warnings;

use Bigrams;

if (@ARGV < 1) {
    die "usage: perl uniapprox.pl textfile (threshold length examples)\n";
}

# Build the model once; every generated sequence reuses it.
# (Named $model rather than $a, which is reserved for sort comparators.)
my $model = UniApprox->new($ARGV[0]);

if (@ARGV == 4) {
    # Batch mode: threshold, length and example count come from @ARGV.
    my ($thresh, $len, $examples) = @ARGV[1 .. 3];
    for (my $i = 0; $i < $examples; $i++) {
        print $model->make($thresh, $len), "\n";
    }
}
else {
    # Interactive mode: keep prompting until the user declines to answer.
    PROMPT:
    while (1) {
        print "Frequency threshold: ";
        my $thresh = <STDIN>;
        last PROMPT if !defined $thresh;    # end of input
        chomp $thresh;

        # An empty answer, or one outside 0..1, ends the session.
        last PROMPT
            if length($thresh) < 1
            or $thresh < 0
            or $thresh > 1;

        print "length: ";
        my $len = <STDIN>;
        last PROMPT if !defined $len;       # end of input
        chomp $len;
        last PROMPT if length($len) < 1;

        print $model->make($thresh, $len), "\n";
    }
}

package UniApprox;

# new($textfile)
#
# Construct a generator whose language model is built from $textfile.
# Returns the blessed object.
sub new {
    my ($class, $textfile) = @_;

    my $self = bless {}, $class;
    $self->_initialize($textfile);

    return $self;
}

# make($thresh, $len)
#
# Return a string of $len space-separated words, each picked uniformly at
# random (with replacement) from the unigram keys whose stored value is
# strictly greater than $thresh.  Returns the string "Threshold too high."
# when no key qualifies.
sub make {
    my ($self, $thresh, $len) = @_;

    my $allunigrams = $self->{_lm}->{unigrams};

    # Candidate words: non-empty keys, without a hyphen, above the threshold.
    # NOTE(review): the original tested $key against "" twice; the two
    # literals look as if they once held distinct tokens (possibly
    # sentence-boundary markers) that were stripped as markup -- TODO confirm
    # against Bigrams.pm and restore them if so.
    # NOTE(review): the stored values are presumably relative frequencies,
    # since the interactive prompt only accepts thresholds in 0..1 -- verify.
    my @unigrams = grep {
                $_ ne ""
            and $_ !~ /\-/
            and $allunigrams->{$_} > $thresh
    } keys %$allunigrams;

    return "Threshold too high." if @unigrams < 1;

    my @result;
    while (@result < $len) {
        # rand yields a fraction in [0, count); int makes the index explicit.
        push @result, $unigrams[ int rand @unigrams ];
    }

    return join ' ', @result;
}

# _initialize($file)
#
# Private helper for new(): build the Bigrams model for $file and store it
# under the _lm key of the object.  Returns the stored model.
sub _initialize {
    my ($self, $file) = @_;

    $self->{_lm} = Bigrams->new($file);

    return $self->{_lm};
}

1;

=head1 NAME

uniapprox.pl - Generate sentences according to unigram language model

=head1 SYNOPSIS

    perl uniapprox.pl textfile
    perl uniapprox.pl textfile threshold length examples

=head1 DESCRIPTION

This program constructs a unigram language model (using F<Bigrams.pm>).
It then allows the user to enter a loop wherein a frequency threshold is
set and a length in words is given. The program then returns a I<sentence>
of that length where each word is above this threshold.

The threshold, length, and number of examples can also be given on the
command-line.

=head1 AUTHOR

Michael Hammond

=cut