Rob Rob - 5 months ago 8
Perl Question

Explicit package name and masking earlier declarations in the same statement

I tried to write a Perl version of the following algorithm: [image: enter image description here]

Here is the code that I have:

#!/usr/bin/perl
use warnings;
use strict;
use diagnostics;

my $s1 = 'GATTACCA';
my $s2 = 'AGTGGGCGGGGAGAGAGAGAGAGG';

my $dist = levdist($s1, $s2);

sub levdist
{
my ( $seq1, $seq2 ) = (@_)[0,1];

my $l1 = length($s1);
my $l2 = length($s2);
my @s1 = split '', $seq1;
my @s2 = split '', $seq2;

for (my $i = 0; $i <= $l1; $i++) {
my $distances->[$i]->[0] = $i;
}
for (my $j = 0; $j <= $l2; $j++) {
my $distances->[0]->[$j] = $j;
}
for (my $i = 1; $i <= $l1; $i++) {
for (my $j = 1; $j <= $l2; $j++) {
if ( $s1[$i-1] eq $s2[$j-1] ) {
my $cost = 0;
} else {
my $cost = 1;
}
my $distances->[$i]->[$j] = minimum($distances->[$i-1]->[$j-1] + my $cost,
$distances->[$i]->[$j-1]+1,
$distances->[$i-1]->[$j]+ 1 )
}
}
my $min_distance = my $distances->[$l1]->[$l2];
for (my $i = 0; $i <= $l1; $i++) { my $min_distance = minimum($min_distance, my $distances->[$i]->[$l2]);
}
for (my $j = 0; $j <= $l2; $j++ ) {
my $min_distance = minimum($min_distance, my $distances->[$l1]->[$j]);
}
return $min_distance;
}

# minimum(LIST)
#
# Returns the numerically smallest value in LIST, or undef when LIST is empty.
sub minimum
{
    my ( $smallest, @others ) = @_;

    for my $candidate (@others) {
        $smallest = $candidate if $candidate < $smallest;
    }

    return $smallest;
}


This throws the following error:

Global symbol "$distances" requires explicit package name at ./levenshtein.pl line 33.
Global symbol "$distances" requires explicit package name at ./levenshtein.pl line 34.
Global symbol "$distances" requires explicit package name at ./levenshtein.pl line 35.


When I alter the code to look like:

my $distances->[$i]->[$j] = minimum(my $distances->[$i-1]->[$j-1] + my $cost,
my $distances->[$i]->[$j-1]+1,
my $distances->[$i-1]->[$j]+ 1


I get the following set of errors:

"my" variable $distances masks earlier declaration in same statement at
./levenshtein.pl line 33 (#1)
(W misc) A "my", "our" or "state" variable has been redeclared in the
current scope or statement, effectively eliminating all access to the
previous instance. This is almost always a typographical error. Note
that the earlier variable will still exist until the end of the scope
or until all closure references to it are destroyed.

"my" variable $distances masks earlier declaration in same statement at
./levenshtein.pl line 34 (#1)
"my" variable $distances masks earlier declaration in same statement at
./levenshtein.pl line 35 (#1)


I feel like I am in a catch-22. I get an error if I declare the variable or not. Any insights would be appreciated.
Thanks,

Answer
  1. Use my to declare a variable once, in its proper scope. It will get cleaned up when it falls out of scope.

  2. Use library functions so as not to reinvent the wheel, e.g. min from List::Util.

  3. You should also use better variable names. Names like $i, $l1 and $i1 are hard to read and make it easy to introduce bugs.

  4. It would be more Perlish to use the range operator, so instead of writing

    for (my $i = 0; $i <= $l1; $i++) { you could use for my $i ( 0 .. $l1 ) {

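  5. Check whether you need (0 .. $l1-1) instead of (0 .. $l1): Perl arrays are 0-based by default, so an off-by-one here would introduce a bug. (In this algorithm the distance table has $l1+1 rows and the characters are read as $s1[$i-1], so 0 .. $l1 is the right range for the table.)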

  6. I would recommend you write a test file to validate the results of your algorithm.

Here is a working (compiling/running, not necessarily correct) version:

#!/usr/bin/perl
use warnings;
use strict;
use diagnostics;  
use List::Util qw( min max );

# Sample input strings to compare.
my $s1 = 'GATTACCA'; 
my $s2 = 'AGTGGGCGGGGAGAGAGAGAGAGG'; 

# Compute the distance between the two samples and report it on STDOUT.
my $dist = levdist($s1, $s2); 
print "Distance between '$s1' and '$s2' is $dist\n";

# levdist($seq1, $seq2)
#
# Builds the edit-distance table for the two strings (unit cost for an
# insertion, a deletion or a substitution) and returns the smallest value
# found in the table's last row or last column.
#
# NOTE(review): a textbook Levenshtein distance returns only the bottom-right
# cell. Scanning the whole last row and column also accepts a prefix of one
# string matched against all of the other, so an empty argument always
# yields 0. That behaviour is kept unchanged here -- confirm it against the
# algorithm being ported.
#
# Parameters:
#   $seq1, $seq2 - the strings to compare.
# Returns:
#   A non-negative integer.
sub levdist {
    my ( $seq1, $seq2 ) = @_;

    # Measure the arguments themselves, not any file-level variables, so the
    # result depends only on what the caller passed in.
    my $len1   = length $seq1;
    my $len2   = length $seq2;
    my @chars1 = split //, $seq1;
    my @chars2 = split //, $seq2;

    # $dist[$i][$j] is the cost of turning the first $i characters of $seq1
    # into the first $j characters of $seq2.
    my @dist;
    $dist[$_][0] = $_ for 0 .. $len1;
    $dist[0][$_] = $_ for 0 .. $len2;

    for my $i ( 1 .. $len1 ) {
        for my $j ( 1 .. $len2 ) {

            # Row/column $i/$j of the table corresponds to character
            # $i-1/$j-1, because row 0 and column 0 stand for ''.
            my $cost = $chars1[ $i - 1 ] eq $chars2[ $j - 1 ] ? 0 : 1;

            $dist[$i][$j] = min(
                $dist[ $i - 1 ][ $j - 1 ] + $cost,    # match / substitution
                $dist[$i][ $j - 1 ] + 1,              # insertion
                $dist[ $i - 1 ][$j] + 1,              # deletion
            );
        }
    }

    # Smallest entry of the last column and the last row (both include the
    # bottom-right cell).
    return min(
        ( map { $dist[$_][$len2] } 0 .. $len1 ),
        ( map { $dist[$len1][$_] } 0 .. $len2 ),
    );
}

Output

Distance between 'GATTACCA' and 'AGTGGGCGGGGAGAGAGAGAGAGG' is 6

Have a read:

perldoc -f my

perldoc List::Util

perldoc Test::More

Comments