Rob Rob - 5 months ago 19
Perl Question

"No such file" when opening multiple files in a directory, but no error when opening only one file

I can open one file in a directory and run the following code. However, when I try to use the same code on multiple files within a directory, I get an error regarding there not being a file.

I have tried to make sure that I am naming the files correctly, that they are in the right format, that they are located in my current working directory, and that things are referenced correctly.

I know a lot of people have had this error before and have posted similar questions, but any help would be appreciated.

Working code:

#!/usr/bin/perl

use warnings;
use strict;
use diagnostics;

use List::Util qw( min max );

# Load the read, then slide a fixed-size window over its start and score each
# window against the expected primer with levdist(). The loop opened here is
# closed further down, after the nested levdist() definition.
my $RawSequence = loadSequence("LDTest.fasta");
my $windowSize  = 38;
my $stepSize    = 1;

# Maps an edit distance to a window that produced that distance. When several
# windows share a distance, the last one scanned wins.
my %hash;

# the 28 nt forward primer after the barcode plus the first 10 nt of the mtDNA sequence
# (loop-invariant, so it is defined once instead of on every iteration)
my $s2 = 'CGGAGCTTTACGAGCCGTAGCCCAAACAGTTAATGTAG';

for ( my $windowStart = 0; $windowStart <= 140; $windowStart += $stepSize ) {

    # Stop once a full window no longer fits in the read: a truncated (or
    # empty) window would be scored as a misleadingly good match.
    last if $windowStart + $windowSize > length($$RawSequence);

    my $s1   = substr( $$RawSequence, $windowStart, $windowSize );
    my $dist = levdist( $s1, $s2 );

    $hash{$dist} = $s1;

    #print "Distance between '$s1' and '$s2' is $dist\n";

# levdist( $seq1, $seq2 )
#
# Scores how closely two strings match using the usual dynamic-programming
# edit-distance table, where a substitution, an insertion and a deletion each
# cost 1.
#
# The value returned is the smallest entry found in the last row or the last
# column of the table (which includes the bottom-right cell), so a full match
# of one string against a prefix of the other also counts.
#
# Parameters: $seq1, $seq2 - the strings to compare (undef is treated as '').
# Returns:    a non-negative integer score; 0 means an exact match.
sub levdist {
    my ( $seq1, $seq2 ) = @_;

    $seq1 = '' unless defined $seq1;
    $seq2 = '' unless defined $seq2;

    # The table dimensions must come from the arguments themselves, not from
    # variables of the calling code, otherwise the table has the wrong size
    # whenever the arguments differ in length from those variables.
    my $l1 = length($seq1);
    my $l2 = length($seq2);
    my @s1 = split '', $seq1;
    my @s2 = split '', $seq2;
    my $distances;

    # First column / first row: cost of building a prefix from nothing.
    for my $i ( 0 .. $l1 ) {
        $distances->[$i]->[0] = $i;
    }

    for my $j ( 0 .. $l2 ) {
        $distances->[0]->[$j] = $j;
    }

    for my $i ( 1 .. $l1 ) {
        for my $j ( 1 .. $l2 ) {
            my $cost = $s1[ $i - 1 ] eq $s2[ $j - 1 ] ? 0 : 1;

            $distances->[$i]->[$j] = min(
                $distances->[ $i - 1 ]->[ $j - 1 ] + $cost,    # match / substitution
                $distances->[$i]->[ $j - 1 ] + 1,              # insertion
                $distances->[ $i - 1 ]->[$j] + 1,              # deletion
            );
        }
    }

    # Take the best score along the last column and the last row.
    my $min_distance = $distances->[$l1]->[$l2];

    for my $i ( 0 .. $l1 ) {
        $min_distance = min( $min_distance, $distances->[$i]->[$l2] );
    }

    for my $j ( 0 .. $l2 ) {
        $min_distance = min( $min_distance, $distances->[$l1]->[$j] );
    }

    return $min_distance;
}
}

# minimum( LIST )
#
# Returns the numerically smallest value in LIST, or undef when LIST is empty.
sub minimum {
    my ( $smallest, @others ) = @_;

    for my $candidate (@others) {
        $smallest = $candidate if $candidate < $smallest;
    }

    return $smallest;
}

# loadSequence( $sequenceFile )
#
# Reads the FASTA file NAMED by $sequenceFile and concatenates every line that
# does not start with ">" (i.e. everything except header lines) into a single
# string.
#
# Parameters: $sequenceFile - path of the file to read (a file name, not a
#                             line of file content and not a filehandle).
# Returns:    a reference to the concatenated sequence string.
# Dies:       if the file cannot be opened; the message names the file so the
#             failing argument is visible in the error.
sub loadSequence {
    my ($sequenceFile) = @_;
    my $sequence = "";

    # A lexical handle avoids sharing one global FASTA handle between calls.
    open( my $fasta_fh, "<", $sequenceFile )
        or die "Cannot open '$sequenceFile': $!";

    while ( my $line = <$fasta_fh> ) {
        $line =~ s/\r?\n\z//;    # drop the line ending, LF or CRLF

        next if $line =~ /^>/;   # header line, not part of the sequence
        $sequence .= $line;
    }

    close($fasta_fh);

    return \$sequence;
}

# Report the best-scoring window. @keys holds the distances seen, smallest
# first, and %hash maps each distance to a window that produced it.
my @keys = sort { $a <=> $b } keys %hash;

# Index the sorted list with $keys[0]; "keys [0]" would instead apply keys()
# to an anonymous array reference. Undefined when no window was scored.
my $BestMatch = @keys ? $hash{ $keys[0] } : undef;

# Only accept a match whose distance is below 8; in that case replace every
# occurrence of the matched window in the read with the exact primer sequence
# and print the corrected read.
if ( @keys && $keys[0] < 8 ) {
    $$RawSequence =~ s/\Q$BestMatch\E/CGGAGCTTTACGAGCCGTAGCCCAAACAGTTAATGTAG/g;
    print ">|Forward|Distance_of_Best_Match: $keys[0] |Sequence_of_Best_Match: $BestMatch", "\n",
        "$$RawSequence", "\n";
}


Here is an abbreviated version of my non-working code. I have left out the parts that haven't changed:

Headers and Globals:

# Non-working variant: collect every .fasta file in $dir and process each one.
my $dir = ("/Users/roblogan/Documents/FakeFastaFiles");
my @ArrayofFiles = glob "$dir/*.fasta";

foreach my $file ( @ArrayofFiles ) {

# The file is opened here ...
open( my $Opened, $file ) or die "can't open file: $!";

# ... and read one line at a time, so $OpenedFile holds a line of the file's
# CONTENT, not a file name.
while ( my $OpenedFile = <$Opened> ) {

# loadSequence() opens its argument as a file name. Passing a content line
# makes that open fail, which is the reported "No such file or directory"
# error (the argument shown in the error is the first line of the file).
my $RawSequence = loadSequence($OpenedFile);

for ( ... ) {

...;

print
">|Forward|Distance_of_Best_Match: $keys[0] |Sequence_of_Best_Match: $BestMatch",
"\n", "$$RawSequence", "\n";
}
}
}


The exact error is:

Uncaught exception from user code:
No such file or directory at ./levenshtein_for_directory.pl line 93, <$Opened> line 1.
main::loadSequence('{\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf470\x{a}') called at ./levenshtein_for_directory.pl line 22


line 93:

89 sub loadSequence{
90 my ($sequenceFile) = @_;
91 my $sequence = "";
92 unless (open(FASTA, "<", $sequenceFile)){
93 die $!;
94 }


Line 22:

18 foreach my $file ( @ArrayofFiles ) {
19 open (my $Opened, $file) or die "can't open file: $!";
20 while (my $OpenedFile = <$Opened>) {
21
22 my $RawSequence = loadSequence($OpenedFile);
23

Answer

I just learned that "FASTA file" is an established term. I wasn't aware of that and had assumed these were ordinary files that contained filenames or something similar. As @zdim already said, you're opening these files twice.

The following code gets a list of FASTA files (only the filenames) and then calls loadSequence with each filename. That subroutine opens the given file, concatenates the lines that do not start with > into one long string, and returns it.

# input:  the NAME of a FASTA file
# return: all sequences in that file as one very long string
# Takes the NAME of a FASTA file, reads it line by line and joins every line
# that is not a ">" header into one long string, which is returned.
# Dies with "Cannot open <name>: <reason>" when the file cannot be opened.
sub loadSequence
{
    my ($fasta_filename) = @_;

    open( my $fasta_fh, '<', $fasta_filename ) or die "Cannot open $fasta_filename: $!\n";

    my @sequence_lines;
    while ( defined( my $row = readline($fasta_fh) ) ) {
        chomp($row);
        next if $row =~ /^>/;    # header lines are not part of the sequence
        push @sequence_lines, $row;
    }
    close($fasta_fh);

    return join( '', @sequence_lines );
}

# ...

# Directory that holds the FASTA files to process.
my $dir = '/Users/roblogan/Documents/FakeFastaFiles';
# Expand the pattern into the list of matching file names.
my @ArrayofFiles = glob "$dir/*.fasta";
# Hand each file NAME to loadSequence(); the file is not opened or read here,
# so loadSequence() receives something it can actually open.
foreach my $filename (@ArrayofFiles) {
    my $RawSequence = loadSequence($filename); 
    # ...
}
Comments