c11cc c11cc - 3 days ago 4
Perl Question

Why this Perl script run out of memory gradually

I have a trouble in running a Perl script in muti-threads. It continued consume memory and finally the system ran out of memory and killed it. It seems that the sub-threads were detached but the system resource were not released when they finished. I am pretty new to Perl and couldn't find which part went wrong. This is part of the script that may cause this problem. Could anyone help me with this?

use strict;
use warnings;

print "different number:\t";
my $num1=<>;
chomp $num1;
if($num1!~/[1 2 3 4 5]/)
{
print "invalid input number\n";
END;
}

my $i=0;
my $no;
my @spacer1;
my $nn;
my @spacer2;

open IN,"file1.txt"or die"$!";
while(<IN>)
{
chomp;
if($_=~ /^>((\d)+)\|((\d)+)/)
{
$no=$1;
$spacer1[$no][0]=$3;
}
else
{
$spacer1[$no][1]=$_;
}
}
close IN;

open IN, "file2.txt" or die "$!";
while(<IN>)
{
chomp;
if($_=~ /^>((\d)+)\|((\d)+)/)
{
$nn=$1;
$spacer2[$nn][0]=$3;
}
else
{
$spacer2[$nn][1]=$_;
}
}
close IN;

#-----------------------------------------------------------------#create threads
use subs qw(sg_ana);
use threads;
use Thread::Semaphore;


my $cycl=(int($no/10000))+1;
my $c;
my @thd;
my $thread_limit= Thread::Semaphore -> new (3);

foreach $c(1..$cycl)
{
$thread_limit->down();
$thd[$c]=threads->create("sg_ana",$c-1,$c,$num1);
$thd[$c]->detach();
}
&waitquit;

#-------------------------------------------------------------#limite threads num
sub waitquit
{
print "waiting\n";
my $num=0;
while($num<3)
{
$thread_limit->down();
$num++;
}
}

#---------------------------------------------------------------#alignment
my $n;
my $n1;
my $j;
my $k;
my $l;
my $m;
my $num;#number of match
my $num2=0;;#arrange num



sub sg_ana
{
my $c1=shift;
my $c2=shift;
$num1=shift;
open OUT,">$num1.$c2.txt" or die "$!";
if($num1==1)
{
foreach $n($c1*10000..$c2*10000-1)
{
if($spacer2[$n][1])
{
my $presult1;
my $presult2;
$num2=-1;
foreach $i(0..19)
{
$num=0;
$num2++;
my $tmp1=(substr $spacer2[$n][1],0,$i)."\\"."w".(substr $spacer2[$n][1],$i+1,19-$i);
foreach $n1(0..@spacer1-1)
{
if($spacer1[$n1][1])
{
my $tmp2=substr $spacer1[$n1][1],0,20;
if($tmp2=~/$tmp1/)
{
$num++;
$presult1.=$n1.",";
}
}
}
$presult2=$i+1;
if($num>=4)
{
print OUT "\n";
}
}
}
}
}
close OUT;
$thread_limit->up();
}

Answer
  1. Rule one of debugging perl is enable use strict; and use warnings; and then sort out the errors. Actually, you should probably do that first of all, before you even start writing code.
  2. You're creating and limiting threads via a Semaphore - but actually this is really inefficient because of how perl does threads - they aren't lightweight, so spawning loads is a bad idea. A better way of doing this is via Thread::Queue a bit like this.
  3. Please use 3 arg open and lexical file handles. e.g. open ( my $out, '>', "$num.$c2.txt" ) or die $!;. You're probably getting away with it here, but you have got OUT as a global namespace variable being used by multiple threads. That way lies dragons.
  4. Don't use single letter variables. And given how you you use $c then you'd be far better off:

    foreach my $value ( 1..$cycl ) { 
        ##  do stuff
    }
    

The same is true of all your other single letter variables though - they're not meaningful.

  1. You pass $num before it's initialised, so it's always going to be undef within your sub. So your actual subroutine is just:

    sub sg_ana
      {
        my $c1=shift;
        my $c2=shift;
        $num1=shift;
        open OUT,">$num1.$c2.txt" or die "$!";   
    
        close OUT;
        $semaphore->up();
     }
    

Looking at it - I think you may be trying to do something with a shared variable there, but you're not actually sharing it. I can't decode the logic of your program though (thanks to having a load of single letter variables most likely) so I can't say for sure.

  1. You're calling a subroutine &waitquit;. That's not good style - prefixing with an ampersand and supplying no arguments does something subtly different to just invoking the sub 'normally' - so you should avoid it.
  2. Don't instantiate your semaphore like this:

    my $semaphore=new Thread::Semaphore(3);
    

That's an indirect procedure call, and bad style. It would be better written as:

my $thread_limit = Thread::Semaphore -> new ( 3 ); 
  1. I would suggest rather than using Semaphores like that, you'd be far better off not detatching your threads, and just using join. You also don't need an array of threads - threads -> list does that for you.

  2. I can't reproduce your problem, because your sub isn't doing anything. Have you by any chance modified it for posting? But a classic reason for perl memory exhaustion when threading is because each thread clones the parent process - and so 100 threads is 100x the memory.

Comments