DaniCee DaniCee - 5 months ago 19
Perl Question

Perl: replace ambiguous letters with each option

I am trying to do the following task which should be easy, but I cannot get my head around it:

I have two sequences with letters (nucleotide bases) that might be ambiguous. I want to rewrite each sequence in all its possibilities...

For example, the first sequence is:

CAGCMGCCGCGGTAAYWC


And it contains M, Y and W, which can be [A,C], [C,T], [A,T] respectively. Hence, the above sequence should be rewritten into the following possibilities:

CAGCAGCCGCGGTAACAC
CAGCAGCCGCGGTAACTC
CAGCAGCCGCGGTAATAC
CAGCAGCCGCGGTAATTC
CAGCCGCCGCGGTAACAC
CAGCCGCCGCGGTAACTC
CAGCCGCCGCGGTAATAC
CAGCCGCCGCGGTAATTC


I have the MWE so far:

#!/usr/bin/perl -w
use warnings;
use strict;

my %seqs = (
"PrefixPE/1" => "CAGCMGCCGCGGTAAYWC",
"PrefixPE/2" => "BSCCCGYCAATTYMTKTRAGT"
);

my %ops;
$ops{"R"}{"A"}="";
$ops{"R"}{"G"}="";
$ops{"Y"}{"C"}="";
$ops{"Y"}{"T"}="";
$ops{"M"}{"A"}="";
$ops{"M"}{"C"}="";
$ops{"K"}{"G"}="";
$ops{"K"}{"T"}="";
$ops{"W"}{"A"}="";
$ops{"W"}{"T"}="";
$ops{"B"}{"C"}="";
$ops{"B"}{"G"}="";
$ops{"B"}{"T"}="";
$ops{"S"}{"C"}="";
$ops{"S"}{"G"}="";

foreach my $id(keys %seqs){
my $seq=$seqs{$id};
my @nts=(split '', $seq);
my $i=0;
foreach my $n(@nts){
$i++;
if (exists $ops{$n}){
my $j=0;
foreach my $o(keys %{$ops{$n}}){
$j++;
print "$id, pos $i\.$j = <$o>\n";
}
}
else{
print "$id, pos $i = <$n>\n";
}
}
}

Answer

For each letter, expand the set of possible sequences.

#!/usr/bin/perl
use warnings;
use strict;
use feature qw{ say };
use Syntax::Construct qw{ /r };

my %ambiguous = ( M => [qw[ A C ]],
                  Y => [qw[ C T ]],
                  W => [qw[ A T ]],
                );

my $string = 'CAGCMGCCGCGGTAAYWC';
my $is_ambiguous = '[' . (join q(), keys %ambiguous) . ']';

my @strings = $string;
while ($strings[0] =~ $is_ambiguous) {
    my ($letter) = $strings[0] =~ /($is_ambiguous)/;
    @strings = map {
        my $s = $_;
        map $s =~ s/$letter/$_/r, @{ $ambiguous{$letter} }
        # map { (my $x = $s) =~ s/$letter/$_/; $x } @{ $ambiguous{$letter} }
    } @strings;
}

say for @strings;

On Perl before 5.14, use the commented line instead of the line above it, and remove Syntax::Construct.

Comments