kal kal - 6 months ago 6
Perl Question

Iterating hash based on the insertion order?

Don't want to sort the entries.

using this does not preserve the order as well

foreach my $val (keys %hash) {
...
}

Answer

Hashes are unordered by default in Perl 5. You can use tie and Tie::IxHash to override this behavior. Be warned though, there is a performance hit and other considerations (like the fact that the order will not be preserved in copies).

#!/usr/bin/perl

use strict;
use warnings;

use Tie::IxHash;

tie my %hash, "Tie::IxHash"
    or die "could not tie %hash";

$hash{one}   = 1;
$hash{two}   = 2;
$hash{three} = 3;

for my $k (keys %hash) {
    print "$k $hash{$k}\n";
}

A better option may be to use an array or a hash of hashes:

#!/usr/bin/perl

use strict;
use warnings;

my %hash;
$hash{one}   = { data => 1, order => 1 };
$hash{three} = { data => 3, order => 2 };
$hash{two}   = { data => 2, order => 3 };

for my $k (sort { $hash{$a}{order} <=> $hash{$b}{order} } keys %hash) {
    print "$k $hash{$k}{data}\n";
}

As for performance, here are the results of a benchmark:

IndexedOO: a, b, c, d, e, f
HashOrdered: a, b, c, d, e, f
IxHashOO: a, b, c, d, e, f
hash: f, e, a, c, b, d
hoh_pis: a, b, c, d, e, f
IxHash: a, b, c, d, e, f
hoh: a, b, c, d, e, f
Indexed: a, b, c, d, e, f
              Rate IxHash  hoh Indexed IxHashOO IndexedOO hoh_pis HashOrdered hash
IxHash       261/s     -- -18%    -26%     -48%      -54%    -57%        -66% -80%
hoh          316/s    21%   --    -10%     -37%      -44%    -48%        -59% -75%
Indexed      353/s    35%  12%      --     -29%      -38%    -42%        -55% -72%
IxHashOO     499/s    91%  58%     41%       --      -12%    -18%        -36% -61%
IndexedOO    569/s   118%  80%     61%      14%        --     -7%        -27% -56%
hoh_pis      611/s   134%  93%     73%      22%        7%      --        -21% -52%
HashOrdered  778/s   198% 146%    120%      56%       37%     27%          -- -39%
hash        1279/s   391% 305%    262%     156%      125%    109%         64%   --

From this we can see that using Hash::Ordered is the way to go if you don't need it to behave like a normal hash (ie a tied hash).

Here is the benchmark:

#!/usr/bin/perl

use strict;
use warnings;

use Tie::IxHash;
use Tie::Hash::Indexed;
use Hash::Ordered;
use Benchmark;

#this is O(n) instead of O(n log n) or worse
sub perfect_insert_sort {
    my $h = shift;
    my @k;
    for my $k (keys %$h) {
        $k[$h->{$k}{order}] = $k;
    }
    return @k;
}

my @keys = qw/a b c d e f/;
my %subs = (
    hash => sub {
        my $i;
        my %h = map { $_ => $i++ } @keys;
        return join ", ", keys %h;
    },
    hoh => sub {
        my $i;
        my $order;
        my %h = map {
            $_ => { data => $i++, order => $order++ }
        } @keys;
        return join ", ", sort { $h{$a}{order} <=> $h{$b}{order} }
            keys %h;
    },
    hoh_pis => sub {
        my $i;
        my $order;
        my %h = map {
            $_ => { data => $i++, order => $order++ }
        } @keys;
        return join ", ", perfect_insert_sort \%h;
    },
    IxHash => sub {
        my $i;
        tie my %h, "Tie::IxHash";
        %h = map { $_ => $i++ } @keys;
        return join ", ", keys %h;
    },
    Indexed => sub {
        my $i;
        tie my %h, "Tie::Hash::Indexed";
        %h = map { $_ => $i++ } @keys;
        return join ", ", keys %h;
    },
    IxHashOO => sub {
        my $i;
        my $o = tie my %h, "Tie::IxHash",
            map { $_ => $i++ } @keys;
        return join ", ", $o->Keys;
    },
    IndexedOO => sub {
        my $i;
        my $o = tie my %h, "Tie::Hash::Indexed",
            map { $_ => $i++ } @keys;
        my @k = my $k = $o->FIRSTKEY;
        while ($k = $o->NEXTKEY($k)) {
            push @k, $k;
        }
        return join ", ", @k;
    },
    HashOrdered => sub {
    my $i;
        my $oh = Hash::Ordered->new( map { $_ => $i++ } @keys );
        return join ", ", $oh->keys;
    },
);

for my $sub (keys %subs) {
    print "$sub: ", $subs{$sub}(), "\n";
}

@keys = 1 .. 1_000;
Benchmark::cmpthese -2, \%subs;
Comments