maasha maasha - 4 months ago 16
Ruby Question

How to create a memory efficient Ruby Pipe class with lazy evaluation?

I would like to create a Pipe class to emulate Unix commands in Ruby in a two-step fashion. The first step is to compile a pipeline by adding a number of commands, and the second step is to run that pipeline. Here is a mockup:

#!/usr/bin/env ruby

# Mockup of the desired API (Pipe is not defined in this snippet).
# Step 1: compile the pipeline by queueing commands with their options.
p = Pipe.new
p.add(:cat, input: "table.txt")
p.add(:cut, field: 2)
p.add(:grep, pattern: "foo")
p.add(:puts, output: "result.txt")
# Step 2: run the compiled pipeline; the goal is for records to be
# processed one at a time from this point on.
p.run


The question is how to code this using lazy evaluation, so that the pipe is processed record by record when `run()` is called, without loading all of the data into memory at any one time?

Answer

This seems to work:

#!/usr/bin/env ruby

require 'pp'

# A small Unix-style pipeline builder.
#
# Commands are queued with #add and only executed when #run is called.
# Each command wraps the previous stage in a lazy enumerator, so records
# flow through the whole pipeline one at a time instead of being collected
# into an intermediate array between stages.
class Pipe
  def initialize
    @commands = []
  end

  # Queues a command for later execution.
  #
  # @param command [Symbol] name of a command method (:cat, :cut, :grep, :save)
  # @param options [Hash] options handed to the command when the pipe runs
  # @return [Pipe] self, so calls can be chained
  def add(command, options = {})
    @commands << [command, options]

    self
  end

  # Builds the chain of lazy enumerators and drains it once.
  #
  # @return [Enumerator::Lazy] the last stage of the pipeline (iterating it
  #   again re-executes every stage); an empty lazy enumerator when no
  #   commands were added
  def run
    # Without any stage there is nothing to drain.
    return [].lazy if @commands.empty?

    enum = @commands.reduce(nil) do |upstream, (command, options)|
      method(command).call(upstream, options)
    end

    # Nothing is read or written until the final stage is iterated.
    enum.each {}

    enum
  end

  # Renders the pipeline as the Ruby expression that would build and run it.
  #
  # Option values are rendered with #inspect so that strings containing
  # quotes or backslashes, symbols and regexps come out as valid literals.
  #
  # @return [String]
  def to_s
    calls = @commands.map do |command, options|
      args = [":#{command}"]
      args.concat(options.map { |key, value| "#{key}: #{value.inspect}" })

      ".add(#{args.join(', ')})"
    end

    "Pipe.new#{calls.join}.run"
  end

  private

  # Emits every upstream record (if any), then every line of a file.
  #
  # @param enum [Enumerable, nil] upstream stage; nil when cat is first
  # @param options [Hash] :input is the path of the file to read
  # @return [Enumerator::Lazy] lines, including their line terminators
  def cat(enum, options)
    Enumerator.new do |yielder|
      # #each is required here: #map on a lazy upstream would only build
      # another lazy enumerator and never forward a single record.
      enum.each { |line| yielder << line } if enum

      File.open(options[:input]) do |ios|
        ios.each { |line| yielder << line }
      end
    end.lazy
  end

  # Splits each record on a delimiter pattern and emits one field.
  #
  # @param enum [Enumerable] upstream stage
  # @param options [Hash] :delimiter is interpolated into a regexp (when it
  #   is omitted the pattern is empty and the line is split into single
  #   characters); :field is the zero-based index of the field to emit
  # @return [Enumerator::Lazy] the selected field per record, nil for
  #   records that have no field at that index
  def cut(enum, options)
    Enumerator.new do |yielder|
      # Compile the pattern once per run instead of once per record.
      delimiter = %r{#{options[:delimiter]}}

      enum.each do |line|
        yielder << line.chomp.split(delimiter)[options[:field]]
      end
    end.lazy
  end

  # Emits only the records that match a pattern.
  #
  # @param enum [Enumerable] upstream stage
  # @param options [Hash] :pattern is a String or Regexp passed to String#match
  # @return [Enumerator::Lazy] the matching records, unchanged
  def grep(enum, options)
    Enumerator.new do |yielder|
      enum.each do |line|
        yielder << line if line.match(options[:pattern])
      end
    end.lazy
  end

  # Writes every record to a file and passes it on downstream.
  #
  # @param enum [Enumerable] upstream stage
  # @param options [Hash] :output is the path of the file to (over)write
  # @return [Enumerator::Lazy] the records, unchanged
  def save(enum, options)
    Enumerator.new do |yielder|
      # The file is opened when iteration starts and closed when it ends.
      File.open(options[:output], 'w') do |ios|
        enum.each do |line|
          ios.puts line
          yielder << line
        end
      end
    end.lazy
  end
end

# Demo: extract the third comma-separated column of table.txt, keep the
# values containing "4", and write them to result.txt.
# Pipe#add returns the pipe itself, so the stages can be chained.
pipeline = Pipe.new
pipeline.add(:cat, input: "table.txt")
        .add(:cut, field: 2, delimiter: ',\s*')
        .add(:grep, pattern: "4")
        .add(:save, output: "result.txt")
        .run

# Print the pipeline rendered by Pipe#to_s.
puts pipeline