tbop tbop - 5 months ago 31
Ruby Question

How to make sure REXML::Formatters::Pretty uses \t instead of white-space for indentation

It seems to me that there's no way of making sure REXML::Formatters::Pretty can use \t instead of white-space for the indentation strategy in the XML Tree. The only thing I can do is to define how many white spaces are used per indentation level.

Am I wrong?

Answer

I am not sure why the REXML library does not provide this option, since it could easily support it internally, but you can roll your own formatter:

# Pretty lives in REXML; load it so this file can be evaluated on its own.
require "rexml/document"

module REXML
  module Formatters
    # A variant of REXML::Formatters::Pretty whose indentation string is
    # configurable, so that e.g. "\t" can be used instead of spaces.
    #
    # Each nesting level is rendered as +style+ repeated +indentation+ times
    # (the current depth is tracked in @level, which grows by @indentation
    # per level).
    class Prettier < Pretty
      # The string that is repeated to build a line's indentation.
      attr_accessor :style

      # @param indentation [Integer] number of +indent_style+ repetitions per nesting level
      # @param indent_style [String] the indentation unit, e.g. " " or "\t"
      # @param ie_hack [Boolean] forwarded to Pretty; when set, a space is
      #   written before the "/>" of an empty element
      def initialize(indentation = 2, indent_style = " ", ie_hack = false)
        @style = indent_style
        super(indentation, ie_hack)
      end

      protected

      # Writes +node+ (start tag, attributes, children, end tag) to +output+,
      # indenting with +style+.
      def write_element(node, output)
        output << style * @level
        output << "<#{node.expanded_name}"

        unless node.attributes.empty?
          node.attributes.each_attribute do |attr|
            output << " "
            attr.write(output)
          end
        end

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          # If compact and all children are text, and if the formatted output
          # is less than the specified width, then try to print everything on
          # one line.
          skip = false
          if compact && node.children.all? { |child| child.kind_of?(Text) }
            string = +""
            old_level = @level
            # Render the text without indentation to measure its real length.
            @level = 0
            node.children.each { |child| write(child, string) }
            @level = old_level
            if string.length < @width
              output << string
              skip = true
            end
          end
          unless skip
            output << "\n"
            @level += @indentation
            node.children.each do |child|
              # Whitespace-only text nodes would only produce blank lines.
              next if child.kind_of?(Text) && child.to_s.strip.empty?

              write(child, output)
              output << "\n"
            end
            @level -= @indentation
            output << style * @level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes a text node with whitespace collapsed to single spaces,
      # wrapped and indented with +style+.
      def write_text(node, output)
        # Use the non-destructive gsub/squeeze: node.to_s can hand back the
        # node's own string, and editing it in place would alter the document
        # that is being printed.
        text = node.to_s.gsub(/\s/, " ").squeeze(" ")
        text = wrap(text, @width - @level)
        text = indent_text(text, @level, style, true)
        output << (style * @level + text)
      end

      # Writes the indentation, then delegates to Default#write_comment
      # directly (bypassing Pretty's override, which this class replaces).
      def write_comment(node, output)
        output << style * @level
        Default.instance_method(:write_comment).bind(self).call(node, output)
      end

      # Writes the indentation, then delegates to Default#write_cdata
      # directly (bypassing Pretty's override, which this class replaces).
      def write_cdata(node, output)
        output << style * @level
        Default.instance_method(:write_cdata).bind(self).call(node, output)
      end
    end
  end
end

Now you can specify your own indentation level and an indent style, e.g.

require "rexml/document"
include REXML

# Sample document to be re-indented.
string = <<EOF
  <mydoc>
    <someelement attribute="nanoo">Text, text, text</someelement>
  </mydoc>
EOF
doc = Document.new string

# Prettier is a class, so it must be instantiated with .new:
# two repetitions of "h" per nesting level.
f = Formatters::Prettier.new(2, "h")
f.write(doc, $stdout)
#<mydoc>
#hh<someelement attribute='nanoo'>
#hhhhText, text, text
#hh</someelement>
#</mydoc>

I used "h" to show how the indentation works, since a \t is not visibly distinguishable in $stdout, but in your case this would be

# One "\t" per nesting level; the class must be instantiated with .new.
f = Formatters::Prettier.new(1, "\t")