Hongxu Chen Hongxu Chen - 2 months ago 5
Scala Question

How is a Scala Map constructed?

Scala may back a Map with different implementation classes depending on how many entries it holds. As a result,

// Sample keys; prefixes of this list (lengths 0 through 6) are turned into maps below.
val l = List("a", "b", "c", "d", "e", "f", "g")

// One map per prefix length: the i-th map pairs each of the first i elements
// with its position. Each map is built via toMap.
val ms = List.tabulate(l.length) { i =>
  l.take(i).zipWithIndex.toMap
}

// Print the runtime class backing each map.
for (m <- ms) println(m.getClass)


would emit:

class scala.collection.immutable.Map$EmptyMap$
class scala.collection.immutable.Map$Map1
class scala.collection.immutable.Map$Map2
class scala.collection.immutable.Map$Map3
class scala.collection.immutable.Map$Map4
class scala.collection.immutable.HashMap$HashTrieMap
class scala.collection.immutable.HashMap$HashTrieMap


(The output above was produced with `toMap`, but building the maps with `apply` gives very similar results.)

I found that `toMap` is defined as

/** Collects the elements of `self` into an immutable map.
  *
  * @tparam T the key type
  * @tparam U the value type
  * @param ev evidence that every element of type `A` is a `(T, U)` pair
  * @return whatever the `immutable.Map` builder yields once all elements were added
  */
def toMap[T, U](implicit ev: A <:< (T, U)): immutable.Map[T, U] = {
  val builder = immutable.Map.newBuilder[T, U]
  // Feed every element to the builder; the builder decides the concrete map class.
  self.foreach(pair => builder += pair)
  builder.result()
}


The magic part would be the `result()` method of `Builder`, since it ultimately determines which Map subclass is instantiated.

Inside `object Map` there are `EmptyMap`, `Map1`, `Map2`, `Map3`, and `Map4`, and in `object HashMap` there is `HashTrieMap`; additionally, both objects contain an implicit `MapCanBuildFrom`.

So how does the Scala compiler determine which Map subclass to use?

(This question may be more about `CanBuildFrom`; e.g., `Seq(1)` actually instantiates a `List(1)`.)

Answer

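I peeked into the Scala library, and it seems that the only references to these four specific implementations of Map are in their own `updated` and `-` methods. The magic starts in the `newBuilder` method of the map factory (for `immutable.Map` this is the generic one inherited from `GenMapFactory`; `MutableMapFactory` only covers mutable maps), which seeds the builder with `Map.empty`, i.e. the `EmptyMap` object. Calling `updated` on it with a key and a value produces a `Map1`. Calling `updated` on a `Map1` yields either another `Map1` (same key) or a `Map2` (new key), while calling `-` can take it back to `EmptyMap`, and so on up to `Map4`; adding a fifth distinct key to a `Map4` switches to a `HashMap`.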

/** The map with no bindings. Lookups always miss, removal is a no-op, and
  * adding a binding produces a [[Map1]].
  */
private object EmptyMap extends AbstractMap[Any, Nothing] with Map[Any, Nothing] with Serializable {
    // --- queries: there is nothing stored, so everything is empty ---
    def get(key: Any): Option[Nothing] = None
    def iterator: Iterator[(Any, Nothing)] = Iterator.empty
    override def size: Int = 0

    // --- additions: the first binding moves to the one-entry representation ---
    override def updated [B1] (key: Any, value: B1): Map[Any, B1] =
      new Map1(key, value)
    def + [B1](kv: (Any, B1)): Map[Any, B1] =
      updated(kv._1, kv._2)

    // --- removal: nothing to remove, so the same object is returned ---
    def - (key: Any): Map[Any, Nothing] = this
  }

  /** Immutable map holding exactly one binding, `key1 -> value1`.
    *
    * Updating the stored key yields another `Map1`; adding a different key
    * yields a `Map2`; removing the stored key yields `Map.empty`.
    */
  class Map1[A, +B](key1: A, value1: B) extends AbstractMap[A, B] with Map[A, B] with Serializable {
    override def size = 1

    /** Returns the stored value when `key` equals `key1`, otherwise `None`. */
    def get(key: A): Option[B] = key match {
      case k if k == key1 => Some(value1)
      case _              => None
    }

    def iterator = Iterator((key1, value1))

    /** Replaces the value for `key1`, or grows to a two-entry map for a new key. */
    override def updated [B1 >: B] (key: A, value: B1): Map[A, B1] = key match {
      case k if k == key1 => new Map1(key1, value)
      case _              => new Map2(key1, value1, key, value)
    }

    def + [B1 >: B](kv: (A, B1)): Map[A, B1] =
      updated(kv._1, kv._2)

    /** Removes `key` if it is the stored key; otherwise returns this map unchanged. */
    def - (key: A): Map[A, B] = key match {
      case k if k == key1 => Map.empty
      case _              => this
    }

    /** Applies `f` to the single binding. */
    override def foreach[U](f: ((A, B)) => U): Unit = {
      f((key1, value1))
    }
  }

  /** Immutable map holding exactly two bindings, checked in the order
    * `key1`, `key2`.
    *
    * Updating an existing key yields another `Map2`; adding a new key yields a
    * `Map3`; removing a stored key yields a `Map1` with the remaining binding.
    */
  class Map2[A, +B](key1: A, value1: B, key2: A, value2: B) extends AbstractMap[A, B] with Map[A, B] with Serializable {
    override def size = 2

    /** Looks `key` up against `key1` and then `key2`. */
    def get(key: A): Option[B] = key match {
      case k if k == key1 => Some(value1)
      case k if k == key2 => Some(value2)
      case _              => None
    }

    def iterator = Iterator((key1, value1), (key2, value2))

    /** Replaces the value of a stored key in place, or grows to a three-entry map. */
    override def updated [B1 >: B] (key: A, value: B1): Map[A, B1] = key match {
      case k if k == key1 => new Map2(key1, value, key2, value2)
      case k if k == key2 => new Map2(key1, value1, key2, value)
      case _              => new Map3(key1, value1, key2, value2, key, value)
    }

    def + [B1 >: B](kv: (A, B1)): Map[A, B1] =
      updated(kv._1, kv._2)

    /** Drops the binding for `key` if present; otherwise returns this map unchanged. */
    def - (key: A): Map[A, B] = key match {
      case k if k == key1 => new Map1(key2, value2)
      case k if k == key2 => new Map1(key1, value1)
      case _              => this
    }

    /** Applies `f` to both bindings, first `key1` then `key2`. */
    override def foreach[U](f: ((A, B)) => U): Unit = {
      f((key1, value1))
      f((key2, value2))
    }
  }

  /** Immutable map holding exactly three bindings, checked in the order
    * `key1`, `key2`, `key3`.
    *
    * Updating an existing key yields another `Map3`; adding a new key yields a
    * `Map4`; removing a stored key yields a `Map2` with the remaining bindings.
    */
  class Map3[A, +B](key1: A, value1: B, key2: A, value2: B, key3: A, value3: B) extends AbstractMap[A, B] with Map[A, B] with Serializable {
    override def size = 3

    /** Looks `key` up against `key1`, `key2`, `key3` in that order. */
    def get(key: A): Option[B] = key match {
      case k if k == key1 => Some(value1)
      case k if k == key2 => Some(value2)
      case k if k == key3 => Some(value3)
      case _              => None
    }

    def iterator = Iterator((key1, value1), (key2, value2), (key3, value3))

    /** Replaces the value of a stored key in place, or grows to a four-entry map. */
    override def updated [B1 >: B] (key: A, value: B1): Map[A, B1] = key match {
      case k if k == key1 => new Map3(key1, value, key2, value2, key3, value3)
      case k if k == key2 => new Map3(key1, value1, key2, value, key3, value3)
      case k if k == key3 => new Map3(key1, value1, key2, value2, key3, value)
      case _              => new Map4(key1, value1, key2, value2, key3, value3, key, value)
    }

    def + [B1 >: B](kv: (A, B1)): Map[A, B1] =
      updated(kv._1, kv._2)

    /** Drops the binding for `key` if present; otherwise returns this map unchanged. */
    def - (key: A): Map[A, B] = key match {
      case k if k == key1 => new Map2(key2, value2, key3, value3)
      case k if k == key2 => new Map2(key1, value1, key3, value3)
      case k if k == key3 => new Map2(key1, value1, key2, value2)
      case _              => this
    }

    /** Applies `f` to the three bindings in storage order. */
    override def foreach[U](f: ((A, B)) => U): Unit = {
      f((key1, value1))
      f((key2, value2))
      f((key3, value3))
    }
  }

  /** Immutable map holding exactly four bindings, checked in the order
    * `key1` .. `key4`.
    *
    * Updating an existing key yields another `Map4`; adding a fifth distinct
    * key produces a `HashMap` containing all five bindings; removing a stored
    * key yields a `Map3` with the remaining bindings.
    */
  class Map4[A, +B](key1: A, value1: B, key2: A, value2: B, key3: A, value3: B, key4: A, value4: B) extends AbstractMap[A, B] with Map[A, B] with Serializable {
    override def size = 4

    /** Looks `key` up against `key1` .. `key4` in that order. */
    def get(key: A): Option[B] = key match {
      case k if k == key1 => Some(value1)
      case k if k == key2 => Some(value2)
      case k if k == key3 => Some(value3)
      case k if k == key4 => Some(value4)
      case _              => None
    }

    def iterator = Iterator((key1, value1), (key2, value2), (key3, value3), (key4, value4))

    /** Replaces the value of a stored key in place; a new key moves to a `HashMap`. */
    override def updated [B1 >: B] (key: A, value: B1): Map[A, B1] = key match {
      case k if k == key1 => new Map4(key1, value, key2, value2, key3, value3, key4, value4)
      case k if k == key2 => new Map4(key1, value1, key2, value, key3, value3, key4, value4)
      case k if k == key3 => new Map4(key1, value1, key2, value2, key3, value, key4, value4)
      case k if k == key4 => new Map4(key1, value1, key2, value2, key3, value3, key4, value)
      // Fifth distinct key: this fixed-size representation ends here.
      case _ =>
        new HashMap + ((key1, value1), (key2, value2), (key3, value3), (key4, value4), (key, value))
    }

    def + [B1 >: B](kv: (A, B1)): Map[A, B1] =
      updated(kv._1, kv._2)

    /** Drops the binding for `key` if present; otherwise returns this map unchanged. */
    def - (key: A): Map[A, B] = key match {
      case k if k == key1 => new Map3(key2, value2, key3, value3, key4, value4)
      case k if k == key2 => new Map3(key1, value1, key3, value3, key4, value4)
      case k if k == key3 => new Map3(key1, value1, key2, value2, key4, value4)
      case k if k == key4 => new Map3(key1, value1, key2, value2, key3, value3)
      case _              => this
    }

    /** Applies `f` to the four bindings in storage order. */
    override def foreach[U](f: ((A, B)) => U): Unit = {
      f((key1, value1))
      f((key2, value2))
      f((key3, value3))
      f((key4, value4))
    }
  }
}