Prashant Ravi Prashant Ravi - 1 month ago 9
Scala Question

Running dummy_rocc_test on zed board

The dummy_rocc_test runs fine on spike with the custom0 instruction but when I put it on the zed board and run it with

./fesvr-zynq pk dummy_rocc_test
it gives me an error of illegal instruction.

I understand this is because the rocc io is not but in the default config of rocket chip, but I'd like to know how to enable the RoCC interface in order to run the dummy_roc_test on zed board. I tried giving the below listed value to the "BuildRoCC" as shown in the snippet

case BuildRoCC => Some(() => (Module(new AccumulatorExample, { case CoreName => "rocket" })))


but while doing make rocket, I'm getting the below error:

[error] /home/prashantravi/rocket-chip/src/main/scala/Configs.scala:100: could not find implicit value for parameter p: cde.Parameters
[error] case BuildRoCC => Some(() => (Module(new AccumulatorExample, { case CoreName => "rocket" })))
[error] ^
[error] one error found
[error] (rocketchip/compile:compileIncremental) Compilation failed
[error] Total time: 8 s, completed Oct 27, 2015 11:24:59 AM


the configs.scala code is as below.

// See LICENSE for license details.

package rocketchip

import Chisel._
import junctions._
import uncore._
import rocket._
import rocket.Util._
import zscale._
import scala.math.max
import DefaultTestSuites._
import cde.{Parameters, Config, Dump, Knob}

class DefaultConfig extends Config (
topDefinitions = { (pname,site,here) =>
type PF = PartialFunction[Any,Any]
def findBy(sname:Any):Any = here[PF](site[Any](sname))(pname)
def genCsrAddrMap: AddrMap = {
val csrSize = (1 << 12) * (site(XLen) / 8)
val csrs = (0 until site(NTiles)).map{ i =>
AddrMapEntry(s"csr$i", None, MemSize(csrSize, AddrMapConsts.RW))
}
val scrSize = site(HtifKey).nSCR * (site(XLen) / 8)
val scr = AddrMapEntry("scr", None, MemSize(scrSize, AddrMapConsts.RW))
new AddrMap(csrs :+ scr)
}
pname match {
case HtifKey => HtifParameters(
width = Dump("HTIF_WIDTH", 16),
nSCR = 64,
offsetBits = site(CacheBlockOffsetBits),
nCores = site(NTiles))
//Memory Parameters
case PAddrBits => 32
case PgIdxBits => 12
case PgLevels => if (site(XLen) == 64) 3 /* Sv39 */ else 2 /* Sv32 */
case PgLevelBits => site(PgIdxBits) - log2Up(site(XLen)/8)
case VPNBits => site(PgLevels) * site(PgLevelBits)
case PPNBits => site(PAddrBits) - site(PgIdxBits)
case VAddrBits => site(VPNBits) + site(PgIdxBits)
case ASIdBits => 7
case MIFTagBits => Dump("MEM_TAG_BITS",
log2Up(site(NAcquireTransactors)+2) +
log2Up(site(NBanksPerMemoryChannel)) +
log2Up(site(NMemoryChannels)))
case MIFDataBits => Dump("MEM_DATA_BITS", 128)
case MIFAddrBits => Dump("MEM_ADDR_BITS", site(PAddrBits) - site(CacheBlockOffsetBits))
case MIFDataBeats => site(CacheBlockBytes) * 8 / site(MIFDataBits)
case NastiKey => NastiParameters(
dataBits = site(MIFDataBits),
addrBits = site(PAddrBits),
idBits = site(MIFTagBits))
//Params used by all caches
case NSets => findBy(CacheName)
case NWays => findBy(CacheName)
case RowBits => findBy(CacheName)
case NTLBEntries => findBy(CacheName)
case "L1I" => {
case NSets => Knob("L1I_SETS") //64
case NWays => Knob("L1I_WAYS") //4
case RowBits => 4*site(CoreInstBits)
case NTLBEntries => 8
}:PF
case "L1D" => {
case NSets => Knob("L1D_SETS") //64
case NWays => Knob("L1D_WAYS") //4
case RowBits => 2*site(CoreDataBits)
case NTLBEntries => 8
}:PF
case ECCCode => None
case Replacer => () => new RandomReplacement(site(NWays))
case AmoAluOperandBits => site(XLen)
//L1InstCache
case BtbKey => BtbParameters()
//L1DataCache
case WordBits => site(XLen)
case StoreDataQueueDepth => 17
case ReplayQueueDepth => 16
case NMSHRs => Knob("L1D_MSHRS")
case NIOMSHRs => 1
case LRSCCycles => 32
//L2 Memory System Params
case NAcquireTransactors => 7
case L2StoreDataQueueDepth => 1
case L2DirectoryRepresentation => new NullRepresentation(site(NTiles))
case BuildL2CoherenceManager => (p: Parameters) =>
Module(new L2BroadcastHub()(p.alterPartial({
case InnerTLId => "L1toL2"
case OuterTLId => "L2toMC" })))
//Tile Constants
case BuildTiles => {
TestGeneration.addSuites(rv64i.map(_("p")))
TestGeneration.addSuites((if(site(UseVM)) List("pt","v") else List("pt")).flatMap(env => rv64u.map(_(env))))
TestGeneration.addSuites(if(site(NTiles) > 1) List(mtBmarks, bmarks) else List(bmarks))
List.fill(site(NTiles)){ (r: Bool, p: Parameters) =>
Module(new RocketTile(resetSignal = r)(p.alterPartial({case TLId => "L1toL2"})))
}
}
case BuildRoCC => Some(() => (Module(new AccumulatorExample, { case CoreName => "rocket" })))
case RoccNMemChannels => 1
//Rocket Core Constants
case FetchWidth => 1
case RetireWidth => 1
case UseVM => true
case UsePerfCounters => true
case FastLoadWord => true
case FastLoadByte => false
case FastMulDiv => true
case XLen => 64
case UseFPU => {
val env = if(site(UseVM)) List("p","pt","v") else List("p","pt")
if(site(FDivSqrt)) TestGeneration.addSuites(env.map(rv64uf))
else TestGeneration.addSuites(env.map(rv64ufNoDiv))
true
}
case FDivSqrt => true
case SFMALatency => 2
case DFMALatency => 3
case CoreInstBits => 32
case CoreDataBits => site(XLen)
case NCustomMRWCSRs => 0
//Uncore Paramters
case RTCPeriod => 100 // gives 10 MHz RTC assuming 1 GHz uncore clock
case LNEndpoints => site(TLKey(site(TLId))).nManagers + site(TLKey(site(TLId))).nClients
case LNHeaderBits => log2Ceil(site(TLKey(site(TLId))).nManagers) +
log2Up(site(TLKey(site(TLId))).nClients)
case TLKey("L1toL2") =>
TileLinkParameters(
coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)),
nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels),
nCachingClients = site(NTiles),
nCachelessClients = 1 + site(NTiles) *
(1 + (if(site(BuildRoCC).isEmpty) 0 else site(RoccNMemChannels))),
maxClientXacts = max(site(NMSHRs) + site(NIOMSHRs),
if(site(BuildRoCC).isEmpty) 1 else site(RoccMaxTaggedMemXacts)),
maxClientsPerPort = if(site(BuildRoCC).isEmpty) 1 else 2,
maxManagerXacts = site(NAcquireTransactors) + 2,
dataBits = site(CacheBlockBytes)*8)
case TLKey("L2toMC") =>
TileLinkParameters(
coherencePolicy = new MEICoherence(new NullRepresentation(site(NBanksPerMemoryChannel))),
nManagers = 1,
nCachingClients = site(NBanksPerMemoryChannel),
nCachelessClients = 0,
maxClientXacts = 1,
maxClientsPerPort = site(NAcquireTransactors) + 2,
maxManagerXacts = 1,
dataBits = site(CacheBlockBytes)*8)
case TLKey("Outermost") => site(TLKey("L2toMC")).copy(dataBeats = site(MIFDataBeats))
case NTiles => Knob("NTILES")
case NMemoryChannels => 1
case NBanksPerMemoryChannel => Knob("NBANKS")
case NOutstandingMemReqsPerChannel => site(NBanksPerMemoryChannel)*(site(NAcquireTransactors)+2)
case BankIdLSB => 0
case CacheBlockBytes => 64
case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
case UseBackupMemoryPort => true
case MMIOBase => BigInt(1 << 30) // 1 GB
case ExternalIOStart => 2 * site(MMIOBase)
case GlobalAddrMap => AddrMap(
AddrMapEntry("mem", None, MemSize(site(MMIOBase), AddrMapConsts.RWX)),
AddrMapEntry("conf", None, MemSubmap(site(ExternalIOStart) - site(MMIOBase), genCsrAddrMap)),
AddrMapEntry("io", Some(site(ExternalIOStart)), MemSize(2 * site(MMIOBase), AddrMapConsts.RW)))
}},
knobValues = {
case "NTILES" => 1
case "NBANKS" => 1
case "L1D_MSHRS" => 2
case "L1D_SETS" => 64
case "L1D_WAYS" => 4
case "L1I_SETS" => 64
case "L1I_WAYS" => 4
}
)
class DefaultVLSIConfig extends DefaultConfig
class DefaultCPPConfig extends DefaultConfig

class With2Cores extends Config(knobValues = { case "NTILES" => 2 })
class With4Cores extends Config(knobValues = { case "NTILES" => 4 })
class With8Cores extends Config(knobValues = { case "NTILES" => 8 })

class With2Banks extends Config(knobValues = { case "NBANKS" => 2 })
class With4Banks extends Config(knobValues = { case "NBANKS" => 4 })
class With8Banks extends Config(knobValues = { case "NBANKS" => 8 })

class WithL2Cache extends Config(
(pname,site,here) => pname match {
case "L2_CAPACITY_IN_KB" => Knob("L2_CAPACITY_IN_KB")
case "L2Bank" => {
case NSets => (((here[Int]("L2_CAPACITY_IN_KB")*1024) /
site(CacheBlockBytes)) /
site(NBanksPerMemoryChannel)*site(NMemoryChannels)) /
site(NWays)
case NWays => Knob("L2_WAYS")
case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat
}: PartialFunction[Any,Any]
case NAcquireTransactors => 2
case NSecondaryMisses => 4
case L2DirectoryRepresentation => new FullRepresentation(site(NTiles))
case BuildL2CoherenceManager => (p: Parameters) =>
Module(new L2HellaCacheBank()(p.alterPartial({
case CacheName => "L2Bank"
case InnerTLId => "L1toL2"
case OuterTLId => "L2toMC"})))
},
knobValues = { case "L2_WAYS" => 8; case "L2_CAPACITY_IN_KB" => 2048 }
)

class WithL2Capacity2048 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 2048 })
class WithL2Capacity1024 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 1024 })
class WithL2Capacity512 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 512 })
class WithL2Capacity256 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 256 })
class WithL2Capacity128 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 128 })
class WithL2Capacity64 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 64 })

class DefaultL2Config extends Config(new WithL2Cache ++ new DefaultConfig)
class DefaultL2VLSIConfig extends Config(new WithL2Cache ++ new DefaultVLSIConfig)
class DefaultL2CPPConfig extends Config(new WithL2Cache ++ new DefaultCPPConfig)
class DefaultL2FPGAConfig extends Config(new WithL2Capacity64 ++ new WithL2Cache ++ new DefaultFPGAConfig)

class WithZscale extends Config(
(pname,site,here) => pname match {
case BuildZscale => {
TestGeneration.addSuites(List(rv32ui("p"), rv32um("p")))
TestGeneration.addSuites(List(zscaleBmarks))
(r: Bool, p: Parameters) => Module(new Zscale(r)(p))
}
case BootROMCapacity => Dump("BOOT_CAPACITY", 16*1024)
case DRAMCapacity => Dump("DRAM_CAPACITY", 64*1024*1024)
}
)

class ZscaleConfig extends Config(new WithZscale ++ new DefaultConfig)

class FPGAConfig extends Config (
(pname,site,here) => pname match {
case NAcquireTransactors => 4
case UseBackupMemoryPort => false
}
)

class DefaultFPGAConfig extends Config(new FPGAConfig ++ new DefaultConfig)

class SmallConfig extends Config (
topDefinitions = { (pname,site,here) => pname match {
case UseFPU => false
case FastMulDiv => false
case NTLBEntries => 4
case BtbKey => BtbParameters(nEntries = 8)
}},
knobValues = {
case "L1D_SETS" => 64
case "L1D_WAYS" => 1
case "L1I_SETS" => 64
case "L1I_WAYS" => 1
}
)

class DefaultFPGASmallConfig extends Config(new SmallConfig ++ new DefaultFPGAConfig)

class ExampleSmallConfig extends Config(new SmallConfig ++ new DefaultConfig)

class MultibankConfig extends Config(new With2Banks ++ new DefaultConfig)
class MultibankL2Config extends Config(
new With2Banks ++ new WithL2Cache ++ new DefaultConfig)


The Rocc.scala where the actual accumulator example is given is as below

// See LICENSE for license details.

package rocket

import Chisel._
import uncore._
import Util._
import cde.{Parameters, Field}

case object RoccMaxTaggedMemXacts extends Field[Int]
case object RoccNMemChannels extends Field[Int]

class RoCCInstruction extends Bundle
{
val funct = Bits(width = 7)
val rs2 = Bits(width = 5)
val rs1 = Bits(width = 5)
val xd = Bool()
val xs1 = Bool()
val xs2 = Bool()
val rd = Bits(width = 5)
val opcode = Bits(width = 7)
}

class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) {
val inst = new RoCCInstruction
val rs1 = Bits(width = xLen)
val rs2 = Bits(width = xLen)
}

class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
val rd = Bits(width = 5)
val data = Bits(width = xLen)
}

class RoCCInterface(implicit p: Parameters) extends Bundle {
val cmd = Decoupled(new RoCCCommand).flip
val resp = Decoupled(new RoCCResponse)
val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
val busy = Bool(OUTPUT)
val s = Bool(INPUT)
val interrupt = Bool(OUTPUT)

// These should be handled differently, eventually
val imem = new ClientUncachedTileLinkIO
val dmem = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO)
val iptw = new TLBPTWIO
val dptw = new TLBPTWIO
val pptw = new TLBPTWIO
val exception = Bool(INPUT)
}

abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) {
val io = new RoCCInterface
io.mem.req.bits.phys := Bool(true) // don't perform address translation
}

class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) {
val regfile = Mem(UInt(width = xLen), n)
val busy = Reg(init=Vec(Bool(false), n))

val cmd = Queue(io.cmd)
val funct = cmd.bits.inst.funct
val addr = cmd.bits.inst.rs2(log2Up(n)-1,0)
val doWrite = funct === UInt(0)
val doRead = funct === UInt(1)
val doLoad = funct === UInt(2)
val doAccum = funct === UInt(3)
val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)

// datapath
val addend = cmd.bits.rs1
val accum = regfile(addr)
val wdata = Mux(doWrite, addend, accum + addend)

when (cmd.fire() && (doWrite || doAccum)) {
regfile(addr) := wdata
}

when (io.mem.resp.valid) {
regfile(memRespTag) := io.mem.resp.bits.data
}

// control
when (io.mem.req.fire()) {
busy(addr) := Bool(true)
}

when (io.mem.resp.valid) {
busy(memRespTag) := Bool(false)
}

val doResp = cmd.bits.inst.xd
val stallReg = busy(addr)
val stallLoad = doLoad && !io.mem.req.ready
val stallResp = doResp && !io.resp.ready

cmd.ready := !stallReg && !stallLoad && !stallResp
// command resolved if no stalls AND not issuing a load that will need a request

// PROC RESPONSE INTERFACE
io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
// valid response if valid command, need a response, and no stalls
io.resp.bits.rd := cmd.bits.inst.rd
// Must respond with the appropriate tag or undefined behavior
io.resp.bits.data := accum
// Semantics is to always send out prior accumulator register value

io.busy := cmd.valid || busy.reduce(_||_)
// Be busy when have pending memory requests or committed possibility of pending requests
io.interrupt := Bool(false)
// Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)

// MEMORY REQUEST INTERFACE
io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
io.mem.req.bits.addr := addend
io.mem.req.bits.tag := addr
io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
io.mem.req.bits.data := Bits(0) // we're not performing any stores...
io.mem.invalidate_lr := false

io.imem.acquire.valid := false
io.imem.grant.ready := false
io.dmem.head.acquire.valid := false
io.dmem.head.grant.ready := false
io.iptw.req.valid := false
io.dptw.req.valid := false
io.pptw.req.valid := false
}

Answer

The issue has been solved by adding the following piece of code to configs.scala

class WithAccumRocc extends Config(
  (pname,site,here) => pname match {
    case RoccNMemChannels => 1
    case RoccMaxTaggedMemXacts => 0
    case BuildRoCC => {
      Some((p: Parameters) =>
  Module(new AccumulatorExample()(p.alterPartial({ case CoreName => "AccumRocc" }))))
    }   
  }
)
class WithRoCCConfig extends Config(new WithAccumRocc ++ new DefaultFPGAConfig)

after this build rocket with the new config, i.e. make rocket CONFIG=WithRoCCConfig

also do not forget to regenerate the vivado project and bitstream with the same config parameter.