Search code examples
scalariscvchisel

Running dummy_rocc_test on zed board


The dummy_rocc_test runs fine on spike with the custom0 instruction but when I put it on the zed board and run it with ./fesvr-zynq pk dummy_rocc_test it gives me an error of illegal instruction.

I understand this is because the rocc io is not but in the default config of rocket chip, but I'd like to know how to enable the RoCC interface in order to run the dummy_roc_test on zed board. I tried giving the below listed value to the "BuildRoCC" as shown in the snippet

case BuildRoCC => Some(() => (Module(new AccumulatorExample, { case CoreName => "rocket" })))

but while doing make rocket, I'm getting the below error:

[error] /home/prashantravi/rocket-chip/src/main/scala/Configs.scala:100: could not find implicit value for parameter p: cde.Parameters
[error]       case BuildRoCC => Some(() => (Module(new AccumulatorExample, { case CoreName => "rocket" })))
[error]                                            ^
[error] one error found
[error] (rocketchip/compile:compileIncremental) Compilation failed
[error] Total time: 8 s, completed Oct 27, 2015 11:24:59 AM

the configs.scala code is as below.

// See LICENSE for license details.

package rocketchip

import Chisel._
import junctions._
import uncore._
import rocket._
import rocket.Util._
import zscale._
import scala.math.max
import DefaultTestSuites._
import cde.{Parameters, Config, Dump, Knob}

class DefaultConfig extends Config (
  topDefinitions = { (pname,site,here) => 
    type PF = PartialFunction[Any,Any]
    def findBy(sname:Any):Any = here[PF](site[Any](sname))(pname)
    def genCsrAddrMap: AddrMap = {
      val csrSize = (1 << 12) * (site(XLen) / 8)
      val csrs = (0 until site(NTiles)).map{ i => 
        AddrMapEntry(s"csr$i", None, MemSize(csrSize, AddrMapConsts.RW))
      }
      val scrSize = site(HtifKey).nSCR * (site(XLen) / 8)
      val scr = AddrMapEntry("scr", None, MemSize(scrSize, AddrMapConsts.RW))
      new AddrMap(csrs :+ scr)
    }
    pname match {
      case HtifKey => HtifParameters(
                       width = Dump("HTIF_WIDTH", 16),
                       nSCR = 64,
                       offsetBits = site(CacheBlockOffsetBits),
                       nCores = site(NTiles))
      //Memory Parameters
      case PAddrBits => 32
      case PgIdxBits => 12
      case PgLevels => if (site(XLen) == 64) 3 /* Sv39 */ else 2 /* Sv32 */
      case PgLevelBits => site(PgIdxBits) - log2Up(site(XLen)/8)
      case VPNBits => site(PgLevels) * site(PgLevelBits)
      case PPNBits => site(PAddrBits) - site(PgIdxBits)
      case VAddrBits => site(VPNBits) + site(PgIdxBits)
      case ASIdBits => 7
      case MIFTagBits => Dump("MEM_TAG_BITS",
                          log2Up(site(NAcquireTransactors)+2) +
                          log2Up(site(NBanksPerMemoryChannel)) +
                          log2Up(site(NMemoryChannels)))
      case MIFDataBits => Dump("MEM_DATA_BITS", 128)
      case MIFAddrBits => Dump("MEM_ADDR_BITS", site(PAddrBits) - site(CacheBlockOffsetBits))
      case MIFDataBeats => site(CacheBlockBytes) * 8 / site(MIFDataBits)
      case NastiKey => NastiParameters(
                        dataBits = site(MIFDataBits),
                        addrBits = site(PAddrBits),
                        idBits = site(MIFTagBits))
      //Params used by all caches
      case NSets => findBy(CacheName)
      case NWays => findBy(CacheName)
      case RowBits => findBy(CacheName)
      case NTLBEntries => findBy(CacheName)
      case "L1I" => {
        case NSets => Knob("L1I_SETS") //64
        case NWays => Knob("L1I_WAYS") //4
        case RowBits => 4*site(CoreInstBits)
        case NTLBEntries => 8
      }:PF
      case "L1D" => {
        case NSets => Knob("L1D_SETS") //64
        case NWays => Knob("L1D_WAYS") //4
        case RowBits => 2*site(CoreDataBits)
        case NTLBEntries => 8
      }:PF
      case ECCCode => None
      case Replacer => () => new RandomReplacement(site(NWays))
      case AmoAluOperandBits => site(XLen)
      //L1InstCache
      case BtbKey => BtbParameters()
      //L1DataCache
      case WordBits => site(XLen)
      case StoreDataQueueDepth => 17
      case ReplayQueueDepth => 16
      case NMSHRs => Knob("L1D_MSHRS")
      case NIOMSHRs => 1
      case LRSCCycles => 32 
      //L2 Memory System Params
      case NAcquireTransactors => 7
      case L2StoreDataQueueDepth => 1
      case L2DirectoryRepresentation => new NullRepresentation(site(NTiles))
      case BuildL2CoherenceManager => (p: Parameters) =>
        Module(new L2BroadcastHub()(p.alterPartial({
          case InnerTLId => "L1toL2"
          case OuterTLId => "L2toMC" })))
      //Tile Constants
      case BuildTiles => {
        TestGeneration.addSuites(rv64i.map(_("p")))
        TestGeneration.addSuites((if(site(UseVM)) List("pt","v") else List("pt")).flatMap(env => rv64u.map(_(env))))
        TestGeneration.addSuites(if(site(NTiles) > 1) List(mtBmarks, bmarks) else List(bmarks))
        List.fill(site(NTiles)){ (r: Bool, p: Parameters) =>
          Module(new RocketTile(resetSignal = r)(p.alterPartial({case TLId => "L1toL2"})))
        }
      }
      case BuildRoCC => Some(() => (Module(new AccumulatorExample, { case CoreName => "rocket" })))
      case RoccNMemChannels => 1
      //Rocket Core Constants
      case FetchWidth => 1
      case RetireWidth => 1
      case UseVM => true
      case UsePerfCounters => true
      case FastLoadWord => true
      case FastLoadByte => false
      case FastMulDiv => true
      case XLen => 64
      case UseFPU => {
        val env = if(site(UseVM)) List("p","pt","v") else List("p","pt")
        if(site(FDivSqrt)) TestGeneration.addSuites(env.map(rv64uf))
        else TestGeneration.addSuites(env.map(rv64ufNoDiv))
        true
      }
      case FDivSqrt => true
      case SFMALatency => 2
      case DFMALatency => 3
      case CoreInstBits => 32
      case CoreDataBits => site(XLen)
      case NCustomMRWCSRs => 0
      //Uncore Paramters
      case RTCPeriod => 100 // gives 10 MHz RTC assuming 1 GHz uncore clock
      case LNEndpoints => site(TLKey(site(TLId))).nManagers + site(TLKey(site(TLId))).nClients
      case LNHeaderBits => log2Ceil(site(TLKey(site(TLId))).nManagers) +
                             log2Up(site(TLKey(site(TLId))).nClients)
      case TLKey("L1toL2") => 
        TileLinkParameters(
          coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)),
          nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels),
          nCachingClients = site(NTiles),
          nCachelessClients = 1 + site(NTiles) *
                                (1 + (if(site(BuildRoCC).isEmpty) 0 else site(RoccNMemChannels))),
          maxClientXacts = max(site(NMSHRs) + site(NIOMSHRs),
                               if(site(BuildRoCC).isEmpty) 1 else site(RoccMaxTaggedMemXacts)),
          maxClientsPerPort = if(site(BuildRoCC).isEmpty) 1 else 2,
          maxManagerXacts = site(NAcquireTransactors) + 2,
          dataBits = site(CacheBlockBytes)*8)
      case TLKey("L2toMC") => 
        TileLinkParameters(
          coherencePolicy = new MEICoherence(new NullRepresentation(site(NBanksPerMemoryChannel))),
          nManagers = 1,
          nCachingClients = site(NBanksPerMemoryChannel),
          nCachelessClients = 0,
          maxClientXacts = 1,
          maxClientsPerPort = site(NAcquireTransactors) + 2,
          maxManagerXacts = 1,
          dataBits = site(CacheBlockBytes)*8)
      case TLKey("Outermost") => site(TLKey("L2toMC")).copy(dataBeats = site(MIFDataBeats))
      case NTiles => Knob("NTILES")
      case NMemoryChannels => 1
      case NBanksPerMemoryChannel => Knob("NBANKS")
      case NOutstandingMemReqsPerChannel => site(NBanksPerMemoryChannel)*(site(NAcquireTransactors)+2)
      case BankIdLSB => 0
      case CacheBlockBytes => 64
      case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
      case UseBackupMemoryPort => true
      case MMIOBase => BigInt(1 << 30) // 1 GB
      case ExternalIOStart => 2 * site(MMIOBase)
      case GlobalAddrMap => AddrMap(
        AddrMapEntry("mem", None, MemSize(site(MMIOBase), AddrMapConsts.RWX)),
        AddrMapEntry("conf", None, MemSubmap(site(ExternalIOStart) - site(MMIOBase), genCsrAddrMap)),
        AddrMapEntry("io", Some(site(ExternalIOStart)), MemSize(2 * site(MMIOBase), AddrMapConsts.RW)))
  }},
  knobValues = {
    case "NTILES" => 1
    case "NBANKS" => 1
    case "L1D_MSHRS" => 2
    case "L1D_SETS" => 64
    case "L1D_WAYS" => 4
    case "L1I_SETS" => 64
    case "L1I_WAYS" => 4
  }
)
class DefaultVLSIConfig extends DefaultConfig
class DefaultCPPConfig extends DefaultConfig

class With2Cores extends Config(knobValues = { case "NTILES" => 2 })
class With4Cores extends Config(knobValues = { case "NTILES" => 4 })
class With8Cores extends Config(knobValues = { case "NTILES" => 8 })

class With2Banks extends Config(knobValues = { case "NBANKS" => 2 })
class With4Banks extends Config(knobValues = { case "NBANKS" => 4 })
class With8Banks extends Config(knobValues = { case "NBANKS" => 8 })

class WithL2Cache extends Config(
  (pname,site,here) => pname match {
    case "L2_CAPACITY_IN_KB" => Knob("L2_CAPACITY_IN_KB")
    case "L2Bank" => {
      case NSets => (((here[Int]("L2_CAPACITY_IN_KB")*1024) /
                        site(CacheBlockBytes)) /
                          site(NBanksPerMemoryChannel)*site(NMemoryChannels)) /
                            site(NWays)
      case NWays => Knob("L2_WAYS")
      case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat
    }: PartialFunction[Any,Any] 
    case NAcquireTransactors => 2
    case NSecondaryMisses => 4
    case L2DirectoryRepresentation => new FullRepresentation(site(NTiles))
    case BuildL2CoherenceManager => (p: Parameters) =>
      Module(new L2HellaCacheBank()(p.alterPartial({
         case CacheName => "L2Bank"
         case InnerTLId => "L1toL2"
         case OuterTLId => "L2toMC"})))
  },
  knobValues = { case "L2_WAYS" => 8; case "L2_CAPACITY_IN_KB" => 2048 }
)

class WithL2Capacity2048 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 2048 })
class WithL2Capacity1024 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 1024 })
class WithL2Capacity512 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 512 })
class WithL2Capacity256 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 256 })
class WithL2Capacity128 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 128 })
class WithL2Capacity64 extends Config(knobValues = { case "L2_CAPACITY_IN_KB" => 64 })

class DefaultL2Config extends Config(new WithL2Cache ++ new DefaultConfig)
class DefaultL2VLSIConfig extends Config(new WithL2Cache ++ new DefaultVLSIConfig)
class DefaultL2CPPConfig extends Config(new WithL2Cache ++ new DefaultCPPConfig)
class DefaultL2FPGAConfig extends Config(new WithL2Capacity64 ++ new WithL2Cache ++ new DefaultFPGAConfig)

class WithZscale extends Config(
  (pname,site,here) => pname match {
    case BuildZscale => {
      TestGeneration.addSuites(List(rv32ui("p"), rv32um("p")))
      TestGeneration.addSuites(List(zscaleBmarks))
      (r: Bool, p: Parameters) => Module(new Zscale(r)(p))
    }
    case BootROMCapacity => Dump("BOOT_CAPACITY", 16*1024)
    case DRAMCapacity => Dump("DRAM_CAPACITY", 64*1024*1024)
  }
)

class ZscaleConfig extends Config(new WithZscale ++ new DefaultConfig)

class FPGAConfig extends Config (
  (pname,site,here) => pname match {
    case NAcquireTransactors => 4
    case UseBackupMemoryPort => false
  }
)

class DefaultFPGAConfig extends Config(new FPGAConfig ++ new DefaultConfig)

class SmallConfig extends Config (
    topDefinitions = { (pname,site,here) => pname match {
      case UseFPU => false
      case FastMulDiv => false
      case NTLBEntries => 4
      case BtbKey => BtbParameters(nEntries = 8)
    }},
  knobValues = {
    case "L1D_SETS" => 64
    case "L1D_WAYS" => 1
    case "L1I_SETS" => 64
    case "L1I_WAYS" => 1
  }
)

class DefaultFPGASmallConfig extends Config(new SmallConfig ++ new DefaultFPGAConfig)

class ExampleSmallConfig extends Config(new SmallConfig ++ new DefaultConfig)

class MultibankConfig extends Config(new With2Banks ++ new DefaultConfig)
class MultibankL2Config extends Config(
  new With2Banks ++ new WithL2Cache ++ new DefaultConfig)

The Rocc.scala where the actual accumulator example is given is as below

// See LICENSE for license details.

package rocket

import Chisel._
import uncore._
import Util._
import cde.{Parameters, Field}

case object RoccMaxTaggedMemXacts extends Field[Int]
case object RoccNMemChannels extends Field[Int]

class RoCCInstruction extends Bundle
{
  val funct = Bits(width = 7)
  val rs2 = Bits(width = 5)
  val rs1 = Bits(width = 5)
  val xd = Bool()
  val xs1 = Bool()
  val xs2 = Bool()
  val rd = Bits(width = 5)
  val opcode = Bits(width = 7)
}

class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) {
  val inst = new RoCCInstruction
  val rs1 = Bits(width = xLen)
  val rs2 = Bits(width = xLen)
}

class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
  val rd = Bits(width = 5)
  val data = Bits(width = xLen)
}

class RoCCInterface(implicit p: Parameters) extends Bundle {
  val cmd = Decoupled(new RoCCCommand).flip
  val resp = Decoupled(new RoCCResponse)
  val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
  val busy = Bool(OUTPUT)
  val s = Bool(INPUT)
  val interrupt = Bool(OUTPUT)

  // These should be handled differently, eventually
  val imem = new ClientUncachedTileLinkIO
  val dmem = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO)
  val iptw = new TLBPTWIO
  val dptw = new TLBPTWIO
  val pptw = new TLBPTWIO
  val exception = Bool(INPUT)
}

abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) {
  val io = new RoCCInterface
  io.mem.req.bits.phys := Bool(true) // don't perform address translation
}

class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) {
  val regfile = Mem(UInt(width = xLen), n)
  val busy = Reg(init=Vec(Bool(false), n))

  val cmd = Queue(io.cmd)
  val funct = cmd.bits.inst.funct
  val addr = cmd.bits.inst.rs2(log2Up(n)-1,0)
  val doWrite = funct === UInt(0)
  val doRead = funct === UInt(1)
  val doLoad = funct === UInt(2)
  val doAccum = funct === UInt(3)
  val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)

  // datapath
  val addend = cmd.bits.rs1
  val accum = regfile(addr)
  val wdata = Mux(doWrite, addend, accum + addend)

  when (cmd.fire() && (doWrite || doAccum)) {
    regfile(addr) := wdata
  }

  when (io.mem.resp.valid) {
    regfile(memRespTag) := io.mem.resp.bits.data
  }

  // control
  when (io.mem.req.fire()) {
    busy(addr) := Bool(true)
  }

  when (io.mem.resp.valid) {
    busy(memRespTag) := Bool(false)
  }

  val doResp = cmd.bits.inst.xd
  val stallReg = busy(addr)
  val stallLoad = doLoad && !io.mem.req.ready
  val stallResp = doResp && !io.resp.ready

  cmd.ready := !stallReg && !stallLoad && !stallResp
    // command resolved if no stalls AND not issuing a load that will need a request

  // PROC RESPONSE INTERFACE
  io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
    // valid response if valid command, need a response, and no stalls
  io.resp.bits.rd := cmd.bits.inst.rd
    // Must respond with the appropriate tag or undefined behavior
  io.resp.bits.data := accum
    // Semantics is to always send out prior accumulator register value

  io.busy := cmd.valid || busy.reduce(_||_)
    // Be busy when have pending memory requests or committed possibility of pending requests
  io.interrupt := Bool(false)
    // Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)

  // MEMORY REQUEST INTERFACE
  io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
  io.mem.req.bits.addr := addend
  io.mem.req.bits.tag := addr
  io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
  io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
  io.mem.req.bits.data := Bits(0) // we're not performing any stores...
  io.mem.invalidate_lr := false

  io.imem.acquire.valid := false
  io.imem.grant.ready := false
  io.dmem.head.acquire.valid := false
  io.dmem.head.grant.ready := false
  io.iptw.req.valid := false
  io.dptw.req.valid := false
  io.pptw.req.valid := false
}

Solution

  • The issue has been solved by adding the following piece of code to configs.scala

    class WithAccumRocc extends Config(
      (pname,site,here) => pname match {
        case RoccNMemChannels => 1
        case RoccMaxTaggedMemXacts => 0
        case BuildRoCC => {
          Some((p: Parameters) =>
      Module(new AccumulatorExample()(p.alterPartial({ case CoreName => "AccumRocc" }))))
        }   
      }
    )
    class WithRoCCConfig extends Config(new WithAccumRocc ++ new DefaultFPGAConfig)
    

    after this build rocket with the new config, i.e. make rocket CONFIG=WithRoCCConfig

    also do not forget to regenerate the vivado project and bitstream with the same config parameter.