diff --git a/Bender.yml b/Bender.yml index 34549261..018a6306 100644 --- a/Bender.yml +++ b/Bender.yml @@ -94,8 +94,10 @@ sources: - test/watchdog.sv - test/idma_intf.sv - test/idma_sim_mem.sv + - test/stream_throttle.sv # Level 1: - test/idma_test.sv + - test/axi_throttle.sv # Level 2: - test/tb_idma_backend.sv - test/tb_idma_nd_backend.sv diff --git a/CHANGELOG.md b/CHANGELOG.md index 45ad8adc..5f0e8958 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## Unreleased +### Added +Add support to enable non-ideal behavior of the testbench memory. ## 0.2.3 - 11-08-2022 ### Changed diff --git a/jobs/4d-ext/man_linear_2D.txt b/jobs/4d-ext/man_linear_2D.txt new file mode 100644 index 00000000..7b3e81c2 --- /dev/null +++ b/jobs/4d-ext/man_linear_2D.txt @@ -0,0 +1,17 @@ +2048 +0x0 +0x10000000 +256 +256 +0 +0 +8 +0x10000 +0x10000 +0 +0x0 +0x0 +0 +0x0 +0x0 +0 diff --git a/jobs/backend/man_linear_large.txt b/jobs/backend/man_linear_large.txt new file mode 100644 index 00000000..c6ae0036 --- /dev/null +++ b/jobs/backend/man_linear_large.txt @@ -0,0 +1,8 @@ +16384 +0x0 +0x10000000 +256 +256 +0 +0 +0 diff --git a/test/axi_throttle.sv b/test/axi_throttle.sv new file mode 100644 index 00000000..ff3c2bcc --- /dev/null +++ b/test/axi_throttle.sv @@ -0,0 +1,101 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +/// Throttles an AXI4+ATOP bus. The maximum number of outstanding transfers have to +/// be set as a compile-time parameter, whereas the number of outstanding transfers can be set +/// during runtime. This module assumes either in-order processing of the requests or +/// indistinguishability of the request/responses (all ARs and AWs have the same ID respectively). +module axi_throttle #( + /// The maximum amount of allowable outstanding write requests + parameter int unsigned MaxNumAwPending = 1, + /// The maximum amount of allowable outstanding read requests + parameter int unsigned MaxNumArPending = 1, + /// AXI4+ATOP request type + parameter type axi_req_t = logic, + /// AXI4+ATOP response type + parameter type axi_rsp_t = logic, + /// The width of the write credit counter (*DO NOT OVERWRITE*) + parameter int unsigned WCntWidth = cf_math_pkg::idx_width(MaxNumAwPending), + /// The width of the read credit counter (*DO NOT OVERWRITE*) + parameter int unsigned RCntWidth = cf_math_pkg::idx_width(MaxNumArPending), + /// The type of the write credit counter (*DO NOT OVERWRITE*) + parameter type w_credit_t = logic [WCntWidth-1:0], + /// The type of the read credit counter (*DO NOT OVERWRITE*) + parameter type r_credit_t = logic [RCntWidth-1:0] +) ( + /// Clock + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + + /// AXI4+ATOP request in + input axi_req_t req_i, + /// AXI4+ATOP response out + output axi_rsp_t rsp_o, + /// AXI4+ATOP request out + output axi_req_t req_o, + /// AXI4+ATOP response in + input axi_rsp_t rsp_i, + + /// Amount of write credit (number of outstanding write transfers) + input w_credit_t w_credit_i, + /// Amount of read credit (number of outstanding read transfers) + input r_credit_t r_credit_i +); + + // ax throttled valids + logic throttled_aw_valid; + logic throttled_ar_valid; + + // ax throttled readies + logic throttled_aw_ready; + logic throttled_ar_ready; + + // limit Aw requests -> wait for b + stream_throttle #( + .MaxNumPending ( MaxNumAwPending ) + ) i_stream_throttle_aw ( + .clk_i, + .rst_ni, + .req_valid_i ( req_i.aw_valid ), + .req_valid_o ( throttled_aw_valid ), + .req_ready_i ( rsp_i.aw_ready ), + .req_ready_o ( throttled_aw_ready ), + .rsp_valid_i ( rsp_i.b_valid ), + .rsp_ready_i ( req_i.b_ready ), + .credit_i ( w_credit_i ) + ); + + // limit Ar requests -> wait for r.last + stream_throttle #( + .MaxNumPending ( MaxNumArPending ) + ) i_stream_throttle_ar ( + .clk_i, + .rst_ni, + .req_valid_i ( req_i.ar_valid ), + .req_valid_o ( throttled_ar_valid ), + .req_ready_i ( rsp_i.ar_ready ), + .req_ready_o ( throttled_ar_ready ), + .rsp_valid_i ( rsp_i.r_valid & rsp_i.r.last ), + .rsp_ready_i ( req_i.r_ready ), + .credit_i ( r_credit_i ) + ); + + // connect the throttled request bus (its a through connection - except for the ax valids) + always_comb begin : gen_throttled_req_conn + req_o = req_i; + req_o.aw_valid = throttled_aw_valid; + req_o.ar_valid = throttled_ar_valid; + end + + // connect the throttled response bus (its a through connection - except for the ax readies) + always_comb begin : gen_throttled_rsp_conn + rsp_o = rsp_i; + rsp_o.aw_ready = throttled_aw_ready; + rsp_o.ar_ready = throttled_ar_ready; + end + +endmodule : axi_throttle diff --git a/test/stream_throttle.sv b/test/stream_throttle.sv new file mode 100644 index 00000000..f989fad1 --- /dev/null +++ b/test/stream_throttle.sv @@ -0,0 +1,83 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +`include "common_cells/registers.svh" + +/// Throttles a ready valid handshaked bus. The maximum number of outstanding transfers have to +/// be set as a compile-time parameter, whereas the number of outstanding transfers can be set +/// during runtime. This module assumes either in-order processing of the requests or +/// indistinguishability of the request/responses. +module stream_throttle #( + /// The maximum amount of allowable outstanding requests + parameter int unsigned MaxNumPending = 1, + /// The width of the credit counter (*DO NOT OVERWRITE*) + parameter int unsigned CntWidth = cf_math_pkg::idx_width(MaxNumPending), + /// The type of the credit counter (*DO NOT OVERWRITE*) + parameter type credit_t = logic [CntWidth-1:0] +) ( + + /// Clock + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + + /// Request valid in + input logic req_valid_i, + /// Request valid out + output logic req_valid_o, + /// Request ready in + input logic req_ready_i, + /// Request ready out + output logic req_ready_o, + + /// Response valid in + input logic rsp_valid_i, + /// Response ready in + input logic rsp_ready_i, + + /// Amount of credit (number of outstanding transfers) + input credit_t credit_i +); + + // we use a credit counter to keep track of how many transfers are pending at any point in + // time. Valid is passed-through if there is credit. + credit_t credit_d, credit_q; + + // we have credit available + logic credit_available; + + // implement the counter. If credit is available let the valid pass, else block it. Increment + // the counter once a request happens, decrement once a response arrives. Assumes in-order + // responses. + always_comb begin : proc_credit_counter + + // default: keep state + credit_d = credit_q; + + // on valid outgoing request: count up + if (req_ready_o & req_valid_o) begin + credit_d = credit_d + 'd1; + end + + // on valid response: count down + if (rsp_valid_i & rsp_ready_i) begin + credit_d = credit_d - 'd1; + end + end + + // credit is available + assign credit_available = credit_q <= (credit_i - 'd1); + + // a request id passed on as valid if the input is valid and we have credit. + assign req_valid_o = req_valid_i & credit_available; + + // a request id passed on as ready if the input is ready and we have credit. + assign req_ready_o = req_ready_i & credit_available; + + // state + `FF(credit_q, credit_d, '0, clk_i, rst_ni) + +endmodule : stream_throttle diff --git a/test/tb_idma_backend.sv b/test/tb_idma_backend.sv index f3b7873a..1402cc27 100644 --- a/test/tb_idma_backend.sv +++ b/test/tb_idma_backend.sv @@ -17,11 +17,15 @@ module tb_idma_backend import idma_pkg::*; #( parameter int unsigned AxiIdWidth = 1, parameter int unsigned TFLenWidth = 32, parameter int unsigned MemSysDepth = 0, + parameter int unsigned MemNumReqOutst = 1, + parameter int unsigned MemLatency = 0, + parameter int unsigned WatchDogNumCycles = 100, parameter bit MaskInvalidData = 1, parameter bit RAWCouplingAvail = 1, parameter bit HardwareLegalizer = 1, parameter bit RejectZeroTransfers = 1, - parameter bit ErrorHandling = 1 + parameter bit ErrorHandling = 1, + parameter bit IdealMemory = 1 ); // timing parameters @@ -35,9 +39,6 @@ module tb_idma_backend import idma_pkg::*; #( localparam bit PrintFifoInfo = 1'b1; // TB parameters - // watchdog trips after N cycles of inactivity - localparam int unsigned WatchDogNumCycles = 100; - // dependent parameters localparam int unsigned StrbWidth = DataWidth / 8; localparam int unsigned OffsetWidth = $clog2(StrbWidth); @@ -275,12 +276,57 @@ module tb_idma_backend import idma_pkg::*; #( assign idma_dv.rsp_valid = rsp_valid; assign idma_dv.eh_req_ready = eh_req_ready; - // error trigger - always_comb begin - axi_req_mem = axi_req; - axi_rsp = axi_rsp_mem; + // throttle the AXI bus + if (IdealMemory) begin : gen_ideal_mem_connect + + // if the memory is ideal: 0 cycle latency here + assign axi_req_mem = axi_req; + assign axi_rsp = axi_rsp_mem; + + end else begin : gen_delayed_mem_connect + + // the throttled AXI buses + axi_req_t axi_req_throttled; + axi_rsp_t axi_rsp_throttled; + + // axi throttle: limit the amount of concurrent requests in the memory system + axi_throttle #( + .MaxNumAwPending ( 2**32 - 1 ), + .MaxNumArPending ( 2**32 - 1 ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_rsp_t ) + ) i_axi_throttle ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .req_i ( axi_req ), + .rsp_o ( axi_rsp ), + .req_o ( axi_req_throttled ), + .rsp_i ( axi_rsp_throttled ), + .w_credit_i ( MemNumReqOutst ), + .r_credit_i ( MemNumReqOutst ) + ); + + // delay the signals using AXI4 multicuts + axi_multicut #( + .NoCuts ( MemLatency ), + .aw_chan_t ( axi_aw_chan_t ), + .w_chan_t ( axi_w_chan_t ), + .b_chan_t ( axi_b_chan_t ), + .ar_chan_t ( axi_ar_chan_t ), + .r_chan_t ( axi_r_chan_t ), + .axi_req_t ( axi_req_t ), + .axi_resp_t ( axi_rsp_t ) + ) i_axi_multicut ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .slv_req_i ( axi_req_throttled ), + .slv_resp_o ( axi_rsp_throttled ), + .mst_req_o ( axi_req_mem ), + .mst_resp_i ( axi_rsp_mem ) + ); end + //-------------------------------------- // Various TB Tasks //-------------------------------------- diff --git a/test/tb_idma_nd_backend.sv b/test/tb_idma_nd_backend.sv index f171ff46..6b604f70 100644 --- a/test/tb_idma_nd_backend.sv +++ b/test/tb_idma_nd_backend.sv @@ -20,11 +20,15 @@ module tb_idma_nd_backend import idma_pkg::*; #( parameter int unsigned NumDim = 4, parameter int unsigned RepWidth = 32, parameter int unsigned StrideWidth = 32, + parameter int unsigned MemNumReqOutst = 1, + parameter int unsigned MemLatency = 0, + parameter int unsigned WatchDogNumCycles = 100, parameter bit MaskInvalidData = 1, parameter bit RAWCouplingAvail = 1, parameter bit HardwareLegalizer = 1, parameter bit RejectZeroTransfers = 1, - parameter bit ErrorHandling = 1 + parameter bit ErrorHandling = 1, + parameter bit IdealMemory = 1 ); // timing parameters @@ -38,9 +42,6 @@ module tb_idma_nd_backend import idma_pkg::*; #( localparam bit PrintFifoInfo = 1'b1; // TB parameters - // watchdog trips after N cycles of inactivity - localparam int unsigned WatchDogNumCycles = 100; - // dependent parameters localparam int unsigned StrbWidth = DataWidth / 8; localparam int unsigned OffsetWidth = $clog2(StrbWidth); @@ -345,12 +346,57 @@ module tb_idma_nd_backend import idma_pkg::*; #( assign idma_nd_dv.rsp_valid = nd_rsp_valid; assign idma_nd_dv.eh_req_ready = eh_req_ready; - // error trigger - always_comb begin - axi_req_mem = axi_req; - axi_rsp = axi_rsp_mem; + // throttle the AXI bus + if (IdealMemory) begin : gen_ideal_mem_connect + + // if the memory is ideal: 0 cycle latency here + assign axi_req_mem = axi_req; + assign axi_rsp = axi_rsp_mem; + + end else begin : gen_delayed_mem_connect + + // the throttled AXI buses + axi_req_t axi_req_throttled; + axi_rsp_t axi_rsp_throttled; + + // axi throttle: limit the amount of concurrent requests in the memory system + axi_throttle #( + .MaxNumAwPending ( 2**32 - 1 ), + .MaxNumArPending ( 2**32 - 1 ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_rsp_t ) + ) i_axi_throttle ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .req_i ( axi_req ), + .rsp_o ( axi_rsp ), + .req_o ( axi_req_throttled ), + .rsp_i ( axi_rsp_throttled ), + .w_credit_i ( MemNumReqOutst ), + .r_credit_i ( MemNumReqOutst ) + ); + + // delay the signals using AXI4 multicuts + axi_multicut #( + .NoCuts ( MemLatency ), + .aw_chan_t ( axi_aw_chan_t ), + .w_chan_t ( axi_w_chan_t ), + .b_chan_t ( axi_b_chan_t ), + .ar_chan_t ( axi_ar_chan_t ), + .r_chan_t ( axi_r_chan_t ), + .axi_req_t ( axi_req_t ), + .axi_resp_t ( axi_rsp_t ) + ) i_axi_multicut ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .slv_req_i ( axi_req_throttled ), + .slv_resp_o ( axi_rsp_throttled ), + .mst_req_o ( axi_req_mem ), + .mst_resp_i ( axi_rsp_mem ) + ); end + //-------------------------------------- // Various TB Tasks //--------------------------------------