Commit a215404d authored by Tomasz Wlostowski

rtl: added optional hardware division and multiply-high instruction

parent 91e88f93
files = [ "urv_cpu.v",
"urv_divide.v",
"urv_exec.v",
"urv_fetch.v",
"urv_decode.v",
......
......@@ -23,6 +23,7 @@
// SPARTAN6 - Xilinx Spartan-6 FPGA
// GENERIC - Generic, HW-independent
`define URV_PLATFORM_SPARTAN6 1
`define URV_PLATFORM_GENERIC 1
//`define URV_PLATFORM_SPARTAN6 1
//`define URV_PLATFORM_ALTERA 1
/*
uRV - a tiny and dumb RISC-V core
Copyright (c) 2015 CERN
Author: Tomasz Włostowski <tomasz.wlostowski@cern.ch>
......@@ -16,7 +16,7 @@
You should have received a copy of the GNU Lesser General Public
License along with this library.
*/
`include "urv_defs.v"
......@@ -27,32 +27,33 @@ module urv_cpu
#(
parameter g_timer_frequency = 1000,
parameter g_clock_frequency = 100000000,
parameter g_with_hw_divide = 0,
parameter g_with_hw_div = 1,
parameter g_with_hw_mulh = 1,
parameter g_with_hw_debug = 0,
parameter g_debug_breakpoints = 6
)
)
(
input clk_i,
input rst_i,
input irq_i,
input clk_i,
input rst_i,
input irq_i,
// instruction mem I/F
output [31:0] im_addr_o,
input [31:0] im_data_i,
input im_valid_i,
input im_valid_i,
// data mem I/F
output [31:0] dm_addr_o,
output [31:0] dm_data_s_o,
input [31:0] dm_data_l_i,
output [3:0] dm_data_select_o,
input dm_ready_i,
input dm_ready_i,
output dm_store_o,
output dm_load_o,
input dm_load_done_i,
input dm_store_done_i,
output dm_store_o,
output dm_load_o,
input dm_load_done_i,
input dm_store_done_i,
// Debug I/F
// Debug mode is entered either when dbg_force_i is set, or when the ebreak
......@@ -89,7 +90,7 @@ module urv_cpu
wire [31:0] x2f_pc_bra;
wire x2f_bra;
wire x2f_dbg_toggle;
// F->D stage interface
wire [31:0] f2d_pc, f2d_ir;
wire f2d_valid;
......@@ -101,7 +102,7 @@ module urv_cpu
wire [4:0] rf_rd;
wire [31:0] rf_rd_value;
wire rf_rd_write;
// D->X1 stage interface
wire d2x_valid;
wire [31:0] d2x_pc;
......@@ -122,6 +123,7 @@ module urv_cpu
wire d2x_is_csr, d2x_is_mret, d2x_is_ebreak, d2x_csr_load_en;
wire [31:0] d2x_alu_op1, d2x_alu_op2;
wire d2x_use_op1, d2x_use_op2;
wire d2x_is_multiply, d2x_is_divide;
// X1/M->X2/W interface
wire [4:0] x2w_rd;
......@@ -140,10 +142,10 @@ module urv_cpu
wire [31:0] x_rs2_value, x_rs1_value;
wire [31:0] rf_bypass_rd_value = x2w_rd_value;
wire rf_bypass_rd_write = rf_rd_write && !x2w_load; // multiply/shift too?
// misc stuff
wire [39:0] csr_time, csr_cycles;
urv_fetch fetch
(
.clk_i(clk_i),
......@@ -175,7 +177,12 @@ module urv_cpu
);
urv_decode decode
urv_decode
#(
.g_with_hw_div(g_with_hw_div),
.g_with_hw_mulh(g_with_hw_mulh)
)
decode
(
.clk_i(clk_i),
.rst_i(rst_i),
......@@ -209,6 +216,8 @@ module urv_cpu
.x_is_load_o(d2x_is_load),
.x_is_store_o(d2x_is_store),
.x_is_undef_o(d2x_is_undef),
.x_is_multiply_o(d2x_is_multiply),
.x_is_divide_o(d2x_is_divide),
.x_rd_source_o(d2x_rd_source),
.x_rd_write_o(d2x_rd_write),
.x_csr_sel_o (d2x_csr_sel),
......@@ -238,7 +247,7 @@ module urv_cpu
.x_rs1_value_o(x_rs1_value),
.x_rs2_value_o(x_rs2_value),
.w_rd_i(rf_rd),
.w_rd_value_i(rf_rd_value),
.w_rd_store_i(rf_rd_write),
......@@ -246,13 +255,18 @@ module urv_cpu
.w_bypass_rd_write_i(rf_bypass_rd_write),
.w_bypass_rd_value_i(rf_bypass_rd_value)
);
// Execute 1/Memory stage (X1/M)
urv_exec execute
urv_exec
#(
.g_with_hw_div(g_with_hw_div),
.g_with_hw_mulh(g_with_hw_mulh)
)
execute
(
.clk_i(clk_i),
.rst_i(rst_i),
.irq_i ( irq_i ),
// pipe control
......@@ -266,12 +280,12 @@ module urv_cpu
// from D stage
.d_valid_i(d2x_valid),
.d_is_csr_i(d2x_is_csr),
.d_is_csr_i ( d2x_is_csr ),
.d_is_mret_i(d2x_is_mret),
.d_is_ebreak_i(d2x_is_ebreak),
.d_dbg_mode_i(dbg_enabled_o),
.d_csr_imm_i(d2x_csr_imm),
.d_csr_sel_i(d2x_csr_sel),
.d_csr_imm_i ( d2x_csr_imm ),
.d_csr_sel_i (d2x_csr_sel),
.d_pc_i(d2x_pc),
.d_rd_i(d2x_rd),
.d_fun_i(d2x_fun),
......@@ -280,20 +294,21 @@ module urv_cpu
.d_is_add_i(d2x_is_add),
.d_is_load_i(d2x_is_load),
.d_is_store_i(d2x_is_store),
//.d_is_divide_i(1'b0),
.d_is_undef_i(d2x_is_undef),
.d_is_multiply_i(d2x_is_multiply),
.d_is_divide_i(d2x_is_divide),
.d_alu_op1_i(d2x_alu_op1),
.d_alu_op2_i(d2x_alu_op2),
.d_use_op1_i(d2x_use_op1),
.d_use_op2_i(d2x_use_op2),
.d_rd_source_i(d2x_rd_source),
.d_rd_write_i(d2x_rd_write),
.d_rd_write_i(d2x_rd_write),
.d_opcode_i(d2x_opcode),
.d_shifter_sign_i(d2x_shifter_sign),
// to F stage (branches)
.f_branch_target_o(x2f_pc_bra), // fixme: consistent naming
.f_branch_take_o(x2f_bra),
.f_branch_target_o (x2f_pc_bra), // fixme: consistent naming
.f_branch_take_o (x2f_bra),
.f_dbg_toggle_o(x2f_dbg_toggle),
// to X2/W stage
......@@ -354,7 +369,7 @@ module urv_cpu
.dm_data_l_i(dm_data_l_i),
.dm_load_done_i(dm_load_done_i),
.dm_store_done_i(dm_store_done_i),
// to register file
.rf_rd_value_o(rf_rd_value),
.rf_rd_o(rf_rd),
......@@ -362,19 +377,19 @@ module urv_cpu
);
// Built-in timer
urv_timer
urv_timer
#(
.g_timer_frequency(g_timer_frequency),
.g_clock_frequency(g_clock_frequency)
)
ctimer
)
ctimer
(
.clk_i(clk_i),
.rst_i(rst_i),
.csr_time_o(csr_time),
.csr_cycles_o(csr_cycles),
.sys_tick_o(sys_tick)
);
......@@ -389,7 +404,7 @@ module urv_cpu
x2f_bra_d0 <= x2f_bra;
x2f_bra_d1 <= x2f_bra_d0;
end
// pipeline control
assign f_stall = x_stall_req || w_stall_req || d_stall_req;
assign d_stall = x_stall_req || w_stall_req;
......@@ -397,5 +412,5 @@ module urv_cpu
assign x_kill = x2f_bra || x2f_bra_d0 || x2f_bra_d1;
assign d_kill = x2f_bra || x2f_bra_d0;
endmodule // urv_cpu
......@@ -16,7 +16,7 @@
You should have received a copy of the GNU Lesser General Public
License along with this library.
*/
`include "urv_defs.v"
......@@ -25,49 +25,49 @@
module urv_csr
(
input clk_i,
input rst_i,
input x_stall_i,
input x_kill_i,
input d_is_csr_i,
input [2:0] d_fun_i,
input [4:0] d_csr_imm_i,
input [11:0] d_csr_sel_i,
input [31:0] d_rs1_i,
output [31:0] x_rd_o,
input [39:0] csr_time_i,
input [39:0] csr_cycles_i,
input clk_i,
input rst_i,
input x_stall_i,
input x_kill_i,
input d_is_csr_i,
input [2:0] d_fun_i,
input [4:0] d_csr_imm_i,
input [11:0] d_csr_sel_i,
input [31:0] d_rs1_i,
output [31:0] x_rd_o,
input [39:0] csr_time_i,
input [39:0] csr_cycles_i,
// interrupt management
output [31:0] x_csr_write_value_o,
output [31:0] x_csr_write_value_o,
input [31:0] csr_mstatus_i,
input [31:0] csr_mip_i,
input [31:0] csr_mie_i,
input [31:0] csr_mepc_i,
input [31:0] csr_mcause_i,
input [31:0] csr_mstatus_i,
input [31:0] csr_mip_i,
input [31:0] csr_mie_i,
input [31:0] csr_mepc_i,
input [31:0] csr_mcause_i,
// Debug mailboxes
input [31:0] dbg_mbx_data_i,
input dbg_mbx_write_i,
output [31:0] dbg_mbx_data_o
);
reg [31:0] csr_mscratch;
reg [31:0] csr_mscratch;
reg [31:0] mbx_data;
reg [31:0] csr_in1;
reg [31:0] csr_in2;
reg [31:0] csr_out;
always@*
case(d_csr_sel_i) // synthesis full_case parallel_case
`CSR_ID_CYCLESL: csr_in1 <= csr_cycles_i[31:0];
......@@ -86,6 +86,9 @@ module urv_csr
assign x_rd_o = csr_in1;
genvar i;
always@*
case (d_fun_i)
`CSR_OP_CSRRWI,
......@@ -96,25 +99,32 @@ module urv_csr
csr_in2 <= d_rs1_i;
endcase // case (d_fun_i)
always@*
case(d_fun_i) // synthesis full_case parallel_case
`CSR_OP_CSRRWI,
`CSR_OP_CSRRW:
// Write
csr_out <= csr_in2;
`CSR_OP_CSRRCI,
`CSR_OP_CSRRC:
// Clear bits
csr_out <= ~csr_in2 & csr_in1;
`CSR_OP_CSRRSI,
`CSR_OP_CSRRS:
// Set bits
csr_out <= csr_in2 | csr_in1;
default:
csr_out <= 32'hx;
endcase // case (d_csr_op_i)
always@(posedge clk_i)
// Per-bit CSR write-value mux.
//
// For every bit i of the 32-bit word, csr_out[i] is chosen from the value
// read from the selected CSR (csr_in1[i]) and the instruction operand
// (csr_in2[i]) according to the CSR operation encoded in d_fun_i:
//   CSRRW / CSRRWI : csr_out[i] = csr_in2[i]                     (write)
//   CSRRC / CSRRCI : csr_out[i] = csr_in2[i] ? 0 : csr_in1[i]    (clear bits)
//   CSRRS / CSRRSI : csr_out[i] = csr_in2[i] ? 1 : csr_in1[i]    (set bits)
// Any other d_fun_i value drives X so that an unintended use shows up in
// simulation.
generate
   for (i=0;i<32;i=i+1)
     begin : gen_csr_bits
        always@*
          case(d_fun_i) // synthesis full_case parallel_case
            `CSR_OP_CSRRWI,
            `CSR_OP_CSRRW:
              // Write: the operand bit replaces the CSR bit.
              csr_out[i] <= csr_in2[i];
            `CSR_OP_CSRRCI,
            `CSR_OP_CSRRC:
              // Clear: a set operand bit forces 0, otherwise keep the CSR bit.
              csr_out[i] <= csr_in2[i] ? 1'b0 : csr_in1[i];
            `CSR_OP_CSRRSI,
            `CSR_OP_CSRRS:
              // Set: a set operand bit forces 1, otherwise keep the CSR bit.
              csr_out[i] <= csr_in2[i] ? 1'b1 : csr_in1[i];
            default:
              // Single-bit target, so use a 1-bit unknown (was 32'hx, which
              // relied on silent truncation).
              csr_out[i] <= 1'bx;
          endcase // case (d_fun_i)
     end // block: gen_csr_bits
endgenerate
always@(posedge clk_i)
if(rst_i)
begin
csr_mscratch <= 0;
......
......@@ -16,68 +16,73 @@
You should have received a copy of the GNU Lesser General Public
License along with this library.
*/
`include "urv_defs.v"
`timescale 1ns/1ps
module urv_decode
module urv_decode
(
input clk_i,
input rst_i,
input clk_i,
input rst_i,
// pipeline control
input d_stall_i,
input d_kill_i,
output d_stall_req_o,
input d_stall_i,
input d_kill_i,
output d_stall_req_o,
// from Fetch stage
input [31:0] f_ir_i,
input [31:0] f_pc_i,
input f_valid_i,
input [31:0] f_ir_i,
input [31:0] f_pc_i,
input f_valid_i,
// to Register File (not registered: direct from fetch stage).
output [4:0] rf_rs1_o,
output [4:0] rf_rs2_o,
// to Register File
output [4:0] rf_rs1_o,
output [4:0] rf_rs2_o,
// to Execute 1 stage
output x_valid_o,
output x_valid_o,
output reg [31:0] x_pc_o,
output [4:0] x_rs1_o,
output [4:0] x_rs2_o,
output [4:0] x_rd_o,
output [4:0] x_rs1_o,
output [4:0] x_rs2_o,
output [4:0] x_rd_o,
output reg [2:0] x_fun_o,
output [4:0] x_opcode_o,
output reg x_shifter_sign_o,
output reg x_is_signed_alu_op_o,
output reg x_is_add_o,
output reg x_is_load_o,
output reg x_is_store_o,
output reg x_is_undef_o,
output [4:0] x_opcode_o,
output reg x_shifter_sign_o,
output reg x_is_signed_alu_op_o,
output reg x_is_add_o,
output reg x_is_load_o,
output reg x_is_store_o,
output reg x_is_undef_o,
output reg [2:0] x_rd_source_o,
output x_rd_write_o,
output x_rd_write_o,
output reg [11:0] x_csr_sel_o,
output reg [4:0] x_csr_imm_o,
output reg x_is_csr_o,
output reg x_is_csr_o,
output reg x_is_mret_o,
output reg x_is_ebreak_o,
output reg [31:0] x_imm_o,
output reg [31:0] x_alu_op1_o,
output reg [31:0] x_alu_op2_o,
output reg x_use_op1_o,
output reg x_use_op2_o
output reg x_use_op1_o,
output reg x_use_op2_o,
output reg x_is_divide_o,
output reg x_is_multiply_o
);
parameter g_with_hw_div = 0;
parameter g_with_hw_mulh = 0;
wire [4:0] f_rs1 = f_ir_i[19:15];
wire [4:0] f_rs2 = f_ir_i[24:20];
wire [4:0] f_rd = f_ir_i[11:7];
wire [4:0] d_opcode = f_ir_i[6:2];
wire [2:0] d_fun = f_ir_i[14:12];
reg [4:0] x_rs1;
reg [4:0] x_rs2;
reg [4:0] x_rd;
......@@ -85,8 +90,8 @@ module urv_decode
reg x_valid;
reg x_is_shift;
reg x_rd_write;
assign x_rs1_o = x_rs1;
assign x_rs2_o = x_rs2;
assign x_rd_o = x_rd;
......@@ -98,14 +103,13 @@ module urv_decode
reg load_hazard;
wire d_is_shift = (d_fun == `FUNC_SL || d_fun == `FUNC_SR) &&
wire d_is_shift = !f_ir_i[25] && (d_fun == `FUNC_SL || d_fun == `FUNC_SR) &&
(d_opcode == `OPC_OP || d_opcode == `OPC_OP_IMM );
reg x_is_mul;
wire d_is_mul = (f_ir_i[25] && d_fun == `FUNC_MUL);
// hazard detect combinatorial logic
// x_rd, x_is_shift, x_is_mul is from the previous instruction.
// hazard detect combinatorial logic
always@*
if ( x_valid && f_valid_i && ( (f_rs1 == x_rd) || (f_rs2 == x_rd) ) && (!d_kill_i) )
begin
......@@ -124,7 +128,7 @@ module urv_decode
end
else
load_hazard <= 0;
reg inserting_nop;
// bubble insertion following a hazard (only 1 bubble).
......@@ -136,11 +140,11 @@ module urv_decode
assign d_stall_req_o = load_hazard && !inserting_nop;
assign x_valid_o = x_valid;
always@(posedge clk_i)
if(rst_i || d_kill_i)
if(rst_i || d_kill_i )
begin
x_pc_o <= 0;
x_valid <= 0;
......@@ -159,7 +163,7 @@ module urv_decode
x_rd <= f_rd;
x_opcode <= d_opcode;
end
// ALU function decoding
// attempt to reuse ALU for jump address generation
always@(posedge clk_i)
......@@ -170,22 +174,22 @@ module urv_decode
default:
x_fun_o <= d_fun;
endcase // case (f_opcode)
always@(posedge clk_i)
if(!d_stall_i)
x_shifter_sign_o <= f_ir_i[30];
wire [31:0] d_imm_i = { {21{ f_ir_i[31] }}, f_ir_i[30:25], f_ir_i[24:21], f_ir_i[20] };
wire[31:0] d_imm_i = { {21{ f_ir_i[31] }}, f_ir_i[30:25], f_ir_i[24:21], f_ir_i[20] };
wire [31:0] d_imm_s = { {21{ f_ir_i[31] }}, f_ir_i[30:25], f_ir_i[11:8], f_ir_i[7] };
wire [31:0] d_imm_b = { {20{ f_ir_i[31] }}, f_ir_i[7], f_ir_i[30:25], f_ir_i[11:8], 1'b0 };
wire [31:0] d_imm_u = { f_ir_i[31], f_ir_i[30:20], f_ir_i[19:12], 12'h000 };
wire [31:0] d_imm_j = { {12{f_ir_i[31]}},
f_ir_i[19:12],
wire [31:0] d_imm_j = { {12{f_ir_i[31]}},
f_ir_i[19:12],
f_ir_i[20], f_ir_i[30:25], f_ir_i[24:21], 1'b0};
reg [31:0] d_imm;
// Immediate decode, comb part
always@*
......@@ -203,27 +207,27 @@ module urv_decode
always@(posedge clk_i)
if(!d_stall_i)
x_imm_o <= d_imm;
// ALU operand decoding
always@(posedge clk_i)
if(!d_stall_i)
begin
case (d_opcode)
`OPC_LUI, `OPC_AUIPC:
`OPC_LUI, `OPC_AUIPC:
begin
x_alu_op1_o <= d_imm;
x_alu_op1_o <= d_imm;
x_use_op1_o <= 1;
end
`OPC_JAL, `OPC_JALR:
begin
x_alu_op1_o <= 4;
x_alu_op1_o <= 4;
x_use_op1_o <= 1;
end
default:
begin
x_alu_op1_o <= 32'hx;
x_alu_op1_o <= 32'hx;
x_use_op1_o <= 0;
end
endcase // case (d_opcode)
......@@ -239,7 +243,7 @@ module urv_decode
x_alu_op2_o <= f_pc_i;
x_use_op2_o <= 1;
end
`OPC_OP_IMM:
begin
x_alu_op2_o <= d_imm;
......@@ -248,15 +252,15 @@ module urv_decode
default:
begin
x_alu_op2_o <= 32'hx;
x_alu_op2_o <= 32'hx;
x_use_op2_o <= 0;
end
endcase // case (d_opcode_i)
end // if (!d_stall_i)
wire d_rd_nonzero = (f_rd != 0);
// misc decoding
always@(posedge clk_i)
if(!d_stall_i)
......@@ -265,7 +269,7 @@ module urv_decode
x_is_load_o <= d_opcode == `OPC_LOAD && !load_hazard;
x_is_store_o <= d_opcode == `OPC_STORE && !load_hazard;
x_is_mul <= d_is_mul;
case (d_opcode)
......@@ -286,18 +290,65 @@ module urv_decode
x_is_add_o <= 1;
endcase // case (d_opcode)
// all multiply/divide instructions except MUL
x_is_undef_o <= (d_opcode == `OPC_OP && f_ir_i[25] && d_fun != `FUNC_MUL);
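// multiply/divide instructions: MUL is always supported; MULH*/DIV*/REM* are flagged undefined unless the matching hardware option is enabled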
if( d_opcode == `OPC_OP && f_ir_i[25] )
begin
case (d_fun)
`FUNC_MUL:
begin
x_is_multiply_o <= 1;
x_is_divide_o <= 0;
x_is_undef_o <= 0;
end
`FUNC_MULH, `FUNC_MULHU, `FUNC_MULHSU:
begin
x_is_multiply_o <= 1;
x_is_undef_o <= !g_with_hw_mulh;
end
`FUNC_DIV, `FUNC_DIVU, `FUNC_REM, `FUNC_REMU:
begin
x_is_multiply_o <= 0;
x_is_divide_o <= 1;
x_is_undef_o <= !g_with_hw_div;
end
default:
begin
x_is_multiply_o <= 0;
x_is_divide_o <= 0;
x_is_undef_o <= 0;
end
endcase // case (d_fun)
end else begin // if ( d_opcode == `OPC_OP && f_ir_i[25] )
x_is_multiply_o <= 0;
x_is_divide_o <= 0;
x_is_undef_o <= 0;
end // else: !if( d_opcode == `OPC_OP && f_ir_i[25] )
if(d_is_shift)
x_rd_source_o <= `RD_SOURCE_SHIFTER;
else if (d_opcode == `OPC_SYSTEM)
x_rd_source_o <= `RD_SOURCE_CSR;
else if (d_opcode == `OPC_OP && !d_fun[2] && f_ir_i[25])
x_rd_source_o <= `RD_SOURCE_MULTIPLY;
else if (d_opcode == `OPC_OP && f_ir_i[25])
begin
if( !d_fun[2] )
begin
if( d_fun == `FUNC_MUL )
x_rd_source_o <= `RD_SOURCE_MULTIPLY;
else
x_rd_source_o <= `RD_SOURCE_MULH;
end
else
x_rd_source_o <= `RD_SOURCE_DIVIDE;
end
else
x_rd_source_o <= `RD_SOURCE_ALU;
// rdest write value
case (d_opcode)
`OPC_OP_IMM, `OPC_OP, `OPC_JAL, `OPC_JALR, `OPC_LUI, `OPC_AUIPC:
......@@ -308,19 +359,19 @@ module urv_decode
x_rd_write <= 0;
endcase // case (d_opcode)
end // if (!d_stall_i)
// CSR/supervisor instructions
always@(posedge clk_i)
if (!d_stall_i)
begin
x_csr_imm_o <= f_ir_i[19:15];
x_csr_sel_o <= f_ir_i[31:20];
x_is_csr_o <= (d_opcode == `OPC_SYSTEM) && (d_fun != 0);
x_is_mret_o <= (d_opcode == `OPC_SYSTEM) && (d_fun == 0) && (f_ir_i [31:20] == 12'b0011000_00010);
x_is_ebreak_o <= (d_opcode == `OPC_SYSTEM) && (d_fun == 0) && (f_ir_i [31:20] == 12'b0000000_00001);
end
if (!d_stall_i)
begin
x_csr_imm_o <= f_ir_i[19:15];
x_csr_sel_o <= f_ir_i[31:20];
x_is_csr_o <= (d_opcode == `OPC_SYSTEM) && (d_fun != 0);
x_is_mret_o <= (d_opcode == `OPC_SYSTEM) && (d_fun == 0) && (f_ir_i [31:20] == 12'b0011000_00010);
x_is_ebreak_o <= (d_opcode == `OPC_SYSTEM) && (d_fun == 0) && (f_ir_i [31:20] == 12'b0000000_00001);
end
assign x_rd_write_o = x_rd_write;
endmodule // urv_decode
......@@ -16,21 +16,21 @@
You should have received a copy of the GNU Lesser General Public
License along with this library.
*/
`include "urv_config.v"
// opcodes (bits[6:2], bits[1:0] == 2'b11)
`define OPC_OP_IMM 5'b00100
`define OPC_LUI 5'b01101
`define OPC_AUIPC 5'b00101
`define OPC_OP 5'b01100
`define OPC_JAL 5'b11011
`define OPC_JALR 5'b11001
`define OPC_LUI 5'b01101
`define OPC_AUIPC 5'b00101
`define OPC_OP 5'b01100
`define OPC_JAL 5'b11011
`define OPC_JALR 5'b11001
`define OPC_BRANCH 5'b11000
`define OPC_LOAD 5'b00000
`define OPC_STORE 5'b01000
`define OPC_LOAD 5'b00000
`define OPC_STORE 5'b01000
`define OPC_SYSTEM 5'b11100
`define BRA_EQ 3'b000
......@@ -65,17 +65,17 @@
`define FUNC_REM 3'b110
`define FUNC_REMU 3'b111
`define RD_SOURCE_ALU 3'b000
`define RD_SOURCE_ALU 3'b000
`define RD_SOURCE_SHIFTER 3'b010
`define RD_SOURCE_MULTIPLY 3'b001
// `define RD_SOURCE_DIVIDE 3'b011 ?? conflict with CSR
`define RD_SOURCE_DIVIDE 3'b100
`define RD_SOURCE_CSR 3'b011
`define RD_SOURCE_MULH 3'b111
// CSR addresses
`define CSR_ID_CYCLESH 12'hc80
`define CSR_ID_CYCLESL 12'hc00
`define CSR_ID_CYCLESL 12'hc00
`define CSR_ID_TIMEH 12'hc81
`define CSR_ID_TIMEL 12'hc01
`define CSR_ID_TIMEL 12'hc01
`define CSR_ID_MSCRATCH 12'h340
`define CSR_ID_MEPC 12'h341
`define CSR_ID_MSTATUS 12'h300
......
......@@ -61,8 +61,8 @@ module urv_divide
always@*
case(state) // synthesis full_case parallel_case
0: begin alu_op1 <= 'hx; alu_op2 <= 'hx; end
1: begin alu_op1 <= 0; alu_op2 <= d_rs1_i; end
2: begin alu_op1 <= 0; alu_op2 <= d_rs2_i; end
1: begin alu_op1 <= 0; alu_op2 <= n; end
2: begin alu_op1 <= 0; alu_op2 <= d; end
35: begin alu_op1 <= 0; alu_op2 <= q; end
36: begin alu_op1 <= 0; alu_op2 <= r; end
default: begin alu_op1 <= r_next; alu_op2 <= d; end
......@@ -74,15 +74,14 @@ module urv_divide
wire alu_ge = ~alu_result [32];
wire start_divide = !x_stall_i && !x_kill_i && d_valid_i && d_is_divide_i;
wire done = (is_rem ? state == 37 : state == 36 );
wire busy = ( state != 0 && !done );
wire start_divide = !x_kill_i && d_valid_i && d_is_divide_i && !busy;
assign x_stall_req_o = (start_divide || !done);
assign x_stall_req_o = (d_valid_i && d_is_divide_i && !done);
always@*
case (state) // synthesis full_case parallel_case
case (state)
1:
alu_sub <= n_sign;
2:
......@@ -95,8 +94,6 @@ module urv_divide
alu_sub <= 1;
endcase // case (state)
always@(posedge clk_i)
if(rst_i || done)
state <= 0;
......@@ -104,7 +101,7 @@ module urv_divide
state <= state + 1;
always@(posedge clk_i)
case ( state ) // synthesis full_case parallel_case
case ( state )
0:
if(start_divide)
begin
......@@ -113,14 +110,24 @@ module urv_divide
is_rem <= (d_fun_i == `FUNC_REM || d_fun_i ==`FUNC_REMU);
n_sign <= d_rs1_i[31];
d_sign <= d_rs2_i[31];
n <= d_rs1_i;
d <= d_rs2_i;
if( d_fun_i == `FUNC_DIVU || d_fun_i == `FUNC_REMU )
begin
n_sign <= 0;
d_sign <= 0;
end else begin
n_sign <= d_rs1_i[31];
d_sign <= d_rs2_i[31];
end
end
1:
1:
n <= alu_result[31:0];
2:
2:
d <= alu_result[31:0];
35:
......
This diff is collapsed.
......@@ -67,9 +67,9 @@ module urv_mult18x18
.C(48'h0),
.CARRYIN(),
.D(18'b0),
.CEA(1'b0),
.CEB(1'b0),
.CEC(1'b0),
.CEA(1'b1),
.CEB(1'b1),
.CEC(1'b1),
.CECARRYIN(1'b0),
.CED(1'b0),
.CEM(~stall_i),
......@@ -90,6 +90,7 @@ module urv_mult18x18
initial force D1.OPMODE_dly = 8'd1;
// synthesis translate_on
endmodule // urv_mult18x18
`endif // `ifdef PLATFORM_SPARTAN6
......@@ -102,10 +103,10 @@ module urv_mult18x18
input stall_i,
input [17:0] x_i,
input [17:0] y_i,
input signed [17:0] x_i,
input signed [17:0] y_i,
output reg [35:0] q_o
output reg signed [35:0] q_o
);
......@@ -157,34 +158,51 @@ endmodule // urv_mult18x18
module urv_multiply
(
input clk_i,
input rst_i,
input x_stall_i,
input [31:0] d_rs1_i,
input [31:0] d_rs2_i,
input [2:0] d_fun_i,
output reg [31:0] w_rd_o
input clk_i,
input rst_i,
input x_stall_i,
input x_kill_i,
output x_stall_req_o,
input [31:0] d_rs1_i,
input [31:0] d_rs2_i,
input [2:0] d_fun_i,
input d_is_multiply_i,
// multiply result for MUL instructions, bypassed to W-stage to achieve 1-cycle performance
// without much penalty on clock speed
output reg [31:0] w_rd_o,
// multiply result for MULH(S)(U) instructions. Goes to the X stage
// destination value mux.
output reg [31:0] x_rd_o
);
parameter g_with_hw_mulh = 0;
wire[17:0] xl_u = {1'b0, d_rs1_i[16:0] };
wire[17:0] xl_u = {1'b0, d_rs1_i[16:0] }; // 17 bits
wire[17:0] yl_u = {1'b0, d_rs2_i[16:0] };
wire[17:0] xl_s = {d_rs1_i[16], d_rs1_i[16:0] };
wire[17:0] yl_s = {d_rs2_i[16], d_rs2_i[16:0] };
wire[17:0] xh = { {3{d_rs1_i[31]}}, d_rs1_i[31:17] };
wire[17:0] yh = { {3{d_rs2_i[31]}}, d_rs2_i[31:17] };
wire sign_extend_xh = (d_fun_i == `FUNC_MULH || d_fun_i == `FUNC_MULHSU) ? d_rs1_i[31] : 1'b0 ;
wire sign_extend_yh = (d_fun_i == `FUNC_MULH) ? d_rs2_i[31] : 1'b0 ;
wire signed [17:0] xh = { {3{sign_extend_xh}}, d_rs1_i[31:17] }; // 15 bits
wire signed [17:0] yh = { {3{sign_extend_yh}}, d_rs2_i[31:17] };
wire [35:0] yl_xl, yl_xh, yh_xl;
wire signed [35:0] xh_yh;
wire signed [35:0] yl_xl, yl_xh, yh_xl;
reg mul_stall_req;
reg mul_stall_req_d0;
reg mul_stall_req_d1;
urv_mult18x18 mul0
(
.clk_i(clk_i),
.rst_i(rst_i),
.stall_i(x_stall_i),
.stall_i(1'b0),
.x_i(xl_u),
.y_i(yl_u),
......@@ -195,9 +213,9 @@ module urv_multiply
(
.clk_i(clk_i),
.rst_i(rst_i),
.stall_i(x_stall_i),
.stall_i(1'b0),
.x_i(xl_s),
.x_i(xl_u),
.y_i(yh),
.q_o(yh_xl)
);
......@@ -206,14 +224,72 @@ module urv_multiply
(
.clk_i(clk_i),
.rst_i(rst_i),
.stall_i(x_stall_i),
.stall_i(1'b0),
.x_i(yl_s),
.x_i(yl_u),
.y_i(xh),
.q_o(yl_xh)
);
// Optional fourth 18x18 multiplier: multiplies the two upper operand slices
// (yh and xh) and delivers the product on xh_yh. It is instantiated only when
// g_with_hw_mulh is non-zero.
generate
if (g_with_hw_mulh)
begin
urv_mult18x18 mul3
(
.clk_i(clk_i),
.rst_i(rst_i),
// stall input tied low: this multiplier is never held
.stall_i(1'b0),
.x_i(yh),
.y_i(xh),
.q_o(xh_yh)
);
end
endgenerate
wire [63:0] mul_result;
wire [63:0] yl_xl_ext = yl_xl;
wire [63:0] yh_xl_ext = { {15{yh_xl[35] } }, yh_xl, 17'h0 };
wire [63:0] yl_xh_ext = { {15{yl_xh[35] } }, yl_xh, 17'h0 };
wire [63:0] yh_xh_ext = { xh_yh, 34'h0 };
// Result assembly and stall generation, selected at elaboration time by
// g_with_hw_mulh.
generate
if (g_with_hw_mulh)
begin
// Full 64-bit product: sum of the four extended partial products.
assign mul_result = yl_xl_ext + yh_xl_ext + yl_xh_ext + yh_xh_ext;
// Request a stall for multiply instructions other than MUL, unless the
// instruction is being killed or mul_stall_req_d1 is already set.
always@(*)
mul_stall_req <= !x_kill_i && !mul_stall_req_d1 && d_is_multiply_i && d_fun_i != `FUNC_MUL;
// Registered upper word of the product (the multiply-high result).
always@(posedge clk_i)
x_rd_o <= mul_result[63:32];
// mul_stall_req delayed by one (d0) and two (d1) clock cycles; both are
// cleared on reset. d1 feeds back into mul_stall_req above to release the
// stall request.
always@(posedge clk_i)
if (rst_i)
begin
mul_stall_req_d0 <= 0;
mul_stall_req_d1 <= 0;
end else begin
mul_stall_req_d0 <= mul_stall_req;
mul_stall_req_d1 <= mul_stall_req_d0;
end
end
else // no hardware multiply high
begin
// Only three partial products are summed here; the multiplier never
// requests a stall in this configuration.
assign mul_result = yl_xl + {yl_xh[14:0], 17'h0} + {yh_xl[14:0], 17'h0};
always@*
mul_stall_req <= 1'b0;
end // else: !if(g_with_hw_mulh)
endgenerate
assign x_stall_req_o = mul_stall_req;
always@*
w_rd_o <= yl_xl + {yl_xh[14:0], 17'h0} + {yh_xl[14:0], 17'h0};
w_rd_o <= mul_result[31:0];
endmodule // urv_multiply
......@@ -87,13 +87,13 @@ module urv_regfile
wire [31:0] rs1_regfile;
wire [31:0] rs2_regfile;
wire write = (w_rd_store_i && (w_rd_i != 0));
wire write = (!d_stall_i && w_rd_store_i && (w_rd_i != 0));
urv_regmem bank0
(
.clk_i(clk_i),
.rst_i (rst_i ),
.en1_i(!d_stall_i),
.en1_i(1'b1),
.a1_i(rf_rs1_i),
.q1_o(rs1_regfile),
......@@ -106,7 +106,7 @@ module urv_regfile
(
.clk_i(clk_i),
.rst_i (rst_i ),
.en1_i(!d_stall_i),
.en1_i(1'b1),
.a1_i(rf_rs2_i),
.q1_o(rs2_regfile),
......
......@@ -16,7 +16,7 @@
You should have received a copy of the GNU Lesser General Public
License along with this library.
*/
`include "urv_defs.v"
......@@ -29,17 +29,17 @@ module urv_writeback
input rst_i,
output w_stall_req_o,
input [2:0] x_fun_i,
input x_load_i,
input x_store_i,
input [31:0] x_dm_addr_i,
input [4:0] x_rd_i,
input [31:0] x_rd_value_i,
input x_rd_write_i,
input x_valid_i,
input [31:0] x_shifter_rd_value_i,
input [31:0] x_multiply_rd_value_i,
......@@ -48,7 +48,7 @@ module urv_writeback
input [31:0] dm_data_l_i,
input dm_load_done_i,
input dm_store_done_i,
output [31:0] rf_rd_value_o,
output [4:0] rf_rd_o,
output rf_rd_write_o
......@@ -68,7 +68,7 @@ module urv_writeback
2'b11: load_value <= {{24{dm_data_l_i[31]}}, dm_data_l_i[31:24] };
default: load_value <= 32'hx;
endcase // case ( x_dm_addr_i [1:0] )
`LDST_BU:
case ( x_dm_addr_i [1:0] )
2'b00: load_value <= {24'h0, dm_data_l_i[7:0] };
......@@ -77,7 +77,7 @@ module urv_writeback
2'b11: load_value <= {24'h0, dm_data_l_i[31:24] };
default: load_value <= 32'hx;
endcase // case ( x_dm_addr_i [1:0] )
`LDST_H:
case ( x_dm_addr_i [1:0] )
2'b00, 2'b01: load_value <= {{16{dm_data_l_i[15]}}, dm_data_l_i[15:0] };
......@@ -91,7 +91,7 @@ module urv_writeback
2'b10, 2'b11: load_value <= {16'h0, dm_data_l_i[31:16] };
default: load_value <= 32'hx;
endcase // case ( x_dm_addr_i [1:0] )
`LDST_L: load_value <= dm_data_l_i;
default: load_value <= 32'hx;
......@@ -100,7 +100,7 @@ module urv_writeback
reg rf_rd_write;
reg [31:0] rf_rd_value;
always@*
if( x_load_i )
rf_rd_value <= load_value;
......@@ -117,6 +117,13 @@ module urv_writeback
else
rf_rd_write <= x_rd_write_i & x_valid_i;
// synthesis translate_off
always@(posedge clk_i)
if(!rst_i)
if(rf_rd_write && (^rf_rd_value === 1'hx) )
$error("Attempt to write unknown value to reg %x", x_rd_i);
// synthesis translate_on
assign rf_rd_write_o = rf_rd_write;
assign rf_rd_value_o = rf_rd_value;
assign rf_rd_o = x_rd_i;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment