Commit 7630ec3a authored by Tomasz Wlostowski's avatar Tomasz Wlostowski

added multiply

parent 24a3f937
......@@ -16,4 +16,5 @@ files = [ "rv_cpu.v",
"rv_multiply.v",
"rv_csr.v",
"rv_timer.v",
"rv_exceptions.v"];
"rv_exceptions.v",
"../sim/rv_icache_model.sv"];
......@@ -145,6 +145,7 @@ module rv_cpu
wire w_stall_req;
wire x_stall_req;
/* -----\/----- EXCLUDED -----\/-----
chipscope_icon icon0(
.CONTROL0( CONTROL) );
......@@ -155,6 +156,7 @@ module rv_cpu
.TRIG1(TRIG1),
.TRIG2(TRIG2),
.TRIG3(TRIG3) );
-----/\----- EXCLUDED -----/\----- */
assign TRIG0 = f2d_pc;
assign TRIG1 = f2d_ir;
......@@ -221,12 +223,15 @@ module rv_cpu
wire [4:0] x2w_rd;
wire [31:0] x2w_rd_value;
wire [31:0] x2w_rd_shifter;
wire [31:0] x2w_rd_multiply;
wire [31:0] x2w_dm_addr;
wire x2w_rd_write;
wire [2:0] x2w_fun;
wire x2w_store;
wire x2w_load;
wire [1:0] x2w_rd_source;
wire [31:0] x_rs2_value, x_rs1_value;
......@@ -321,6 +326,9 @@ module rv_cpu
.w_rd_o(x2w_rd),
.w_rd_value_o(x2w_rd_value),
.w_rd_write_o(x2w_rd_write),
.w_rd_source_o ( x2w_rd_source),
.w_rd_shifter_o ( x2w_rd_shifter),
.w_rd_multiply_o ( x2w_rd_multiply),
.dm_addr_o(dm_addr_o),
.dm_data_s_o(dm_data_s_o),
......@@ -352,8 +360,11 @@ module rv_cpu
.x_store_i(x2w_store),
.x_rd_i(x2w_rd),
.x_rd_source_i(x2w_rd_source),
.x_rd_value_i(x2w_rd_value),
.x_rd_write_i(x2w_rd_write),
.x_shifter_rd_value_i ( x2w_rd_shifter),
.x_multiply_rd_value_i ( x2w_rd_multiply),
.x_dm_addr_i(x2w_dm_addr),
.dm_data_l_i(dm_data_l_i),
......@@ -380,7 +391,6 @@ module rv_cpu
else if(stall_timeout != 63)
stall_timeout <= stall_timeout + 1;
assign TRIG2[16] = (stall_timeout == 63) ? 1'b1 : 1'b0;
......@@ -406,43 +416,16 @@ module rv_cpu
x2f_bra_d1 <= x2f_bra_d0;
end
// load to Rd in W stage while Rs1/Rs2==RD in fetch stage: assert interlock
/* -----\/----- EXCLUDED -----\/-----
reg interlock_load, interlock_load_d0 = 0;
always@*
interlock_load <= d_load_hazard && x_load_comb;
always@(posedge clk_i)
if(interlock_load_d0)
interlock_load_d0 <= 0;
else
interlock_load_d0 <= interlock_load;
-----/\----- EXCLUDED -----/\----- */
assign f_stall = x_stall_req || w_stall_req || d_stall_req;
// || (interlock_load && !interlock_load_d0);
assign x_stall = x_stall_req || w_stall_req;
// || (interlock_load && !interlock_load_d0);
assign d_stall = x_stall_req || w_stall_req;
// || (interlock_load && !interlock_load_d0);
assign w_stall = 0;
assign x_kill = x2f_bra || x2f_bra_d0 || x2f_bra_d1;
assign d_kill = x2f_bra || x2f_bra_d0;
assign f_kill = x2f_bra ;
//&& ~x_bra_d0;
endmodule // rv_cpu
......
......@@ -78,6 +78,10 @@ module rv_exec
output reg [31:0] w_rd_value_o,
output reg w_rd_write_o,
output reg [31:0] w_dm_addr_o,
output reg [1:0] w_rd_source_o,
output [31:0] w_rd_shifter_o,
output [31:0] w_rd_multiply_o,
// Data memory I/F (address/store)
output [31:0] dm_addr_o,
......@@ -120,7 +124,7 @@ module rv_exec
reg f_branch_take;
wire x_stall_req_shifter;
wire x_stall_req_shifter = 0;
wire x_stall_req_multiply = 0;
wire x_stall_req_divide = 0;
......@@ -288,7 +292,6 @@ module rv_exec
.rst_i(rst_i),
.x_stall_i(x_stall_i),
.w_stall_req_i(w_stall_req_i),
.d_valid_i(d_valid_i),
.d_rs1_i(rs1),
.d_shamt_i(alu_op2[4:0]),
......@@ -296,15 +299,24 @@ module rv_exec
.d_shifter_sign_i(d_shifter_sign_i),
.d_is_shift_i(d_is_shift_i),
.x_stall_req_o(x_stall_req_shifter),
.x_rd_o(rd_shifter)
.w_rd_o(w_rd_shifter_o)
);
rv_multiply multiplier
(
.clk_i(clk_i),
.rst_i(rst_i),
.x_stall_i(x_stall_i),
.d_rs1_i(rs1),
.d_rs2_i(rs2),
.d_fun_i(d_fun),
.w_rd_o (w_rd_multiply_o)
);
always@*
case (d_rd_source_i)
`RD_SOURCE_ALU: rd_value <= alu_result;
`RD_SOURCE_SHIFTER : rd_value <= rd_shifter;
`RD_SOURCE_CSR: rd_value <= rd_csr;
default: rd_value <= 32'hx;
endcase // case (x_rd_source_i)
......@@ -312,9 +324,7 @@ module rv_exec
// generate load/store address
always@*
begin
dm_addr <= rs1 + d_imm_i;
//[11:0]);
dm_addr <= rs1 + { {20{d_imm_i[11]}}, d_imm_i[11:0] };
end
reg unaligned_addr;
......@@ -418,6 +428,8 @@ module rv_exec
w_load_o <= 0;
w_store_o <= 0;
w_dm_addr_o <= 0;
w_rd_source_o <= 0;
end else if (!x_stall_i) begin
......@@ -429,7 +441,8 @@ module rv_exec
// if(!shifter_stall_req)
w_rd_value_o <= rd_value;
w_rd_write_o <= d_rd_write_i && !x_kill_i && d_valid_i && !exception;
w_rd_source_o <= d_rd_source_i;
w_fun_o <= d_fun_i;
w_load_o <= is_load && !exception;
w_store_o <= is_store && !exception;
......
......@@ -47,33 +47,40 @@ module rv_fetch
reg rst_d;
reg [31:0] pc_next;
reg [31:0] pc_plus_4;
always@*
if( x_bra_i )
pc_next <= x_pc_bra_i;
else if (!rst_d || f_stall_i || !im_valid_i)
pc_next <= pc;
else
pc_next <= pc + 4;
pc_next <= pc_plus_4;
assign f_ir_o = ir;
assign im_addr_o = pc_next;
always@(posedge clk_i)
if (rst_i) begin
pc <= 0;
pc_plus_4 <= 4;
ir <= 0;
f_valid_o <= 0;
rst_d <= 0;
end else begin
rst_d <= 1;
if (!f_stall_i) begin
if(im_valid_i)
pc_plus_4 <= (x_bra_i ? x_pc_bra_i : pc_plus_4) + 4;
pc <= pc_next;
f_pc_o <= pc;
if(im_valid_i) begin
......
......@@ -7,24 +7,14 @@ module rv_multiply
input clk_i,
input rst_i,
input x_stall_i,
input w_stall_req_i,
input d_valid_i,
input d_is_mul_i,
input [31:0] d_rs1_i,
input [31:0] d_rs2_i,
input [4:0] d_opcode_i,
input [2:0] d_fun_i,
output reg [31:0] x_rd_o,
output x_stall_req_o
output reg [31:0] w_rd_o
);
parameter g_latency = 2;
// Extension bits for the two multiplier operands: each is the operand's MSB
// when d_fun_i matches one of the listed function codes, and 0 otherwise.
// sign_b is prepended to d_rs2_i to build the 33-bit operand b.
// NOTE(review): both tests key on FUNC_MUL, but the output mux at the end of
// this module returns the low product word [31:0] for FUNC_MUL, and the low
// 32 bits of a product do not depend on how the operands were extended. If a
// signed high-word multiply (MULH in the RISC-V M extension) is meant to be
// supported, these comparisons presumably should name that function code
// instead -- confirm against the FUNC_* definitions.
wire sign_a = ( d_fun_i == `FUNC_MUL || d_fun_i == `FUNC_MULHSU ) ? d_rs1_i[31] : 1'b0;
wire sign_b = ( d_fun_i == `FUNC_MUL ) ? d_rs2_i[31] : 1'b0;
......@@ -32,19 +22,21 @@ module rv_multiply
wire [32:0] b = { sign_b, d_rs2_i };
reg [65:0] stage0, stage1;
reg [2:0] s2_fun;
always@(posedge clk_i)
if(!x_stall_i)
begin
stage0 <= $signed(a) * $signed(b);
stage1 <= stage0;
s2_fun <= d_fun_i;
end
always@*
if( d_fun_i != `FUNC_MUL )
x_rd_o <= stage1[63:32];
if( s2_fun != `FUNC_MUL )
w_rd_o <= stage0[63:32];
else
x_rd_o <= stage1[31:0];
w_rd_o <= stage0[31:0];
endmodule // rv_multiply
......
......@@ -61,7 +61,7 @@ module rv_decode
output reg x_is_signed_compare_o,
output reg x_is_signed_alu_op_o,
output reg x_is_add_o,
output reg x_is_shift_o,
output x_is_shift_o,
output reg [2:0] x_rd_source_o,
output reg x_rd_write_o,
......@@ -81,6 +81,8 @@ module rv_decode
reg [4:0] x_rd;
reg [4:0] x_opcode;
reg x_valid;
reg x_is_shift;
assign x_rs1_o = x_rs1;
assign x_rs2_o = x_rs2;
......@@ -116,15 +118,30 @@ module rv_decode
wire [4:0] d_opcode = f_ir_i[6:2];
/* -----\/----- EXCLUDED -----\/-----
always@(posedge clk_i)
if(!d_stall_i)
x_load_hazard_o <= ( (f_rs1 == x_rd) || (f_rs2 == x_rd) ) && (!d_kill_i) && (x_opcode == `OPC_LOAD);
-----/\----- EXCLUDED -----/\----- */
reg load_hazard;
// attempt to reuse ALU for jump address generation
wire [2:0] d_fun = f_ir_i[14:12];
wire load_hazard = x_valid && f_valid_i && ( (f_rs1 == x_rd) || (f_rs2 == x_rd) ) && (!d_kill_i) && (x_opcode == `OPC_LOAD);
// High when the instruction being decoded is treated as a shift: funct3
// (d_fun) equals FUNC_SL or FUNC_SR and the major opcode is OP or OP_IMM.
// NOTE(review): funct7 is not examined here. Assuming FUNC_SL/FUNC_SR are the
// standard funct3 codes 001/101, an OPC_OP instruction with f_ir_i[25] set
// (the M-extension encoding space, e.g. MULH with funct3 001) also satisfies
// this test -- confirm that such instructions are not classified as shifts.
wire d_is_shift = (d_fun == `FUNC_SL || d_fun == `FUNC_SR) &&
(d_opcode == `OPC_OP || d_opcode == `OPC_OP_IMM );
// Combinational hazard detect. A hazard is only considered when the
// instruction held in the x_* registers is valid, the incoming instruction is
// valid, one of its source registers (f_rs1/f_rs2) equals x_rd, and this
// stage is not being killed.
// NOTE(review): the writeback stage also selects a multiplier result by
// rd_source, yet only loads and shifts raise the hazard here -- confirm
// whether a dependent instruction right after a multiply needs the same stall.
always@*
if (x_valid && f_valid_i && ( (f_rs1 == x_rd) || (f_rs2 == x_rd) ) && (!d_kill_i) )
begin
case (x_opcode)
// a load in flight always raises the hazard
`OPC_LOAD:
load_hazard <= 1;
// register/immediate ALU ops raise it only when the op was decoded as a shift
`OPC_OP,
`OPC_OP_IMM:
load_hazard <= x_is_shift;
// every other opcode: no hazard
default:
load_hazard <= 0;
endcase // case (x_opcode)
end else
load_hazard <= 0;
//wire load_hazard = x_valid && f_valid_i && ( (f_rs1 == x_rd) || (f_rs2 == x_rd) ) && (!d_kill_i) && (x_opcode == `OPC_LOAD);
reg inserting_nop = 0;
......@@ -133,12 +150,11 @@ module rv_decode
inserting_nop <= 0;
else if (!d_stall_i)
begin
if (inserting_nop)
inserting_nop <= 0;
else
inserting_nop <= load_hazard;
end
if (inserting_nop)
inserting_nop <= 0;
else
inserting_nop <= load_hazard;
end
assign d_stall_req_o = load_hazard && !inserting_nop;
......@@ -155,8 +171,6 @@ module rv_decode
x_shamt_o <= f_ir_i[24:20];
end
// attempt to reuse ALU for jump address generation
wire [2:0] d_fun = f_ir_i[14:12];
always@(posedge clk_i)
if(!d_stall_i)
......@@ -199,14 +213,12 @@ module rv_decode
endcase // case (opcode)
end // always@ (posedge clk_i)
wire d_is_shift = (d_fun == `FUNC_SL || d_fun == `FUNC_SR) &&
(d_opcode == `OPC_OP || d_opcode == `OPC_OP_IMM );
// misc decoding
always@(posedge clk_i)
if(!d_stall_i)
begin
x_is_shift_o <= d_is_shift;
x_is_shift <= d_is_shift;
x_is_signed_compare_o <= ( ( d_opcode == `OPC_BRANCH) && ( ( d_fun == `BRA_GE )|| (d_fun == `BRA_LT ) ) )
|| ( ( (d_opcode == `OPC_OP) || (d_opcode == `OPC_OP_IMM) ) && (d_fun == `FUNC_SLT ) );
......@@ -225,6 +237,8 @@ module rv_decode
x_rd_source_o <= `RD_SOURCE_SHIFTER;
else if (d_opcode == `OPC_SYSTEM)
x_rd_source_o <= `RD_SOURCE_CSR;
else if (d_opcode == `OPC_OP && !d_fun[2] && f_ir_i[25])
x_rd_source_o <= `RD_SOURCE_MULTIPLY;
else
x_rd_source_o <= `RD_SOURCE_ALU;
......@@ -256,7 +270,7 @@ module rv_decode
end
assign x_is_shift_o = x_is_shift;
......
......@@ -25,15 +25,18 @@
module rv_regmem
(
input clk_i,
input rst_i,
input clk_i,
input rst_i,
input [4:0] a1_i,
output [31:0] q1_o,
input en1_i,
input en2_i,
input [4:0] a1_i,
output [31:0] q1_o,
input [4:0] a2_i,
input [31:0] d2_i,
input we2_i
input [4:0] a2_i,
input [31:0] d2_i,
input we2_i
);
reg [31:0] ram [0:31];
......
......@@ -34,14 +34,10 @@ module rv_shifter
input rst_i,
input x_stall_i,
input w_stall_req_i,
output x_stall_req_o,
input d_valid_i,
input d_valid_i,
input [31:0] d_rs1_i,
output [31:0] x_rd_o,
output reg[31:0] w_rd_o,
input [4:0] d_shamt_i,
input [2:0] d_fun_i,
......@@ -50,16 +46,6 @@ module rv_shifter
wire extend_sign = ((d_fun_i == `FUNC_SR) && d_shifter_sign_i) ? d_rs1_i[31] : 1'b0;
wire shifter_req = !w_stall_req_i && d_valid_i && d_is_shift_i;
reg shifter_req_d0;
always@(posedge clk_i)
if(shifter_req_d0 && !x_stall_i)
shifter_req_d0 <= 0;
else
shifter_req_d0 <= shifter_req;
assign x_stall_req_o = shifter_req && !shifter_req_d0;
reg [31:0] shift_pre, shift_16, shift_8, s1_out;
......@@ -79,12 +65,23 @@ module rv_shifter
// stage 1 pipe register
always@(posedge clk_i)
if (!x_stall_i)
begin
s2_extend_sign <= extend_sign;
s2_shift <= d_shamt_i;
s2_func <= d_fun_i;
s1_out <= shift_8;
end
/* -----\/----- EXCLUDED -----\/-----
always@*
begin
s2_extend_sign <= extend_sign;
s2_shift <= d_shamt_i;
s2_func <= d_fun_i;
s1_out <= shift_8;
end
-----/\----- EXCLUDED -----/\----- */
reg [31:0] shift_4, shift_2, shift_1, shift_post;
......@@ -96,8 +93,15 @@ module rv_shifter
shift_1 <= s2_shift[0] ? { {1{s2_extend_sign}}, shift_2[31:1] } : shift_2;
shift_post <= (s2_func == `FUNC_SL) ? `reverse_bits(shift_1) : shift_1;
end
assign x_rd_o = shift_post;
/* -----\/----- EXCLUDED -----\/-----
always@(posedge clk_i)
if(!x_stall_i)
w_rd_o <= shift_post;
-----/\----- EXCLUDED -----/\----- */
always@*
w_rd_o <= shift_post;
endmodule // rv_shifter
......
......@@ -41,13 +41,17 @@ module rv_writeback
input [31:0] x_rd_value_i,
input x_rd_write_i,
input [31:0] x_shifter_rd_value_i,
input [31:0] x_multiply_rd_value_i,
input [1:0] x_rd_source_i,
input [31:0] dm_data_l_i,
input dm_load_done_i,
input dm_store_done_i,
output [31:0] rf_rd_value_o,
output reg [31:0] rf_rd_value_o,
output [4:0] rf_rd_o,
output rf_rd_write_o,
output reg rf_rd_write_o,
output [31:0] TRIG2
);
......@@ -99,20 +103,17 @@ module rv_writeback
end // always@ *
reg pending_load, pending_store, pending_load_hazard ;
reg pending_load, pending_store;
always@(posedge clk_i)
if(rst_i) begin
pending_load <= 0;
pending_store <= 0;
pending_load_hazard <= 0;
end else begin
end else begin
if(x_load_i && !dm_load_done_i) begin
pending_load <= 1;
pending_load_hazard <= x_load_hazard_i;
end else if (dm_load_done_i) begin
pending_load <= 0;
pending_load_hazard <= 0;
end
if(x_store_i && !dm_store_done_i)
......@@ -121,32 +122,34 @@ module rv_writeback
pending_store <= 0;
end
reg interlock_d = 0;
wire interlock = 0 ;
/* -----\/----- EXCLUDED -----\/-----
reg interlock_d ;
wire interlock = ( ( x_load_i || pending_load ) && dm_load_done_i && (x_load_hazard_i || pending_load_hazard ) );
always@(posedge clk_i)
if (rst_i) begin
interlock_d <= 0;
end else begin
if(interlock_d)
interlock_d <= 0;
else
interlock_d <= interlock;
end
-----/\----- EXCLUDED -----/\----- */
always@*
if( x_load_i || pending_load )
rf_rd_value_o <= load_value;
else if ( x_rd_source_i == `RD_SOURCE_SHIFTER )
rf_rd_value_o <= x_shifter_rd_value_i;
else if ( x_rd_source_i == `RD_SOURCE_MULTIPLY )
rf_rd_value_o <= x_multiply_rd_value_i;
else
rf_rd_value_o <= x_rd_value_i;
assign rf_rd_value_o = (x_load_i || pending_load ? load_value : x_rd_value_i );
always@*
if (w_stall_i)
rf_rd_write_o <= 0;
else if ( (x_load_i || pending_load) && dm_load_done_i)
rf_rd_write_o <= 1;
else
rf_rd_write_o <= x_rd_write_i;
// assign rf_rd_value_o = (x_load_i || pending_load ? load_value : x_rd_value_i );
assign rf_rd_o = (x_rd_i);
assign rf_rd_write_o = !interlock_d && (w_stall_i ? 1'b0 : ((x_load_i || pending_load) && dm_load_done_i ? 1'b1 : x_rd_write_i ));
assign w_stall_req_o = ((x_load_i || pending_load) && !dm_load_done_i) || ((x_store_i || pending_store) && !dm_store_done_i) || (interlock && !interlock_d);
assign w_stall_req_o = ((x_load_i || pending_load) && !dm_load_done_i) || ((x_store_i || pending_store) && !dm_store_done_i);
assign TRIG2[6] = x_load_i;
......@@ -155,9 +158,6 @@ module rv_writeback
assign TRIG2[9] = x_store_i;
assign TRIG2[10] = pending_store;
assign TRIG2[11] = dm_store_done_i;
assign TRIG2[12] = interlock;
assign TRIG2[13] = interlock_d;
assign TRIG2[14] = pending_load_hazard;
assign TRIG2[15] = w_stall_req_o;
endmodule // rv_writeback
......@@ -31,8 +31,7 @@
#define BASE_UART 0x20000
#define BASE_GPIO 0x21000
//#define UART_BAUDRATE 115200
#define UART_BAUDRATE 10000000
#define UART_BAUDRATE 115200
static inline void writel ( uint32_t reg, uint32_t val)
{
......
......@@ -46,13 +46,25 @@ void uart_init_hw()
}
/*
 * Output port written by putc(). uart_write_byte() routes characters here
 * when built with SIM defined. What decodes address 0x100000 is not visible
 * in this file -- presumably a testbench console register; confirm.
 *
 * The explicit cast is required: initialising a pointer from a bare integer
 * constant is a constraint violation in C and is rejected by recent compilers.
 */
volatile int *TX_REG = (volatile int *)0x100000;

/* Emit one character by storing it to the word TX_REG points at. */
void putc(char c)
{
	*TX_REG = c;
}
/*
 * Send one byte to the console.
 *
 * SIM builds: the byte is handed straight to putc().
 * Other builds: a '\n' is preceded by a '\r' (sent through a recursive call),
 * then the routine spins until UART_SR_TX_BUSY is clear in uart->SR and
 * stores the byte to uart->TDR.
 */
void uart_write_byte(int b)
{
#ifndef SIM
	if (b == '\n')
		uart_write_byte('\r');

	/* busy-wait for the transmitter to become idle */
	for (;;) {
		if (!(uart->SR & UART_SR_TX_BUSY))
			break;
	}

	uart->TDR = b;
#else
	putc(b);
#endif
}
int uart_poll()
......
......@@ -7,13 +7,13 @@ OBJDUMP = $(CROSS_COMPILE)objdump
OBJCOPY = $(CROSS_COMPILE)objcopy
SIZE = $(CROSS_COMPILE)size
CFLAGS = -g -m32 -msoft-float -march=RV32I -I. -I../common
CFLAGS = -g -O2 -m32 -msoft-float -march=RV32I -I. -I../common -DSIM
OBJS = ../common/crtuser.o main.o ../common/uart.o ../common/printf.o ../common/vsprintf-xint.o
LDS = ../common/ram2.ld
OUTPUT=hello
$(OUTPUT): $(LDS) $(OBJS)
${CC} -g -m32 -msoft-float -march=RV32I -o $(OUTPUT).elf -nostartfiles $(OBJS) -lm -L../coremark -lcoremark -T $(LDS)
${CC} -g -m32 -msoft-float -march=RV32I -o $(OUTPUT).elf -nostartfiles $(OBJS) -lm -L../coremark_v1.0 -lcoremark -T $(LDS)
${OBJCOPY} -O binary $(OUTPUT).elf $(OUTPUT).bin
${OBJDUMP} -D $(OUTPUT).elf > disasm.S
../genraminit $(OUTPUT).bin 16384 > $(OUTPUT).ram
......
......@@ -45,16 +45,27 @@ uint32_t sys_get_ticks()
return read_csr(0xc01);
}
extern void coremark_main();
extern void coremark_main(int argc, char *argv[]);
main()
{
uart_init_hw();
int argc = 1;
char *argv[] = {"coremark"};
int i;
for(i=0;i<100;i++)
{
float f = 2*3.14*(float)i / 100.0;
int y = (int) (1000.0 *sin(f));
pp_printf("%d %d\n", i, y);
}
pp_printf("Hello, world [%d]!\n\r", sys_get_ticks());
// coremark_main(argc, argv);
coremark_main();
// for(;;);
for(;;);
enable_irqs();
......
......@@ -6,25 +6,37 @@ _start:
la a5, lab1
lw a5, 0(a5)
lw a5, 0(a5)
lw a5, 0(a5)
nop
nop
nop
jr a5
forever:
j forever
lab1:
.word lab2
lab2:
.word lab3
lab3:
.word 0xdeadbeef
.word continue
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop