From 594a9dd1b5b3f4c96976f67c2393f3310077cfa1 Mon Sep 17 00:00:00 2001
From: Tristan Gingold <>
Date: Mon, 29 Nov 2021 14:43:09 +0100
Subject: [PATCH] First hydra test

 hdl/rtl/hydra_core.vhd        | 234 +++---------------------------
 hdl/top/sf2-test/sf2_test.vhd | 258 ++++++++--------------------------
 sw/sf2-test/Makefile          |  37 +++++
 sw/sf2-test/crt0.S            |  16 +++
 sw/sf2-test/main.c            |  59 ++++++++
 sw/sf2-test/ram.ld            |  38 +++++
 sw/sf2-test/          |  18 +++
 7 files changed, 242 insertions(+), 418 deletions(-)
 create mode 100644 sw/sf2-test/Makefile
 create mode 100644 sw/sf2-test/crt0.S
 create mode 100644 sw/sf2-test/main.c
 create mode 100644 sw/sf2-test/ram.ld
 create mode 100755 sw/sf2-test/

diff --git a/hdl/rtl/hydra_core.vhd b/hdl/rtl/hydra_core.vhd
index 9884633..3ae941b 100644
--- a/hdl/rtl/hydra_core.vhd
+++ b/hdl/rtl/hydra_core.vhd
@@ -32,38 +32,23 @@ use work.genram_pkg.all;
 use work.wishbone_pkg.all;
 use work.urv_pkg.all;
-entity fip_urv is
+entity hydra_core is
-    g_IRAM_LOG_SIZE : natural := 12;
+    g_IRAM_LOG_SIZE : natural := 12;  --  In bytes
     g_DRAM_LOG_SIZE : natural := 12);
     clk_sys_i : in  std_logic;
     rst_n_i   : in  std_logic;
-    restart_i : in  std_logic;
     dwb_o     : out t_wishbone_master_out;
     dwb_i     : in  t_wishbone_master_in;
-    fic0_ready_i   : in  std_logic;
-    fic0_clk_i     : in  std_logic;
-    fic0_haddr_o     : out std_logic_vector(31 downto 0);
-    fic0_hburst_o    : out std_logic_vector(2 downto 0);
-    fic0_hmastlock_o : out std_logic;
-    fic0_hprot_o     : out std_logic_vector(3 downto 0);
-    fic0_hsize_o     : out std_logic_vector(2 downto 0);
-    fic0_htrans_o    : out std_logic_vector(1 downto 0);
-    fic0_hwdata_o    : out std_logic_vector(31 downto 0);
-    fic0_hwrite_o    : out std_logic;
-    fic0_hrdata_i    : in  std_logic_vector(31 downto 0);
-    fic0_hready_i    : in  std_logic;
-    fic0_hresp_i     : in  std_logic_vector(1 downto 0);
-    led_en_o     : out std_logic;
-    leds_o       : out std_logic_vector(4 downto 1)
+    iram_addr : in  std_logic_vector(g_IRAM_LOG_SIZE + 1 downto 2);
+    iram_we   : in  std_logic;
+    iram_data : in  std_logic_vector(31 downto 0)
-end fip_urv;
+end hydra_core;
-architecture arch of fip_urv is
+architecture arch of hydra_core is
   impure function f_x_to_zero (x : std_logic_vector) return std_logic_vector
@@ -95,9 +80,9 @@ architecture arch of fip_urv is
   signal cpu_rst        : std_logic;
   signal cpu_rst_d      : std_logic;
-  signal im_addr,  im_waddr : std_logic_vector(31 downto 0);
-  signal im_data,  im_wdata : std_logic_vector(31 downto 0);
-  signal im_valid, im_write : std_logic;
+  signal im_addr  : std_logic_vector(31 downto 0);
+  signal im_data  : std_logic_vector(31 downto 0);
+  signal im_valid : std_logic;
   signal dm_addr, dm_data_s, dm_data_l                            : std_logic_vector(31 downto 0);
   signal dm_data_select                                           : std_logic_vector(3 downto 0);
@@ -111,10 +96,10 @@ architecture arch of fip_urv is
   signal dm_mem_rdata, dm_wb_rdata : std_logic_vector(31 downto 0);
   signal dm_wb_write, dm_select_wb : std_logic;
-  signal reg_dm_data_write             : std_logic;
   signal dwb_out         : t_wishbone_master_out;
+  cpu_rst <= not rst_n_i;
   dwb_o <= dwb_out;
@@ -173,8 +158,8 @@ begin
     if rising_edge(clk_sys_i) then
       if cpu_rst = '1' then
-        if im_write = '1' then
-          iram (to_integer(unsigned(im_waddr(g_IRAM_LOG_SIZE - 1 downto 2)))) := im_wdata;
+        if iram_we = '1' then
+          iram (to_integer(unsigned(iram_addr))) := iram_data;
         end if;
         im_data <= iram (to_integer(unsigned(im_addr(g_IRAM_LOG_SIZE - 1 downto 2))));
@@ -184,8 +169,6 @@ begin
   -- 1st MByte of the mem is the RAM
   reg_dm_is_wishbone <= '1' when reg_dm_addr(31 downto 20) /= x"000" else '0';
-  reg_dm_data_write <= not reg_dm_is_wishbone and reg_dm_store;
   dm_data_l     <= dm_wb_rdata when dm_select_wb = '1' else dm_mem_rdata;
   p_ram: process (clk_sys_i)
@@ -193,10 +176,12 @@ begin
     variable dram : t_ram32_type (2**(g_DRAM_LOG_SIZE - 2) - 1 downto 0);
     variable addr : natural range dram'range;
-    if rising_edge(clk_sys_i) then
+    if rising_edge(clk_sys_i) and reg_dm_is_wishbone = '0'
+      and (reg_dm_store or reg_dm_load) = '1'
+    then
       addr := to_integer(unsigned(reg_dm_addr(g_DRAM_LOG_SIZE - 1 downto 2)));
       dm_mem_rdata <= dram(addr);
-      if reg_dm_data_write = '1' then
+      if reg_dm_store = '1' then
         for i in 0 to 3 loop
           if reg_dm_data_select (i) = '1' then
             dram(addr)(8*i + 7 downto 8*i) := reg_dm_data_s(8*i + 7 downto 8*i);
@@ -294,189 +279,4 @@ begin
       end if;
     end if;
   end process p_im_valid;
-  b_init: block
-    type t_main_state is (init, envm_ready, envm_ready2, read_word, wait_word, done);
-    signal main_state : t_main_state;
-    type t_ahb_state is (idle, addr_phase, wait_rsp, wait_idle);
-    signal ahb_state : t_ahb_state;
-    signal htrans : std_logic_vector(1 downto 0);
-    signal haddr : std_logic_vector(31 downto 0);
-    signal hpms_start, hpms_done : std_logic;
-    signal fic_start, fic_done : std_logic;
-    signal hpms_data : std_logic_vector (31 downto 0);
-    signal envm_addr : std_logic_vector (31 downto 0);
-    signal envm_len : unsigned (31 downto 0);
-    signal dest_addr : std_logic_vector (31 downto 0);
-    signal leds : std_logic_vector(1 downto 0);
-  begin
-    fic0_hburst_o <= "000";
-    fic0_hmastlock_o <= '0';
-    fic0_hprot_o <= "0011";
-    fic0_hsize_o <= "010";
-    fic0_htrans_o <= htrans;
-    fic0_haddr_o <= haddr;
-    leds_o(2 downto 1) <= "11";
-    leds_o(4 downto 3) <= leds;
-    -- Main FSM: copy eNVM to IRAM
-    process (clk_sys_i)
-    begin
-      if rising_edge (clk_sys_i) then
-        hpms_start <= '0';
-        im_write <= '0';
-        cpu_rst <= '1';
-        led_en_o <= '1';
-        fic0_hwrite_o <= '0';
-        if rst_n_i = '0' then
-          main_state <= init;
-          leds <= "11";
-        else
-          case main_state is
-            when init =>
-              leds <= "11";
-              if fic0_ready_i = '1' then
-                -- poll eNVM ready bit from status register
-                haddr <= x"6008_0120";
-                fic0_hwrite_o <= '0';
-                hpms_start <= '1';
-                main_state <= envm_ready;
-              end if;
---             main_state <= done;
-            when envm_ready =>
-              leds <= "10";
-              hpms_start <= '1';
-              if hpms_done = '1' then
-                hpms_start <= '0';
-                main_state <= envm_ready2;
-              end if;
-            when envm_ready2 =>
-              if hpms_done = '0' then
-                if hpms_data (0) = '1' then
-                  --  eNVM is ready.
-                  main_state <= read_word;
-                  --  Copy the whole IRAM
-                  envm_addr <= x"6000_0000";
-                  envm_len <= to_unsigned(2**g_IRAM_LOG_SIZE, 32);
-                  dest_addr <= x"0000_0000";
-                else
-                  --  eNVM not ready
-                  --  poll again.
-                  hpms_start <= '1';
-                  main_state <= envm_ready;
-                end if;
-              end if;
-            when read_word =>
-              leds <= "00";
-              if hpms_done = '0' then
-                haddr <= envm_addr;
-                hpms_start <= '1';
-                envm_addr <= std_logic_vector(unsigned(envm_addr) + 4);
-                envm_len <= envm_len - 4;
-                main_state <= wait_word;
-              end if;
-            when wait_word =>
-              leds <= "00";
-              hpms_start <= '1';
-              if hpms_done = '1' then
-                --  Write the word.
-                im_wdata <= hpms_data;
-                im_write <= '1';
-                im_waddr <= dest_addr;
-                dest_addr <= std_logic_vector(unsigned(dest_addr) + 4);
-                if envm_len = 0 then
-                  main_state <= done;
-                else
-                  main_state <= read_word;
-                end if;
-              end if;
-            when done =>
-              led_en_o <= '0';
-              leds <= "00";
-              cpu_rst <= '0';
-              if restart_i = '1' then
-                cpu_rst <= '1';
-                main_state <= init;
-              end if;
-          end case;
-        end if;
-      end if;
-    end process;
-    inst_sync_start: entity work.gc_sync
-      port map (
-        clk_i => fic0_clk_i,
-        rst_n_a_i => rst_n_i,
-        d_i => hpms_start,
-        q_o => fic_start
-      );
-    inst_sync_done: entity work.gc_sync
-      port map (
-        clk_i => clk_sys_i,
-        rst_n_a_i => rst_n_i,
-        d_i => fic_done,
-        q_o => hpms_done
-      );
-    --  AHB FSM (in FIC clock domain)
-    process (fic0_clk_i)
-    begin
-      if rising_edge (fic0_clk_i) then
-        htrans <= "00";
-        fic_done <= '0';
-        if rst_n_i = '0' then
-          ahb_state <= idle;
-        else
-          case ahb_state is
-            when idle =>
-              if fic_start = '1' then
-                htrans <= "10";  --  Non seq
-                ahb_state <= addr_phase;
-              end if;
-            when addr_phase =>
-              if fic0_hready_i = '1' then
-                ahb_state <= wait_rsp;
-              else
-                --  Was not ready.
-                htrans <= "10";  --  Non seq
-              end if;
-            when wait_rsp =>
-              if fic0_hready_i = '1' then
-                ahb_state <= wait_idle;
-                fic_done <= '1';
-                hpms_data <= fic0_hrdata_i;
-              end if;
-            when wait_idle =>
-              if fic_start = '0' then
-                fic_done <= '0';
-                ahb_state <= idle;
-              else
-                fic_done <= '1';
-              end if;
-          end case;
-        end if;
-      end if;
-    end process;
-  end block;
 end arch;
diff --git a/hdl/top/sf2-test/sf2_test.vhd b/hdl/top/sf2-test/sf2_test.vhd
index e0a14be..911cd93 100644
--- a/hdl/top/sf2-test/sf2_test.vhd
+++ b/hdl/top/sf2-test/sf2_test.vhd
@@ -2,6 +2,8 @@ library ieee;
 use ieee.std_logic_1164.all;
 use ieee.numeric_std.all;
+use work.wishbone_pkg.all;
 --library smartfusion2;
 --use smartfusion2.all;
@@ -45,50 +47,49 @@ architecture behav of sf2_test is
   signal rst_n : std_logic;
   type state_t is (INIT_DELAY,
-                   INIT_UART_RESET_R,
-                   INIT_UART_RESET_W,
-                   INIT_UART_LCR_DLAB,
-                   INIT_UART_DLR,
-                   INIT_UART_DMR,
-                   INIT_UART_DLR_2,
-                   INIT_UART_LCR,
-                   READ_STATUS,
-                   SEND_RECV,
-                   UART_RECV,
-                   UART_SEND);
+                   DONE);
   signal state : state_t;
   signal counter : unsigned(27 downto 0);
   signal ahb_start, ahb_done, ahb_rw : std_logic;
   signal ahb_addr, ahb_wdata, ahb_rdata : std_logic_vector(31 downto 0);
-  --  To send a byte: wait until uart_tx_rdy = '1', set byte and pulse cmd.
-  signal uart_tx_byte : std_logic_vector(7 downto 0);
-  signal uart_tx_rdy : std_logic;
-  signal uart_tx_cmd : std_logic;
-  signal uart_tx_creg : std_logic;
-  --  To recv a byte: wait until uart_rx_rdy = '1', read byte, pulse cmd.
-  signal uart_rx_byte : std_logic_vector(7 downto 0);
-  signal uart_rx_rdy : std_logic;
-  signal uart_rx_cmd : std_logic;
+  signal iahb_start : std_logic;
+  signal iahb_addr : std_logic_vector(31 downto 0);
   type ahb_state_t is (STATE_AHB_IDLE, STATE_AHB_ADDR, STATE_AHB_DATA);
   signal ahb_state : ahb_state_t;
-  signal letter : std_logic_vector(7 downto 0);
+  --  IRAM log size in bytes.
+  constant IRAM_LOG_SIZE : natural := 8;
-  constant mem_abits : natural := 11; -- 2K * 32
+  signal iram_addr : std_logic_vector(IRAM_LOG_SIZE - 1 downto 0);
+  signal iram_we : std_logic;
+  signal iram_data : std_logic_vector(31 downto 0);
-  type mem_t is array (natural range <>) of std_logic_vector(31 downto 0);
-  signal mem : mem_t (0 to 2**mem_abits - 1);
+  signal cpu_rst_n : std_logic;
+  signal dwb_out : t_wishbone_master_out;
+  signal dwb_in  : t_wishbone_master_in;
-  signal mem_raddr : unsigned(mem_abits - 1 downto 0);
-  signal mem_we : std_logic;
-  signal mem_play : std_logic;
+  inst_hydra: entity work.hydra_core
+    generic map (
+      g_iram_log_size => IRAM_LOG_SIZE,
+      g_dram_log_size => 12
+    )
+    port map (
+      clk_sys_i => clk_100,
+      rst_n_i => cpu_rst_n,
+      dwb_o => dwb_out,
+      dwb_i => dwb_in,
+      iram_addr => iram_addr,
+      iram_data => iram_data,
+      iram_we => iram_we
+    );
   inst_uart: entity work.uart
     port map (
       FIC_0_AHB_S_HADDR     => FIC_0_AHB_S_HADDR,
@@ -132,205 +133,60 @@ begin
     end if;
   end process;
-  proc_main: process (clk_100)
-  begin
-    if rising_edge(clk_100) then
-      uart_rx_cmd <= '0';
-      uart_tx_cmd <= '0';
-      if rst_n = '0' then
-        letter <= x"41";
-        mem_raddr <= (others => '0');
-        mem_play <= '1';
-      else
-        if mem_play = '1' then
-          letter <= mem (to_integer (mem_raddr))(7 downto 0);
-        end if;
-        if counter (25 downto 0) = (25 downto 0 => '1')
-          and uart_tx_rdy = '1'
-        then
-          uart_tx_cmd <= '1';
-          uart_tx_byte <= letter;
-          if mem_play = '1' then
-            if letter = (7 downto 0 => '0') then
-              mem_play <= '0';
-              letter <= x"41";
-            else
-              mem_raddr <= mem_raddr + 1;
-            end if;
-          else
-            if letter = x"5A" then
-              letter <= x"41";
-            elsif letter = x"7A" then
-              letter <= x"61";
-            elsif letter = x"7E" or letter (7) = '1' then
-              letter <= x"20";
-            else
-              letter <= std_logic_vector(unsigned(letter) + 1);
-            end if;
-          end if;
-        else
-          if uart_rx_rdy = '1' then
-            letter <= uart_rx_byte;
-            uart_rx_cmd <= '1';
-            if letter = x"0c" then
-              mem_raddr <= (others => '0');
-              mem_play <= not mem_play;
-            end if;
-          end if;
-          if but_i = '0' then
-            letter <= x"41";
-          end if;
-        end if;
-      end if;
-    end if;
-  end process;
+  iram_addr <= iahb_addr (IRAM_LOG_SIZE + 2 - 1 downto 2);
+  iram_data <= ahb_rdata;
   proc_uart: process (clk_100)
     if rising_edge(clk_100) then
-      ahb_start <= '0';
-      mem_we <= '0';
+      iahb_start <= '0';
+      cpu_rst_n <= '0';
+      iram_we <= '0';
       if rst_n = '0' then
         state <= INIT_DELAY;
         counter <= (others => '0');
-        uart_rx_rdy <= '0';
-        uart_tx_rdy <= '0';
-        uart_tx_creg <= '0';
-        uart_tx_creg <= uart_tx_creg or uart_tx_cmd;
-        uart_rx_rdy <= uart_rx_rdy and not uart_rx_cmd;
         case state is
           when INIT_DELAY =>
-            if counter = x"c00_0000" then
-              state <= INIT_UART_RESET_R;
-            end if;
-            counter <= counter + 1;
-          when INIT_UART_RESET_R =>
-            --  1. Clear reset
-            ahb_addr <= x"4003_8048";
-            ahb_rw <= '1';
-            ahb_start <= '1';
-            counter <= (others => '0');
-            state <= INIT_UART_RESET_W;
-          when INIT_UART_RESET_W =>
-            if ahb_done = '1' then
-              ahb_addr <= x"4003_8048";
-              ahb_wdata <= ahb_rdata and x"ffff_ff7f";
-              ahb_rw <= '0';
-              ahb_start <= '1';
-              state <= INIT_UART_LCR_DLAB;
-            end if;
-          when INIT_UART_LCR_DLAB =>
-            if ahb_done = '1' then
-              ahb_addr <= x"4000_000C"; --  LCR
-              ahb_wdata <= x"000000_80";
-              ahb_rw <= '0';
-              ahb_start <= '1';
-              state <= INIT_UART_DLR;
-            end if;
-          when INIT_UART_DLR =>
-            if ahb_done = '1' then
-              ahb_addr <= x"4000_0000";
-              ahb_wdata <= x"000000_36";
-              ahb_rw <= '0';
-              ahb_start <= '1';
-              state <= INIT_UART_DMR;
-            end if;
-          when INIT_UART_DMR =>
-            if ahb_done = '1' then
-              ahb_addr <= x"4000_0004";
-              ahb_wdata <= x"000000_00";
-              ahb_rw <= '0';
-              ahb_start <= '1';
-              state <= INIT_UART_DLR_2;
-            end if;
-          when INIT_UART_DLR_2 =>
-            if ahb_done = '1' then
-              ahb_addr <= x"4000_000C"; --  LCR
-              ahb_wdata <= x"000000_03";
-              ahb_rw <= '0';
-              ahb_start <= '1';
-              state <= INIT_UART_LCR;
-            end if;
-          when INIT_UART_LCR =>
-            if ahb_done = '1' then
+            iahb_addr <= (others => '0');
+            if counter = x"000_0010" then
               state <= READ_NVM;
+              counter <= (others => '0');
+            else
+              counter <= counter + 1;
             end if;
           when READ_NVM =>
-            ahb_addr <= std_logic_vector (x"6000" & counter (15 downto 0));
-            ahb_rw <= '1';
-            ahb_start <= '1';
+            iahb_addr <= std_logic_vector (x"6000" & counter (13 downto 0) & "00");
+            iahb_start <= '1';
             state <= WRITE_MEM;
           when WRITE_MEM =>
             if ahb_done = '1' then
-              mem_we <= '1';
-              if counter = x"000_07fc" then
-                state <= READ_STATUS;
+              iram_we <= '1';
+              if counter (IRAM_LOG_SIZE - 3 downto 0)= (IRAM_LOG_SIZE - 3 downto 0 => '1') then
+                state <= DONE;
-                counter <= counter + 4;
+                counter <= counter + 1;
                 state <= READ_NVM;
               end if;
             end if;
-          when READ_STATUS =>
-            counter <= counter + 1;
-            ahb_addr <= x"4000_0014"; --  LSR
-            ahb_rw <= '1';
-            ahb_start <= '1';
-            state <= SEND_RECV;
-          when SEND_RECV =>
-            counter <= counter + 1;
-            if ahb_done = '1' then
-              ahb_addr <= x"4000_0000"; --  rx/tx
-              uart_tx_rdy <= ahb_rdata(5); -- TEMT transmit empty
-              if ahb_rdata(0) = '1' then
-                --  Data ready -> read byte.
-                ahb_rw <= '1';
-                ahb_start <= '1';
-                state <= UART_RECV;
-              elsif uart_tx_creg = '1' and ahb_rdata(5) = '1' then
-                --  Byte to transmit.
-                ahb_rw <= '0';
-                ahb_start <= '1';
-                ahb_wdata (31 downto 8) <= (others => '0');
-                ahb_wdata (7 downto 0) <= uart_tx_byte;
-                uart_tx_creg <= '0';
-                state <= UART_SEND;
-              else
-                state <= READ_STATUS;
-              end if;
-            end if;
-          when UART_RECV =>
-            counter <= counter + 1;
-            if ahb_done = '1' then
-              uart_rx_byte <= ahb_rdata (7 downto 0);
-              uart_rx_rdy <= '1';
-              state <= READ_STATUS;
-            end if;
-          when UART_SEND =>
-            counter <= counter + 1;
-            if ahb_done = '1' then
-              state <= READ_STATUS;
-            end if;
+          when DONE =>
+            cpu_rst_n <= '1';
         end case;
       end if;
     end if;
   end process;
-  proc_memw: process (clk_100)
-  begin
-    if rising_edge(clk_100) then
-      if mem_we = '1' then
-        mem(to_integer(counter (mem_abits - 1 + 2 downto 2))) <= ahb_rdata;
-      end if;
-    end if;
-  end process;
+  ahb_start <= iahb_start when cpu_rst_n = '0' else (dwb_out.stb and dwb_out.cyc);
+  ahb_addr  <= iahb_addr  when cpu_rst_n = '0' else dwb_out.adr;
+  ahb_rw    <= '1'        when cpu_rst_n = '0' else not dwb_out.we;
+  ahb_wdata <= dwb_out.dat;
+  dwb_in.err <= '0';
+  dwb_in.rty <= '0';
+  dwb_in.stall <= '0'; --  FIXME
+  dwb_in.dat <= ahb_rdata;
+  dwb_in.ack <= cpu_rst_n and ahb_done;
   proc_ahb: process (clk_100)
     if rising_edge(clk_100) then
diff --git a/sw/sf2-test/Makefile b/sw/sf2-test/Makefile
new file mode 100644
index 0000000..a26bd16
--- /dev/null
+++ b/sw/sf2-test/Makefile
@@ -0,0 +1,37 @@
+# and don't touch the rest unless you know what you're doing.
+CROSS_COMPILE ?= riscv32-elf-
+CFLAGS = -mabi=ilp32 -march=rv32im -Os -Wall
+OBJS = crt0.o $(OUTPUT).o
+LDS = ram.ld
+all: $(OUTPUT).mem
+main.o: main.c
+%.bin: %.elf
+	${OBJCOPY} -O binary $< $@
+%.mem: %.bin
+	./ $< > $@
+$(OUTPUT).elf: $(LDS) $(OBJS)
+	${XCC} $(CFLAGS) -o $@ -nostartfiles $(OBJS) -T $(LDS) -Wl,-Map=$(OUTPUT).map
+	$(SIZE) $@
+	rm -f $(OUTPUT).elf $(OUTPUT).bin $(OUTPUT).mem $(OUTPUT).map $(OUTPUT).ram $(OBJS)
+%.o: %.S
+	${XCC} -c $(CFLAGS) $< -o $@
+%.o: %.c
+	${XCC} -c $(CFLAGS) $< -o $@
diff --git a/sw/sf2-test/crt0.S b/sw/sf2-test/crt0.S
new file mode 100644
index 0000000..be75e12
--- /dev/null
+++ b/sw/sf2-test/crt0.S
@@ -0,0 +1,16 @@
+  .section    .boot, "ax", @progbits
+ .global _start
+        la      gp, _gp                 # Initialize global pointer
+	la      sp, _fstack
+# clear the bss segment
+	la	t0, _fbss
+	la	t1, _end
+	sw	zero,0(t0)
+	addi	t0, t0, 4
+	bltu	t0, t1, 1b
+        call    main
diff --git a/sw/sf2-test/main.c b/sw/sf2-test/main.c
new file mode 100644
index 0000000..a3a70fc
--- /dev/null
+++ b/sw/sf2-test/main.c
@@ -0,0 +1,59 @@
+#define SYS_RESET 0x40038048
+#define UART_BASE 0x40000000
+#define UART_RX  (UART_BASE | 0x00)
+#define UART_TX  (UART_BASE | 0x00)
+#define UART_DLR (UART_BASE | 0x00)
+#define UART_DMR (UART_BASE | 0x04)
+#define UART_LCR (UART_BASE | 0x0c)
+#define UART_LSR (UART_BASE | 0x14)
+static void
+uart_raw_putc (unsigned char c)
+  /* Busy-wait until LSR bit 5 (mask 0x20) reports the transmitter ready.  */
+  /* NB: RX data ready is LSR bit 0; bit 5 is TX (THRE on a 16550 -- TODO confirm here).  */
+  while (!(*(volatile unsigned*)UART_LSR & 0x20))
+    ;
+  *(volatile unsigned *)UART_TX = c;
+static void
+uart_putc (char c)
+  if (c == '\n')
+    uart_raw_putc ('\r');
+  uart_raw_putc (c);
+#if 0
+static void
+uart_puts (const char *s)
+  while (*s)
+    uart_putc (*s++);
+main (void)
+  /* Unreset UART 0 */
+  *(volatile unsigned *)SYS_RESET &= 0xffffff7f;
+  /* Select baud.  */
+  *(volatile unsigned *)UART_LCR = 0x80;
+  /* Set baudrate */
+  *(volatile unsigned *)UART_DLR = 0x36;
+  *(volatile unsigned *)UART_DMR = 0x00;
+  /* start operation.  */
+  *(volatile unsigned *)UART_LCR = 0x03;
+  while (1) {
+    uart_putc ('H');
+    uart_putc ('i');
+    uart_putc ('?');
+    uart_putc ('\n');
+  }
diff --git a/sw/sf2-test/ram.ld b/sw/sf2-test/ram.ld
new file mode 100644
index 0000000..06b6b92
--- /dev/null
+++ b/sw/sf2-test/ram.ld
@@ -0,0 +1,38 @@
+    rom :
+ ORIGIN = 0x00000000,
+ LENGTH = 2048
+    bss :
+ ORIGIN = 0x00010000,
+ LENGTH = 2048
+ /* Sorry, but there is no initialized ram.  */
+   empty :
+ ORIGIN = 0x00020000,
+ LENGTH = 0
+ .boot : { *(.boot) } > rom
+ .text : { *(.text .text.*) } > rom =0
+ _gp = .;
+ .rodata : { *(.rodata .rodata.*) } > empty
+ .data : { *(.data .data.*) } > empty
+ .bss : {   /* zero-initialised data; crt0 clears [_fbss, _end) word by word */
+  _fbss = .;
+  *(.bss .bss.*)
+  *(COMMON)
+  _ebss = .;
+  . = ALIGN(16);   /* fixed boundary: word-aligned _end, 16-byte aligned stack top */
+  _fstack = . + 1024;   /* initial stack pointer loaded by crt0 */
+  _end = .;
+ } > bss
diff --git a/sw/sf2-test/ b/sw/sf2-test/
new file mode 100755
index 0000000..17337f2
--- /dev/null
+++ b/sw/sf2-test/
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+import struct
+import sys
+def main():
+    if len(sys.argv) < 2:
+        sys.exit("Usage: {} FILE".format(sys.argv[0]))
+    filename = sys.argv[1]
+    b = open(filename, 'rb').read()
+    if len(b) % 4 != 0:
+        sys.exit("length of {} is not a multiple of 4".format(filename))
+    for i in range(len(b)):
+        v, = struct.unpack('<B', b[i])
+        print('{:08b}'.format(v))
+if __name__ == '__main__':
+    main()