diff --git a/hdl/sim/example_tb/main.sv b/hdl/sim/example_tb/main.sv
index 7a173ce2a5864e37423bb7ff6e0a25976a5c04c4..1dbdf35a0af57e1bd43cff70b64e88a88c87f650 100644
--- a/hdl/sim/example_tb/main.sv
+++ b/hdl/sim/example_tb/main.sv
@@ -40,6 +40,11 @@ module main;
 
    logic gn4124_irq;
 
+
+   string testphase     = "null:= no phase set yet";   // label of the test currently running; assigned by the test tasks
+   string test_subphase = "ha!";                       // not written by any visible code; presumably a finer-grained marker -- TODO confirm
+
+
    t_wishbone_master_in  wb_in, wb_dma_in, wb_mem_in;
    t_wishbone_master_out wb_out, wb_dma_out, wb_mem_out;
 
@@ -164,325 +169,201 @@ module main;
         $fatal(1, "dma irq should be 0");
    endtask
 
-   initial begin
-
-      int stall_cycle[14];
-
-      automatic int ntest = 1;
-      const     int tests = 12;
-
-      uint32_t addr, val, expected;
-
-      @(posedge i_gn4124.ready);
-
-      acc = i_gn4124.get_accessor();
-
-      acc.set_default_xfer_size(4);
-
-      @(posedge clk_125m);
-
-      // ---------------------------------
-      $write("Test %0d/%0d: simple read/write accesses over Wishbone: ",
-             ntest++, tests);
-
-      // Verify simple read/writes over wishbone
-      reg_check('h0, 'h0);
-
-      acc.write('h0c, 'hffacce55);
-      acc.write('h10, 'h1badcafe);
-
-      reg_check('h0c, 'hffacce55);
-      reg_check('h10, 'h1badcafe);
-
-      // Reset all DMA config registers
-      for (addr = 'h00; addr <= 'h20; addr += 4)
-        begin
-           acc.write(addr, 'h0);
-        end
-
-      repeat(2) @(posedge clk_125m);
-
-      $write("PASS\n");
-
-      // ---------------------------------
-      $write("Test %0d/%0d: 128B read over DMA, abort after first read: ",
-             ntest++, tests);
-
-      if (dma_irq != 1'b0)
-        $fatal(1, "dma irq should be 0");
-
-      acc.write('h14, 'h80); // count
-      acc.write('h00, 'h01); // start
-
-      // wait for transfer to start
-      @(posedge i_gn4124.l2p_valid);
-
-      repeat(2) @(posedge clk_125m);
-
-      // Test abort feature
-      acc.write('h00, 'h02);
-      reg_check('h04, 'h03);
-      acc.write('h00, 'h00);
-
-      repeat(2) @(posedge clk_125m);
-
-      $write("PASS\n");
-
-      // ---------------------------------
-      $write("Test %0d/%0d: 256B DMA write: ",
-             ntest++, tests);
-
-      // Setup data in BFM memory
-      for (addr = 'h00; addr < 'h40; addr += 1)
-        i_gn4124.host_mem_write(4 * addr, 32'h80000020 - addr);
-
-      // Setup DMA
-      acc.write('h14, 'h100); // count
-      acc.write('h20, 'h01); // attrib
-      acc.write('h0c, 'h20000000); // hstartL
-      acc.write('h10, 'h00000000); // hstartH
-
-      acc.write('h00, 'h01); // start
-
-      @(posedge dma_irq);
-
-      check_irq_status;
-      clear_irq;
-
-      repeat(4) @(posedge clk_125m);
-
-      $write("PASS\n");
-
-      // wait for WB transfer to finish
-      #5us;
 
-      // ---------------------------------
-      $write("Test %0d/%0d: 2x128B chained DMA reads: ",
-             ntest++, tests);
 
-      // Setup DMA chain info in BFM memory
-      i_gn4124.host_mem_write('h20000, 'h00000080); // remote address
-      i_gn4124.host_mem_write('h20004, 'h20000080); // hstartL
-      i_gn4124.host_mem_write('h20008, 'h00000000); // hstartH
-      i_gn4124.host_mem_write('h2000C, 'h80); // count
-      i_gn4124.host_mem_write('h20010, 'h00); // nextL
-      i_gn4124.host_mem_write('h20014, 'h00); // nextH
-      i_gn4124.host_mem_write('h20018, 'h00); // attrib
 
-      acc.write('h14, 'h80); // count
-      acc.write('h20, 'h02); // attrib
-      acc.write('h0c, 'h20000000); // hstartL
-      acc.write('h10, 'h00000000); // hstartH
-
-      // Point to chain info in BFM memory
-      acc.write('h18, 'h20020000); // nextL
-      acc.write('h1C, 'h00000000); // nextH
-
-      acc.write('h00, 'h01); // start
-
-      @(posedge dma_irq);
-
-      check_irq_status;
-      clear_irq;
-
-      for (addr = 'h00; addr < 'h40; addr += 1)
-        begin
-           expected = 32'h80000020 - addr;
-           mem_check(4 * addr, expected);
-        end
-
-      repeat(4) @(posedge clk_125m);
-
-      $write("PASS\n");
-
-      // ---------------------------------
-      $write("Test %0d/%0d: 256B DMA read: ",
-             ntest++, tests);
+   task t_setup_dma( input [63:0] host_addr,   // Loads one DMA descriptor into registers 'h08..'h20; does not start it
+                     input [31:0] local_addr,
+                     input [31:0] length,
+                     // (a raw 32-bit attributes argument is disabled; the two flag inputs below replace it)
+                     input [63:0] next_chained_descr_addr,
+                     input bit more_in_chain,
+                     input bit w_nrd_sel);
+      automatic logic [31:0] attrib_word;
+      // Attribute word layout used here: bit 1 = more_in_chain, bit 0 = w_nrd_sel, all other bits 0.
+      // NOTE: the original author flags that the core documentation shows these two bits swapped.
+      attrib_word = {30'h0, more_in_chain, w_nrd_sel};
+      acc.write('h08, local_addr);                      // carrier local addr
+      acc.write('h0c, host_addr[31:0]);                 // hstartL
+      acc.write('h10, host_addr[63:32]);                // hstartH
+      acc.write('h14, length);                          // count
+      acc.write('h18, next_chained_descr_addr[31:0]);   // nextL
+      acc.write('h1C, next_chained_descr_addr[63:32]);  // nextH
+      acc.write('h20, attrib_word);                     // attrib
+   endtask
 
-      // Setup DMA
-      acc.write('h14, 'h100); // count
-      acc.write('h20, 'h00); // attrib
-      acc.write('h0c, 'h20000000); // hstartL
-      acc.write('h10, 'h00000000); // hstartH
 
+   task t_start_dma();   // starts the DMA by writing 'h01 to the register at offset 'h00
       acc.write('h00, 'h01); // start
+//      bits 3:2 are byte_ordering
+//      bit 1 is "abort"
+//      bit 0 is "start"
+   endtask
 
-      @(posedge dma_irq);
 
-      check_irq_status;
-      clear_irq;
+   task t_add_chaining_descriptor(   // Writes one 7-word chained-DMA descriptor into host (BFM) memory
+                     input [63:0] host_addr,
+                     input [31:0] local_addr,
+                     input [31:0] length,
+                     // (a raw 32-bit attributes argument is disabled; the two flag inputs below replace it)
+                     input [63:0] next_chained_descr_addr,
+                     input [63:0] start_address_in_host_memory,   // original note: the Verilog wrapper adds 'h2000_0000
+                     input        more_in_chain,
+                     input        w_nrd_sel);
+      logic [31:0] attrib_word;
+      // Attribute word: bit 1 = more_in_chain, bit 0 = w_nrd_sel, remaining bits 0.
+      // The seven words go to consecutive 4-byte offsets from start_address_in_host_memory.
+      attrib_word = {30'h0, more_in_chain, w_nrd_sel};
+      i_gn4124.host_mem_write_noaddr_twiddles(start_address_in_host_memory + 'h00, local_addr);                      // remote address
+      i_gn4124.host_mem_write_noaddr_twiddles(start_address_in_host_memory + 'h04, host_addr[31:0]);                 // hstartL
+      i_gn4124.host_mem_write_noaddr_twiddles(start_address_in_host_memory + 'h08, host_addr[63:32]);                // hstartH
+      i_gn4124.host_mem_write_noaddr_twiddles(start_address_in_host_memory + 'h0C, length);                          // count
+      i_gn4124.host_mem_write_noaddr_twiddles(start_address_in_host_memory + 'h10, next_chained_descr_addr[31:0]);   // nextL
+      i_gn4124.host_mem_write_noaddr_twiddles(start_address_in_host_memory + 'h14, next_chained_descr_addr[63:32]);  // nextH
+      i_gn4124.host_mem_write_noaddr_twiddles(start_address_in_host_memory + 'h18, attrib_word);                     // attrib
+   endtask
 
-      for (addr = 'h00; addr < 'h40; addr += 1)
-        begin
-           expected = 32'h80000020 - addr;
-           mem_check(4 * addr, expected);
-        end
 
-      repeat(4) @(posedge clk_125m);
 
-      $write("PASS\n");
 
-      // ---------------------------------
-      $write("Test %0d/%0d: 256B DMA reads with stalling: ",
-             ntest++, tests);
+   task automatic t_get_device_mem(input uint32_t wrd_addr, output [31:0] data);   // backdoor read of one 32-bit word; wrd_addr is by value so any expression may be passed
+      data[ 0+:8] = MEM.U_DPRAM.gen_splitram.U_RAM_SPLIT.ram0[wrd_addr];   // data[7:0]   <- ram0
+      data[ 8+:8] = MEM.U_DPRAM.gen_splitram.U_RAM_SPLIT.ram1[wrd_addr];   // data[15:8]  <- ram1
+      data[16+:8] = MEM.U_DPRAM.gen_splitram.U_RAM_SPLIT.ram2[wrd_addr];   // data[23:16] <- ram2
+      data[24+:8] = MEM.U_DPRAM.gen_splitram.U_RAM_SPLIT.ram3[wrd_addr];   // data[31:24] <- ram3
+   endtask
 
-      // Setup DMA
-      acc.write('h14, 'h100); // count
-      acc.write('h20, 'h00); // attrib
-      acc.write('h0c, 'h20000000); // hstartL
-      acc.write('h10, 'h00000000); // hstartH
 
-      stall_cycle = '{0,1,2,14,15,16,17,30,31,32,33,61,62,63};
-      foreach(stall_cycle[i])
-        begin
-           acc.write('h00, 'h01); // start
-
-           @(posedge wb_dma_out.stb);
-           repeat(stall_cycle[i]) @(posedge clk_125m);
-           force DUT.cmp_wrapped_gn4124.dma_stall_i = 1'b1;
-           @(posedge clk_125m);
-           force DUT.cmp_wrapped_gn4124.dma_ack_i = 1'b0;
-           @(posedge clk_125m);
-           release DUT.cmp_wrapped_gn4124.dma_stall_i;
-           @(posedge clk_125m);
-           release DUT.cmp_wrapped_gn4124.dma_ack_i;
-
-           @(posedge dma_irq);
-
-           check_irq_status;
-           clear_irq;
-
-           for (addr = 'h00; addr < 'h40; addr += 1)
-             begin
-                expected = 32'h80000020 - addr;
-                mem_check(4 * addr, expected);
-             end
-
-           repeat(4) @(posedge clk_125m);
-        end
+   task automatic t_blat_device_mem(input uint32_t wrd_addr, input [31:0] data);   // backdoor write of one 32-bit word, one byte per RAM array
+     MEM.U_DPRAM.gen_splitram.U_RAM_SPLIT.ram3[wrd_addr] = data[31:24];   // top byte    -> ram3
+     MEM.U_DPRAM.gen_splitram.U_RAM_SPLIT.ram2[wrd_addr] = data[23:16];
+     MEM.U_DPRAM.gen_splitram.U_RAM_SPLIT.ram1[wrd_addr] = data[15:8];
+     MEM.U_DPRAM.gen_splitram.U_RAM_SPLIT.ram0[wrd_addr] = data[7:0];     // bottom byte -> ram0
+   endtask
 
-      $write("PASS\n");
 
-      // ---------------------------------
-      $write("Test %0d/%0d: 8KiB chained DMA write: ",
-             ntest++, tests);
 
-      // Setup data in BFM memory
-      for (addr = 'h00; addr < 'h800; addr += 1)
-        i_gn4124.host_mem_write(4 * addr, 32'h80000020 - addr);
+   task automatic t_compare_memories(input [63:0]    pcie_start_addr,   // note needs to be 8B aligned
+                                     input [31:0]    wb_start_addr,     // byte address; divided by 4 below to get a word index
+                                     input  int      words_to_compare,  // number of 32-bit words to compare
+                                     output uint32_t bad_comparisons,   // count of mismatching words
+                                     output uint32_t good_comparisons,  // count of matching words
+                                     input  bit      verbose,           // 1: print one table row per word
+                                     input  bit      no_twiddles = 1'b0);  // 1: read host memory via host_mem_read_noaddr_twiddles
+   uint32_t     addr_cnt;
+   uint64_t     system_dat;
+   logic [31:0] wb_mem_dat;
+   logic [31:0] difference;
+   bit          error_detected;
+   uint32_t     wb_addr;
+   uint64_t     pcie_addr;
+   // Compares words_to_compare 32-bit words of card memory (backdoor read) with host memory, counting matches and mismatches.
+   bad_comparisons  = 0;
+   good_comparisons = 0;
+
+   if (verbose) begin
+      $write("Checking Expected memory contents against what is in the wishbone target memory\n");
+      $write("________________________________________________________________________________\n");
+      $write("| Addr (byte) | wishbone   | mem content |     PCIE_ADDR      |  error |  cnt  |\n");
+      $write("|-------------+------------+-------------+--------------------+--------+-------|\n");
+   end
 
-      // Setup DMA chain info in BFM memory
-      i_gn4124.host_mem_write('h20000, 'h00001800); // remote address
-      i_gn4124.host_mem_write('h20004, 'h20001800); // hstartL
-      i_gn4124.host_mem_write('h20008, 'h00000000); // hstartH
-      i_gn4124.host_mem_write('h2000C, 'h800); // count
-      i_gn4124.host_mem_write('h20010, 'h00); // nextL
-      i_gn4124.host_mem_write('h20014, 'h00); // nextH
-      i_gn4124.host_mem_write('h20018, 'h01); // attrib
+   for (addr_cnt = 'h00; addr_cnt < words_to_compare; addr_cnt += 1)begin
+      wb_addr   = wb_start_addr/4   + addr_cnt ;      // card-side word index
+      pcie_addr = pcie_start_addr + (addr_cnt * 4);   // host-side byte address, 4 bytes per word
 
-      // Setup DMA
-      acc.write('h14, 'h1800); // count
-      acc.write('h20, 'h03); // attrib
-      acc.write('h0c, 'h20000000); // hstartL
-      acc.write('h10, 'h00000000); // hstartH
+      t_get_device_mem(       wb_addr,  wb_mem_dat);
 
-      // Point to chain info in BFM memory
-      acc.write('h18, 'h20020000); // nextL
-      acc.write('h1C, 'h00000000); // nextH
+      if (1'b1 == no_twiddles ) begin
+         i_gn4124.host_mem_read_noaddr_twiddles(pcie_addr, system_dat);
+      end else begin
+         i_gn4124.host_mem_read(pcie_addr, system_dat);
+      end
 
-      acc.write('h00, 'h01); // start
+      difference = wb_mem_dat ^ system_dat[0+:32];   // only the low 32 bits of the host read are compared -- NOTE(review): confirm the host read returns the addressed word there
 
-      @(posedge dma_irq);
+       if (difference !== 32'h0) begin   // 4-state compare: X/Z bits in the card word also count as a mismatch
+         error_detected = 1'b1;
+      end else begin
+         error_detected = 1'b0;
+      end
 
-      check_irq_status;
-      clear_irq;
+      if (1'b1 ==  error_detected) begin
+         bad_comparisons++;
+      end else begin
+         good_comparisons++;
+      end
 
-      repeat(4) @(posedge clk_125m);
+      if (verbose) begin
+         $write("| 0x:%08x | 0x%08x | 0x%08x  | 0x%016x |   %1b    | %05d |",
+                  4* wb_addr, wb_mem_dat,
+                                      system_dat[31:0],
+                                                  pcie_addr,
+                                                             error_detected,
+                                                                       addr_cnt
+               );
+         if (1'b1 ==  error_detected) begin
+            $write(" *** mismatch 0x%08x\n", difference);
+         end else begin
+            $write("\n");
+         end
+      end
+   end
 
-      $write("PASS\n");
+   if (verbose) begin
+      $write("------------------------------------------------------------------------------\n ");
+   end
+   endtask
 
-      // wait for WB transfer to finish
-      #5us;
 
-      // Check all four byte swap settings
-      // ---------------------------------
-      for (int i = 0; i < 4; i++) begin
-         $write("Test %0d/%0d: 8KiB DMA read (byte swap = %0d): ",
-                ntest++, tests, i);
 
-         // Restart
-         acc.write('h14, 'h2000); // count
-         acc.write('h20, 'h00); // attrib
-         acc.write('h0c, 'h20000000); // hstartL
-         acc.write('h10, 'h00000000); // hstartH
-         acc.write('h00, (i << 2) | 'h01); // start
+// The tests themselves are in the following include file
+   `include "testcases.svh"
 
-         @(posedge dma_irq);
 
-         check_irq_status;
+// ************************************************************************************************
+// * Main body of the testbench that actually drives and tests the DUT
+// ************************************************************************************************
+   initial begin
+      automatic int ntest = 1;
+      const     int tests = 13;
 
-         for (addr = 'h00; addr < 'h800; addr += 1)
-           begin
-              expected = 32'h80000020 - addr;
-              if (i == 1)
-                expected = {<<8{expected}};
-              else if (i == 2)
-                expected = {<<16{expected}};
-              else if (i == 3)
-                expected = {<<16{{<<8{expected}}}};
-              mem_check(4 * addr, expected);
-           end
+      @(posedge i_gn4124.ready);
 
-         clear_irq;
+      acc = i_gn4124.get_accessor();
 
-         repeat(4) @(posedge clk_125m);
+      acc.set_default_xfer_size(4);
 
-         $write("PASS\n");
+      @(posedge clk_125m);
 
-         #1us;
-      end
 
-      $write("Test %0d/%0d: 256B DMA read with 32bit host address overflow: ",
-             ntest++, tests);
+      // Each test is contained in the testcases.svh file, and can be run atomically without needing
+      // memory contents or configuration from previous tests.
+      // Ideally this would also include configuring the BFM for each test & resets etc but that was a stage too far!
+      // The tests run should also depend on if the DMA engine is enabled (generic)
 
-      acc.write('h14, 'h100); // count
-      acc.write('h20, 'h00); // attrib
-      acc.write('h0c, 'hffffff80); // hstartL
-      acc.write('h10, 'h00000000); // hstartH
-      acc.write('h00, 'h01); // start
+      // basic tests
+      t_test_simple_wishbone_rw( ntest, tests);
 
-      // Transfer will be split internally by L2P DMA master in two requests, the first
-      // one with a 32-bit adress starting at ffff_ff80 and the next one with a 64-bit
-      // address starting at 1_0000_0000
-      @(posedge DUT.cmp_wrapped_gn4124.ldm_arb_dframe);
-      @(posedge DUT.cmp_wrapped_gn4124.sys_clk);
-      val_check("Host address overflow header", 1, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'h02ff0020);
-      @(posedge DUT.cmp_wrapped_gn4124.sys_clk);
-      val_check("Host address overflow address", 1, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'hffffff80);
-      @(posedge DUT.cmp_wrapped_gn4124.ldm_arb_dframe);
-      @(posedge DUT.cmp_wrapped_gn4124.sys_clk);
-      val_check("Host address overflow header", 2, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'h03ff0020);
-      @(posedge DUT.cmp_wrapped_gn4124.sys_clk);
-      val_check("Host address overflow address high", 2, DUT.cmp_wrapped_gn4124.ldm_arb_data, 1);
-      @(posedge DUT.cmp_wrapped_gn4124.sys_clk);
-      val_check("Host address overflow address low", 2, DUT.cmp_wrapped_gn4124.ldm_arb_data, 0);
-
-      @(posedge dma_irq);
+      // Wishbone DMA tests
+      t_test_unchecked_dma_read_aborted(ntest, tests);
+      t_test_256B_dma_write(ntest, tests);
+      t_test_2x128B_reads(ntest, tests);
+      t_test_256B_read(ntest, tests);
+      t_test_dma_rd_with_wishbone_stalls(ntest, tests);
+      t_test_8KiB_chained_DMA_WRITE(ntest, tests);
+      t_test_8KiB_Byteswaps(ntest, tests);
+      t_test_32bit_addr_overflow_rd(ntest, tests);
 
-      //  Check irq status
-      reg_check('h04, 'h04);
-      if (dma_irq != 1'b1)
-        $fatal(1, "dma irq should be 1");
 
-      // clear irq
-      acc.write('h04, 'h04);
-      reg_check('h04, 'h00);
-      if (dma_irq != 1'b0)
-        $fatal(1, "dma irq should be 0");
+      t_test_mixed_chaining(ntest, tests);    // NOTE(review): this comment used to read "commented out as it will fail!" but the call is active -- confirm the test is expected to pass
 
-      repeat(4) @(posedge clk_125m);
+      //t_test_32bit_addr_overflow_wr(ntest, tests);   /// TBD should this test pass?:
+          //                              it demonstrates a 4KB Addressing boundary crossed!!!!
+          //                               and a 32-bit => 64bit addr....
+          // This can be masked in Software (due to sensible addresses => DMA),
+          //           but it was fixed for the other direction...
 
-      $write("PASS\n");
 
       $display();
       $display("Simulation PASSED");
@@ -490,4 +371,4 @@ module main;
       $finish;
    end // initial begin
 
-endmodule // main
+endmodule // main
\ No newline at end of file
diff --git a/hdl/sim/example_tb/testcases.svh b/hdl/sim/example_tb/testcases.svh
new file mode 100644
index 0000000000000000000000000000000000000000..09d825c7e6cf219bc98293861f3eb1272689c588
--- /dev/null
+++ b/hdl/sim/example_tb/testcases.svh
@@ -0,0 +1,855 @@
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_simple_wishbone_rw  (simple Wishbone read/write test)
+//      Purpose:
+//            Sanity check that the Wishbone CSR access path is functioning.
+//      Method:
+//         Use two of the address registers ('h0c, 'h10) in the DMA register space as a scratchpad for writes and readbacks.
+//         Passes if every readback matches the expected value; registers 'h00..'h20 are written to 0 before returning.
+// --------------------------------------------
+   task automatic t_test_simple_wishbone_rw(ref   int testno,
+                                            input int num_tests
+                                           );
+      uint32_t addr;   // register offset used by the reset loop
+
+      @(posedge clk_125m);
+
+      // ---------------------------------
+      $write("Test %0d/%0d: simple read/write accesses over Wishbone: ",
+             testno++, num_tests);
+      testphase = "simple read/write accesses over Wishbone";
+
+      // Verify simple read/writes over wishbone
+      reg_check('h0, 'h0);
+
+      // use registers as scratchpad to test RW accesses...
+      //    EXTEND to validate BE functionality..
+      acc.write('h0c, 'hffacce55);
+      acc.write('h10, 'h1badcafe);
+
+      reg_check('h0c, 'hffacce55);
+      reg_check('h10, 'h1badcafe);
+
+      // Reset all DMA config registers
+      for (addr = 'h00; addr <= 'h20; addr += 4)
+        begin
+           acc.write(addr, 'h0);
+        end
+
+      reg_check('h0c, 'h0);   // confirm the scratchpad registers really were cleared
+      reg_check('h10, 'h0);
+
+      repeat(2) @(posedge clk_125m);
+
+      $write("PASS\n");
+   endtask
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_unchecked_dma_read_aborted
+//      Purpose:
+//            Sanity check of a DMA read and of the abort functionality.
+//      Method:
+//          NOTE: NOT SELF CHECKING for data -- only the register at 'h04 is checked after the abort.
+// --------------------------------------------
+   task automatic t_test_unchecked_dma_read_aborted(ref   int ntest,
+                                                    input int tests
+                                                   );
+      testphase = "128B read over DMA, abort after first read: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+      if (dma_irq != 1'b0)
+        $fatal(1, "dma irq should be 0");
+
+      t_setup_dma(.host_addr(64'h0), .local_addr(32'h0), .length(32'h80), .next_chained_descr_addr(64'h0), .more_in_chain(1'b0), .w_nrd_sel(1'b0)); // count = 'h80, everything else 0: no register state from earlier tests is needed
+      acc.write('h00, 'h01); // start
+
+      // wait for transfer to start
+      @(posedge i_gn4124.l2p_valid);
+
+      repeat(2) @(posedge clk_125m);
+
+      // Test abort feature
+      acc.write('h00, 'h02);   // bit 1 = abort
+      reg_check('h04, 'h03);   // expected value of the register at 'h04 after the abort
+      acc.write('h00, 'h00);
+
+      repeat(2) @(posedge clk_125m);
+
+      $write("PASS\n");
+
+   endtask
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_256B_dma_write
+//      Purpose:
+//           Basic self-checking 256B DMA write: data preloaded in host (BFM) memory must appear in card memory.
+//      Method:
+//          NOTE: self-checking of memory contents; a data mismatch ends the simulation with $fatal.
+// --------------------------------------------
+   task automatic t_test_256B_dma_write(ref   int ntest,
+                                         input int tests
+                                         );
+      uint32_t addr,expected, from_mem ;
+      bit      error_detected;
+      uint32_t badcomp, goodcomp;
+
+      logic [31:0] rand_wdat = $urandom();
+
+
+   //   rand_wdat = 32'h80000020;
+
+      testphase = "test_256B_dma_write: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+
+      // Setup data in BFM memory
+      for (addr = 'h00; addr < 'h40; addr += 1)
+        i_gn4124.host_mem_write(4 * addr, rand_wdat - addr);
+
+       //  Setup & Start the DMA
+        t_setup_dma(  .host_addr               ('h2000_0000)
+                     ,.local_addr              ('h00)
+                     ,.length                  ('h100)
+                     ,.more_in_chain           (0)
+                     ,.w_nrd_sel               (1)
+                     ,.next_chained_descr_addr (64'hdeadbeefbeefbeef)   // placeholder value; more_in_chain is 0
+                   );
+
+      t_start_dma();
+
+      @(posedge dma_irq);
+      check_irq_status;
+      clear_irq;
+
+      repeat(4) @(posedge clk_125m);
+      // NOTE(review): data is checked 4 clocks after the IRQ -- confirm the Wishbone side has finished writing by then
+      error_detected = 1'b0;
+      for (addr = 'h00; addr < 'h40; addr += 1) begin
+           expected = rand_wdat - addr;
+           t_get_device_mem(addr, from_mem);
+           if (from_mem !== expected) begin
+              error_detected = 1'b1;
+            end
+       end
+
+       if (error_detected)   // dump the full comparison table before failing
+         t_compare_memories('h00000000, 'h0, 64, badcomp, goodcomp, 1);
+
+       if (error_detected != 1'b1) begin
+            $write("PASS \n");
+       end else begin
+            $fatal(1, "FAIL  -- IRQ was OK But data integrity failed!!!");   // fatal so the run cannot end with "Simulation PASSED"
+       end
+      #1us;
+   endtask
+
+
+
+
+
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_2x128B_reads
+//      Purpose:
+//           Basic self-checking chained DMA read of 2 x 128B: card memory is preloaded, the result is checked with mem_check.
+//      Method:
+//          First 128B use the register descriptor, second 128B use a chained descriptor placed in host memory.
+// --------------------------------------------
+   task automatic t_test_2x128B_reads(ref   int ntest,
+                                         input int tests
+                                         );
+      uint32_t addr, expected;
+      // rand_wdat is the base of the data pattern: word i holds rand_wdat - i
+      logic [31:0] rand_wdat = $urandom();
+
+
+      testphase = "test_2x128B reads: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+     // rand_wdat = 32'h80000020;
+
+      // TBD: make this "even more" randomised data?
+      for (addr = 'h00; addr < 'h40; addr += 1)
+        t_blat_device_mem(addr, rand_wdat - addr);
+
+      t_add_chaining_descriptor( .host_addr                   ('h2000_0080)
+                                ,.local_addr                  ('h80)
+                                ,.length                      ('h80)
+                                ,.next_chained_descr_addr     ('h0)
+                                ,.start_address_in_host_memory('h20020000)
+                                ,.more_in_chain               (1'b0)
+                                ,.w_nrd_sel                   (1'b0)
+                               );
+
+      t_setup_dma( .host_addr               ('h2000_0000)
+                  ,.local_addr              ('h00)
+                  ,.length                  ('h80)
+                  ,.more_in_chain           (1)
+                  ,.w_nrd_sel               (0)
+                  ,.next_chained_descr_addr ('h20020000)
+                 );
+
+      t_start_dma();
+
+      @(posedge dma_irq);
+
+      check_irq_status;
+      clear_irq;
+
+      for (addr = 'h00; addr < 'h40; addr += 1)
+        begin
+           expected = rand_wdat - addr;
+           mem_check(4 * addr, expected);         /// TODO: is this the correct type of memory checking!!!!
+        end
+
+      repeat(4) @(posedge clk_125m);
+
+      $write("PASS\n");
+   endtask
+
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_256B_read
+//      Purpose:
+//           Basic self-checking single 256B DMA read: card memory is preloaded, the result is checked with mem_check.
+//      Method:
+//          One unchained descriptor; the data pattern is rand_wdat - 7*i for word i.
+// --------------------------------------------
+   task automatic t_test_256B_read(ref   int ntest,
+                                         input int tests
+                                         );
+      uint32_t addr, expected;
+      // rand_wdat is the base of the data pattern written to card memory
+
+      logic [31:0] rand_wdat = $urandom();
+
+
+      testphase = "test_256B_read : ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+      for (addr = 'h00; addr < 'h40; addr += 1)
+        t_blat_device_mem(addr, rand_wdat - addr*7);
+
+        t_setup_dma(  .host_addr               ('h2000_0000)
+                     ,.local_addr              ('h00)
+                     ,.length                  ('h100)
+                    // ,.attributes              ('h03) //.2
+                     ,.more_in_chain           (0)
+                     ,.w_nrd_sel               (0)
+                     ,.next_chained_descr_addr (64'hfeff_0123_4567_19a0)   // placeholder; sized to the 64-bit port (the 17-digit literal was truncated to this value)
+                   );
+       t_start_dma();
+
+      @(posedge dma_irq);
+
+      check_irq_status;
+      clear_irq;
+
+      for (addr = 'h00; addr < 'h40; addr += 1)
+        begin
+           expected = rand_wdat - addr*7;
+           mem_check(4 * addr, expected);
+        end
+
+      repeat(4) @(posedge clk_125m);
+
+      $write("PASS\n");
+   endtask
+
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_dma_rd_with_wishbone_stalls
+//      Purpose:
+//            Check the core's behaviour when the Wishbone DMA bus stalls during a read.
+//      SELF_CHECKING:
+//            GROSS check - targets are pre-used and data is non-unique
+//      Method:
+//            The same 256B read is run once per entry of stall_cycle, forcing stall/ack that many clocks after the first strobe.
+//      Notes:  the descriptor is programmed once and only restarted for each iteration.
+// --------------------------------------------
+task automatic t_test_dma_rd_with_wishbone_stalls(ref   int ntest,
+                                                  input int tests
+                                                 );
+
+      uint32_t addr, expected;
+      int stall_cycle[14];
+
+      logic [31:0] rand_wdat = $urandom();
+
+      testphase = "256B DMA reads with wishbone stalling: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+
+      for (addr = 'h00; addr < 'h40; addr += 1) begin
+        t_blat_device_mem(addr, rand_wdat - addr);
+      end
+
+
+      t_setup_dma(  .host_addr               ('h2000_0000)
+                   ,.local_addr              ('h00)
+                   ,.length                  ('h100)
+                   ,.more_in_chain           (0)              // single descriptor; the next address below is therefore presumably unused -- TODO confirm
+                   ,.w_nrd_sel               (0)
+                   ,.next_chained_descr_addr ('h20020000)
+                 );
+
+      stall_cycle = '{0,1,2,14,15,16,17,30,31,32,33,61,62,63};
+      foreach(stall_cycle[i])
+        begin
+           //acc.write('h00, 'h01); // start
+
+         $sformat(testphase, "256B DMA reads with wishbone stalling:  (stall_cycle = %0d)", stall_cycle[i]);
+
+           t_start_dma();
+
+           @(posedge wb_dma_out.stb);
+           repeat(stall_cycle[i]) @(posedge clk_125m);
+           force DUT.cmp_wrapped_gn4124.dma_stall_i = 1'b1;
+           @(posedge clk_125m);
+           force DUT.cmp_wrapped_gn4124.dma_ack_i = 1'b0;
+           @(posedge clk_125m);
+           release DUT.cmp_wrapped_gn4124.dma_stall_i;
+           @(posedge clk_125m);
+           release DUT.cmp_wrapped_gn4124.dma_ack_i;
+
+           @(posedge dma_irq);
+
+           check_irq_status;
+           clear_irq;
+
+           for (addr = 'h00; addr < 'h40; addr += 1)  begin
+                expected = rand_wdat - addr;
+                mem_check(4 * addr, expected);
+           end
+
+           repeat(4) @(posedge clk_125m);
+        end
+
+      $write("PASS\n");
+endtask
+
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_2x4KiB_chained_DMA_WRITE
+//      Purpose:
+//            legacy: checks write chains (register descriptor plus one chained descriptor, 4KiB each).
+//      SELF_CHECKING:
+//            NONE for data - only the IRQ status is checked; the wait for the IRQ has no timeout.
+//      Method:
+//            Host (BFM) memory is filled with a fixed pattern, then both descriptors are set up and the DMA is started.
+//      Notes:
+//
+// --------------------------------------------
+task automatic t_test_2x4KiB_chained_DMA_WRITE(ref   int ntest,
+                                                  input int tests
+                                                 );
+
+      uint32_t addr;
+
+      // ---------------------------------
+      testphase = "2x4KiB chained DMA write: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+      // Setup data in BFM memory
+      for (addr = 'h00; addr < 'h800; addr += 1)
+        i_gn4124.host_mem_write(4 * addr, 32'h80000020 - addr);
+
+      t_add_chaining_descriptor( .host_addr                    ('h2000_1000)
+                                ,.local_addr                   ('h1000)
+                                ,.length                       ('h1000)
+                                ,.next_chained_descr_addr      ('h0)
+                                ,.start_address_in_host_memory ('h2002_0000)
+                                ,.more_in_chain                (1'b0)
+                                ,.w_nrd_sel                    (1'b1)
+                               );
+
+
+        t_setup_dma(  .host_addr               ('h2000_0000)
+                     ,.local_addr              ('h00)
+                     ,.length                  ('h1000)
+                     ,.more_in_chain           (1)   // must be 1 so the chained descriptor at 'h2002_0000 is followed
+                     ,.w_nrd_sel               (1)
+                     ,.next_chained_descr_addr ('h2002_0000)
+                   );
+
+       t_start_dma();
+
+      @(posedge dma_irq);
+
+      check_irq_status;
+      clear_irq;
+
+      // check data (not implemented yet)
+
+      repeat(4) @(posedge clk_125m);
+
+      $write("PASS\n");
+
+      // wait for WB transfer to finish
+      #5us;
+
+
+endtask
+
+
+
+
+
+
+ // -------------------------------------------
+//  TESTCASE:    t_test_8KiB_chained_DMA_WRITE
+//      Purpose:
+//            8KiB DMA write split over two chained items ('h1800 + 'h800 bytes).
+//      SELF_CHECKING:
+//            YES - IRQ status plus a word-by-word compare of device memory. No timeout on the IRQ wait.
+//      Method:
+//            stage a pattern in BFM host memory, run the chain, read device memory back and compare.
+//      Notes:   a data mismatch is reported and then ends the simulation through $fatal.
+// --------------------------------------------
+task automatic t_test_8KiB_chained_DMA_WRITE(ref   int ntest,
+                                                  input int tests
+                                                 );
+
+      uint32_t addr, expected, from_mem;
+      bit      error_detected;
+      uint32_t badcomp, goodcomp;        // handed to t_compare_memories on a mismatch
+      // Random pattern base, so data left behind by an earlier test cannot match
+      logic [31:0] rand_wdat = $urandom();
+
+      // ---------------------------------
+      testphase = "8KiB chained DMA write: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+      // Setup data in BFM memory: word n holds rand_wdat - n ('h800 words = 8KiB)
+      for (addr = 'h00; addr < 'h800; addr += 1) begin
+        i_gn4124.host_mem_write(4 * addr, rand_wdat - addr);
+      end
+
+      // Last chain item (final 'h800 bytes). more_in_chain is 0, so the
+      // next-descriptor address is only a recognisable dummy value.
+      t_add_chaining_descriptor( .host_addr                   (64'h20001800)
+                                ,.local_addr                  (32'h1800)
+                                ,.length                      ('h800)
+                                ,.next_chained_descr_addr     (32'hDEADBEEf)
+                                ,.start_address_in_host_memory('h2002_0000)
+                                ,.more_in_chain               (1'b0)
+                                ,.w_nrd_sel                   (1'b1)
+                               );
+
+      // First chain item ('h1800 bytes), flagged as chained to the descriptor above
+      t_setup_dma(  .host_addr               (64'h2000_0000)
+                   ,.local_addr              ('h00)
+                   ,.length                  ('h1800)
+                   ,.more_in_chain           (1)
+                   ,.w_nrd_sel               (1)
+                   ,.next_chained_descr_addr (64'h20020000)
+                 );
+
+      t_start_dma();
+
+      @(posedge dma_irq);
+      check_irq_status;
+      clear_irq;
+
+      repeat(4) @(posedge clk_125m);
+
+      // NOTE(review): the original author was unsure whether a longer wait is needed here
+      // Check data
+      error_detected = 1'b0;
+      for (addr = 'h00; addr < 'h800; addr += 1) begin
+           expected = rand_wdat - addr;
+           t_get_device_mem(addr, from_mem);
+           if (from_mem !== expected) begin
+              error_detected = 1'b1;
+           end
+      end
+
+      // On a mismatch: run the full memory comparison, print the verdict and
+      // stop the run, in line with the other self-checking tests.
+      if (error_detected) begin
+         t_compare_memories('h00000000, 'h0, 2048, badcomp, goodcomp, 1);
+         $write("FAIL  -- IRQ was OK But data integrity failed!!!\n");
+         $fatal(1, "8KiB chained DMA write: data integrity check failed");
+      end
+
+      $write("PASS \n");
+
+      #1us;
+  endtask
+
+
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_8KiB_Byteswaps
+//      Purpose:
+//            runs an 8KiB DMA read once for each of the four byte-swap settings (0..3)
+//      Method:
+//            fill device memory with a fresh random-based pattern, DMA it out, mem_check the swapped words
+// --------------------------------------------
+   task automatic t_test_8KiB_Byteswaps(ref   int ntest,
+                                        input int tests
+                                       );
+      uint32_t addr, expected;
+
+      // Randomise the write data for this testcase, that way a previous test can't cause a false +ve..
+      logic [31:0] rand_wdat = $urandom();
+
+      for (int i = 0; i < 4; i++) begin
+
+          $sformat(testphase, "8KiB DMA read (byte swap = %0d) (rand = 0x%08x) : ",
+                                                          i,     rand_wdat
+                  );
+          $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+         // Word n of device memory holds rand_wdat - n ('h800 words = 8KiB)
+         for (addr = 'h00; addr < 'h800; addr += 1) begin
+           t_blat_device_mem(addr, rand_wdat - addr);
+         end
+
+         // The next-descriptor address is a dummy (more_in_chain = 0). It is sized
+         // explicitly so that the constant cannot exceed the 64-bit range.
+         t_setup_dma(  .host_addr               ('h2000_0000)
+                      ,.local_addr              ('h00)
+                      ,.length                  ('h2000)
+                      ,.more_in_chain           (0)
+                      ,.w_nrd_sel               (0)
+                      ,.next_chained_descr_addr (64'hfeff_0123_4567_19a0)
+                    );
+
+         // Start bit ('h01) plus the byte-swap setting in bits [3:2] of register 'h00
+         acc.write('h00, (i << 2) | 'h01); // start    BUT byteswaps are applied...
+
+         @(posedge dma_irq);
+         check_irq_status;
+
+         // Expected word per setting: 1 = byte order reversed, 2 = 16-bit halves
+         // exchanged, 3 = both applied; 0 leaves the word unchanged.
+         for (addr = 'h00; addr < 'h800; addr += 1)
+           begin
+              expected = rand_wdat - addr;
+              case (i)
+                1:       expected = {<<8{expected}};
+                2:       expected = {<<16{expected}};
+                3:       expected = {<<16{{<<8{expected}}}};
+                default: ;
+              endcase
+              mem_check(4 * addr, expected);
+           end
+
+         clear_irq;
+
+         repeat(4) @(posedge clk_125m);
+
+         $write("PASS\n");
+         // New pattern for the next setting so leftovers of this pass cannot match
+         rand_wdat = $urandom();
+
+         #1us;
+      end
+   endtask
+
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_mixed_chaining
+//      Purpose:
+//            Sanity check of a longer chain in which the DMA direction changes between items.
+//      Method:
+//          NOTE: SELF_CHECKING: YES (IRQ status, then a word-by-word compare of device memory)
+//          The purpose of this testcase is to detect wishbone R->W and W->R turnaround issues from
+//          the dma_controller state machine
+//          as of 20/01/2021 it is failing (expected) ...
+// --------------------------------------------
+   task automatic t_test_mixed_chaining(ref   int ntest,
+                                        input int tests
+                                       );
+      // ntest is post-incremented for the banner; tests is only printed.
+      uint32_t     addr,expected;
+      uint32_t     badcomp, goodcomp;  // only referenced by the commented-out t_compare_memories call
+      logic [31:0] rand_wdat = $urandom();  // pattern base: word n holds rand_wdat - n
+
+     // logic [31:0] wdat;
+      bit          dma_done;  // set by the DMA branch of the fork; ends the stall-injection loop
+      integer      cycles;  // not referenced in this task
+
+      uint32_t op_len_bytes = 128;  // length of the full-size items; the later items move op_len_bytes/4
+
+      uint64_t  wdat;  // word read back through t_get_device_mem
+      uint64_t  start_addr;  // not referenced in this task
+
+      testphase = " t_test_mixed_chaining: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+      // initialise memory: op_len_bytes/4 words of device memory receive the pattern
+
+
+
+      for (addr = 'h00; addr < op_len_bytes/4; addr += 1) begin
+        t_blat_device_mem(addr, rand_wdat - addr);
+       // wdat = 64'h0;
+       // wdat[0+:32] = rand_wdat - addr;
+       // i_gn4124.host_mem_write_noaddr_twiddles(64'h2000_0000 + addr, wdat);
+      end
+
+
+
+      if (dma_irq != 1'b0) begin
+        $fatal(1, "dma irq should be 0");
+      end
+
+      // Chain layout follows. w_nrd_sel = 0 marks a read item, 1 a write item.
+      // First DMA copies the data into system memory at offset 0x1000
+        t_setup_dma(  .host_addr               ('h2000_1000)
+                     ,.local_addr              ('h00)
+                     ,.length                  (op_len_bytes)
+                     ,.more_in_chain           (1)
+                     ,.w_nrd_sel               (0)
+                     ,.next_chained_descr_addr ('h20020000)
+                   );
+      // Second DMA wipes the region the data came from.
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_0000), .next_chained_descr_addr('h2002_0020), .length(op_len_bytes), .more_in_chain(1'b1), .w_nrd_sel(1'b1), .host_addr(64'h2000_0000), .local_addr('h0000));
+      // Then four quarter-length write/read pairs; both items of a pair use the same host and local offsets.
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_0020), .next_chained_descr_addr('h2002_0040), .length(op_len_bytes/4), .more_in_chain(1'b1), .w_nrd_sel(1'b1), .host_addr(64'h2000_1000), .local_addr('h00));
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_0040), .next_chained_descr_addr('h2002_0060), .length(op_len_bytes/4), .more_in_chain(1'b1), .w_nrd_sel(1'b0), .host_addr(64'h2000_1000), .local_addr('h00));
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_0060), .next_chained_descr_addr('h2002_0080), .length(op_len_bytes/4), .more_in_chain(1'b1), .w_nrd_sel(1'b1), .host_addr(64'h2000_1000 +   op_len_bytes/4 ), .local_addr('h00 +   op_len_bytes/4));
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_0080), .next_chained_descr_addr('h2002_00a0), .length(op_len_bytes/4), .more_in_chain(1'b1), .w_nrd_sel(1'b0), .host_addr(64'h2000_1000 +   op_len_bytes/4 ), .local_addr('h00 +   op_len_bytes/4));
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_00a0), .next_chained_descr_addr('h2002_00c0), .length(op_len_bytes/4), .more_in_chain(1'b1), .w_nrd_sel(1'b1), .host_addr(64'h2000_1000 +   op_len_bytes/2 ), .local_addr('h00 +   op_len_bytes/2));
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_00c0), .next_chained_descr_addr('h2002_00e0), .length(op_len_bytes/4), .more_in_chain(1'b1), .w_nrd_sel(1'b0), .host_addr(64'h2000_1000 +   op_len_bytes/2 ), .local_addr('h00 +   op_len_bytes/2));
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_00e0), .next_chained_descr_addr('h2002_0100), .length(op_len_bytes/4), .more_in_chain(1'b1), .w_nrd_sel(1'b1), .host_addr(64'h2000_1000 + 3*op_len_bytes/4 ), .local_addr('h00 + 3*op_len_bytes/4));
+      t_add_chaining_descriptor(.start_address_in_host_memory(64'h2002_0100), .next_chained_descr_addr('hdead_beed), .length(op_len_bytes/4), .more_in_chain(1'b0), .w_nrd_sel(1'b0), .host_addr(64'h2000_1000 + 3*op_len_bytes/4 ), .local_addr('h00 + 3*op_len_bytes/4));
+      // The final descriptor has more_in_chain = 0; its next pointer ('hdead_beed) is a dummy value.
+
+      dma_done = 1'b0;
+      fork
+         begin  // branch 1: run the chain, wait for its interrupt, then flag completion
+            t_start_dma();
+            @(posedge dma_irq);
+            dma_done = 1'b1;
+            check_irq_status;
+            clear_irq;
+         end
+         while (1'b0 == dma_done) begin  // branch 2: stall injection, repeated until branch 1 reports completion
+            // NOTE(review): this branch can block on dma_ctrl_done_o below; if that edge never arrives the join never completes - confirm.
+// When a P2L transfer has been started, wait until it is done, then hold off the DMA wishbone
+// for a while (dma_stall_i forced high, dma_ack_i forced low) to mimic e.g. DDR refresh backpressure.
+            @(posedge clk_125m);
+            if (1'b1 == DUT.cmp_wrapped_gn4124.gen_with_dma.cmp_p2l_dma_master.dma_ctrl_start_p2l_i) begin
+               @(posedge DUT.cmp_wrapped_gn4124.gen_with_dma.cmp_p2l_dma_master.dma_ctrl_done_o);
+               @(posedge clk_125m);
+               force DUT.cmp_wrapped_gn4124.dma_stall_i = 1'b1;
+               @(posedge clk_125m);
+               force DUT.cmp_wrapped_gn4124.dma_ack_i = 1'b0;
+               repeat(125) @(posedge clk_125m);
+               release DUT.cmp_wrapped_gn4124.dma_stall_i;
+               @(posedge clk_125m);
+               release DUT.cmp_wrapped_gn4124.dma_ack_i;
+            end
+         end
+      join
+
+
+
+      // allow wishbone to finish
+     //#1us;
+     // t_compare_memories('h00000000, 'h0, op_len_bytes/4, badcomp, goodcomp, 1);
+      // NOTE(review): expected is 32 bits wide while wdat is 64; the overflow tests compare only bits [31:0] - confirm intent.
+      for (addr = 'h00; addr < op_len_bytes/4; addr += 1)
+        begin
+           expected = rand_wdat - addr;
+           t_get_device_mem(addr, wdat);
+           if (expected !== wdat) begin
+              $write("\n  Exected 0x%08x, got 0x%08x, for addr 0x%04x \n", expected, wdat, addr);
+              $fatal();
+           end
+        end
+
+        // TODO: compare the other memory regions as well
+
+      repeat(2) @(posedge clk_125m);
+
+
+      $write("PASS\n");
+   endtask
+
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_32bit_addr_overflow_wr
+//      Purpose:
+//            DMA write whose host address range runs across the 32-bit limit.
+//      Method:
+//            Place the two test-infrastructure BARs back to back (BAR1 is 4KiB ending at
+//            0xffff_ffff, BAR0 starts at 0x1_0000_0000), seed 16 host words from 0xffff_fff0,
+//            DMA 0x20 bytes to local address 0 and compare the 8 words that were moved.
+//      NOTES: TBD: this test passes, but the request crosses a 4KB boundary
+//                   (actually it is a 32 -> 64 bit addr boundary).
+//                 It was expected to FAIL, however the BFM is not flagging any errors.....   TBD
+// --------------------------------------------
+   task automatic t_test_32bit_addr_overflow_wr(ref   int ntest,
+                                                input int tests
+                                               );
+
+      uint32_t               word_idx;        // word offset inside the test region
+      uint32_t               want;            // value expected at word_idx
+      uint64_t               host_word;       // data handed to the BFM host-memory write
+      uint64_t               host_byte_addr;  // absolute address of that word on the host side
+      uint64_t               got;             // value read back from device memory
+      automatic logic [31:0] seed = $urandom();
+
+      testphase = "t_test_32bit_addr_overflow_wr: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+
+      // BAR0 directly follows BAR1, so the transfer below starts in BAR1 and ends in BAR0
+      test_subphase = "config_bars";
+      i_gn4124.t_configure_bar(.bar_number(0), .bar_base_address('h100000000), .bar_size('h20000000));
+      i_gn4124.t_configure_bar(.bar_number(1), .bar_base_address('h0fffff000), .bar_size('h00001000));
+
+
+      // Word n of the region holds seed - n in the low half of the 64-bit data
+      test_subphase = "init test mem";
+      for (word_idx = 0; word_idx < 16; word_idx++) begin
+         host_word      = {32'h0, seed - word_idx};
+         host_byte_addr = 64'hffff_fff0 + word_idx*4;
+         i_gn4124.host_mem_write_noaddr_twiddles(host_byte_addr, host_word);
+      end
+
+      // Single item, not chained; a next-descriptor address is programmed regardless
+      test_subphase = "config_dma";
+      t_setup_dma(  .host_addr               ('hffff_fff0)
+                   ,.local_addr              ('h00)
+                   ,.length                  ('h20)
+                   ,.w_nrd_sel               (1)
+                   ,.more_in_chain           (0)
+                   ,.next_chained_descr_addr ('h20020000)
+                 );
+
+      // Start the transfer, then wait for its interrupt and acknowledge it
+      test_subphase = "do_dma";
+      t_start_dma();
+      @(posedge dma_irq);
+      check_irq_status;
+      clear_irq;
+
+      test_subphase = "Check_result";
+
+      // Only the low 32 bits of each read-back word take part in the compare;
+      // the first mismatch is printed and ends the simulation.
+      for (word_idx = 0; word_idx < 8; word_idx++)
+        begin
+           want = seed - word_idx;
+           t_get_device_mem(word_idx, got);
+           if (want === got[31:0])
+             continue;
+           $write("\n ERROR: Exected 0x%08x, got 0x%08x, for addr 0x%04x \n", want, got, word_idx);
+           $fatal();
+        end
+
+
+      test_subphase = "Revert BAR configs";
+      i_gn4124.t_detault_bar_config();
+
+
+      repeat(2) @(posedge clk_125m);
+      $write("PASS\n");
+   endtask
+
+
+
+
+// -------------------------------------------
+//  TESTCASE:    t_test_32bit_addr_overflow_rd
+//      Purpose:
+//            DMA read whose host address range runs across the 32-bit limit.
+//      Method:
+//            Place the two test-infrastructure BARs back to back (a 4KiB BAR1 ending at
+//            0xffff_ffff, BAR0 from 0x1_0000_0000), fill 16 device words, DMA 0x40 bytes to
+//            host address 0xffff_ffe8 and read the 16 host words back for comparison.
+// --------------------------------------------
+   task automatic t_test_32bit_addr_overflow_rd(ref   int ntest,
+                                                input int tests
+                                               );
+
+      uint32_t               word_idx;        // word offset inside the test region
+      uint32_t               want;            // value expected at word_idx
+      uint32_t               fill_word;       // 32-bit pattern word written to device memory
+      uint64_t               host_byte_addr;  // absolute host-side address of the word being checked
+      uint64_t               got;             // value read back from host memory
+
+      uint64_t               host_base = 64'hffff_ffe8;
+      automatic logic [31:0] seed      = $urandom();
+
+      testphase = "t_test_32bit_addr_overflow_rd: ";
+      $write("Test %0d/%0d: %s", ntest++, tests, testphase);
+
+
+      // BAR0 directly follows BAR1, so the transfer below starts in BAR1 and ends in BAR0
+      test_subphase = "config_bars";
+      i_gn4124.t_configure_bar(.bar_number(0), .bar_base_address('h100000000), .bar_size('h20000000));
+      i_gn4124.t_configure_bar(.bar_number(1), .bar_base_address('h0fffff000), .bar_size('h00001000));
+
+
+      // Word n of device memory holds seed - n
+      test_subphase = "init test mem";
+      for (word_idx = 0; word_idx < 16; word_idx++) begin
+         fill_word = seed - word_idx;
+         t_blat_device_mem(word_idx, fill_word);
+      end
+
+      // Single item, not chained; a next-descriptor address is programmed regardless
+      test_subphase = "config_dma";
+      t_setup_dma(  .host_addr               (host_base)
+                   ,.local_addr              ('h00)
+                   ,.length                  ('h40)
+                   ,.w_nrd_sel               (0)
+                   ,.more_in_chain           (0)
+                   ,.next_chained_descr_addr ('h20020000)
+                 );
+
+      // Start the transfer, then wait for its interrupt and acknowledge it
+      test_subphase = "do_dma";
+      t_start_dma();
+      @(posedge dma_irq);
+      check_irq_status;
+      clear_irq;
+
+      test_subphase = "Check_result";
+
+      // Host memory is read back at the untranslated address; only the low 32 bits
+      // are compared, and the first mismatch is printed and ends the simulation.
+      for (word_idx = 0; word_idx < 16; word_idx++)
+        begin
+           want           = seed - word_idx;
+           host_byte_addr = host_base + word_idx*4;
+           i_gn4124.host_mem_read_noaddr_twiddles(host_byte_addr, got);
+           if (want === got[31:0])
+             continue;
+           $write("\n ERROR: Exected 0x%08x, got 0x%08x, for addr 0x%04x \n", want, got, word_idx);
+           $fatal();
+        end
+
+
+      test_subphase = "Revert BAR configs";
+      i_gn4124.t_detault_bar_config();
+
+      repeat(2) @(posedge clk_125m);
+      $write("PASS\n");
+   endtask
+
+
diff --git a/hdl/sim/gn4124_bfm/gn4124_bfm.svh b/hdl/sim/gn4124_bfm/gn4124_bfm.svh
index f30aba1da9690ce7e1e3b7d78ad3eb373b733f69..54db75ebf9af8ccc6bb92240f0227a1441c7ef69 100644
--- a/hdl/sim/gn4124_bfm/gn4124_bfm.svh
+++ b/hdl/sim/gn4124_bfm/gn4124_bfm.svh
@@ -132,6 +132,23 @@ GN412X_BFM
    bit       ready = 0;
 
 
+   task t_configure_bar(input int      bar_number,
+                        input uint64_t bar_base_address,
+                        input uint64_t bar_size);
+      // Sends a "bfm_bar <n> <base> <size>" command through send_cmd, with base
+      // and size each formatted as 16 zero-padded hex digits.
+      //   bar_number       : BAR index, printed in decimal
+      //   bar_base_address : value placed in the base field
+      //   bar_size         : value placed in the size field
+      send_cmd($sformatf("bfm_bar %1d %016X %016X", bar_number, bar_base_address, bar_size));
+   endtask
+
+
+   task t_detault_bar_config();   // Applies the BAR 0 / BAR 1 layout that init() sets up
+      t_configure_bar(.bar_number(0), .bar_base_address('h4_0000_0000), .bar_size('h2000_0000));   // LOL, a 64bit BAR should technically use slots 0 and 1 .... but...
+      t_configure_bar(.bar_number(1), .bar_base_address('h0_2000_0000), .bar_size('h2000_0000));
+   endtask
+
    task init();
       #100ns;
       internal_rstn <= 1;
@@ -140,8 +157,7 @@ GN412X_BFM
       send_cmd("init");
       send_cmd("reset %d16");
       send_cmd("bar 0 FF00000000000000 08000000 0 7 0");
-      send_cmd("bfm_bar 0 0000000040000000 20000000");
-      send_cmd("bfm_bar 1 0000000020000000 20000000");
+      t_detault_bar_config();
       send_cmd("wait %d64");
       ready = 1;
 
@@ -165,7 +181,7 @@ GN412X_BFM
       send_cmd(cmd);
    endtask // host_mem_write
 
-   task automatic host_mem_read(uint64_t addr, ref uint64_t data);
+   task automatic host_mem_read(uint64_t addr, output uint64_t data);
       string cmd;
 
       $sformat(cmd,"rd 00000000%08X F", 'h20000000 + addr);
@@ -175,6 +191,27 @@ GN412X_BFM
       join
    endtask // host_mem_read
 
+
+   task automatic host_mem_read_noaddr_twiddles(uint64_t addr, output uint64_t data);
+      // Issues "rd <addr> F" with addr used exactly as given (host_mem_read adds
+      // 'h20000000 to its address) and hands back what readback() delivers.
+      string rd_request = $sformatf("rd %016X F", addr);
+      fork
+         send_cmd(rd_request);
+         readback(data);
+      join
+   endtask // host_mem_read_noaddr_twiddles
+
+
+   task host_mem_write_noaddr_twiddles(uint64_t addr, uint64_t data);
+      // Issues "wr <addr> F <data>" through send_cmd. addr is used exactly as
+      // given and formatted as 16 hex digits; data uses a field width of 8.
+      // The command string is built inline, so this task keeps no local state.
+      send_cmd($sformatf("wr %016X F %08X", addr, data));
+   endtask // host_mem_write_noaddr_twiddles
+
+
+
 class CBusAccessor_Gennum extends CBusAccessor;
 
    function new();