diff --git a/modules/wrsw_swcore/async/mpm_fifo_mem_cell.vhd b/modules/wrsw_swcore/async/mpm_fifo_mem_cell.vhd
deleted file mode 100644
index e3d5b6977dbc55c70a0786238788d20f6cdfd69c..0000000000000000000000000000000000000000
--- a/modules/wrsw_swcore/async/mpm_fifo_mem_cell.vhd
+++ /dev/null
@@ -1,39 +0,0 @@
-library ieee;
-use ieee.std_logic_1164.all;
-use IEEE.numeric_std.all;
-
-use work.genram_pkg.all;
-
-entity mpm_fifo_mem_cell is
-  
-  generic (
-    g_width : integer;
-    g_size  : integer);
-
-  port (
-    clk_i : in std_logic;
-    wa_i  : in std_logic_vector(f_log2_size(g_size)-1 downto 0);
-    wd_i  : in std_logic_vector(g_width-1 downto 0);
-    we_i  : in std_logic;
-
-    ra_i : in  std_logic_vector(f_log2_size(g_size)-1 downto 0);
-    rd_o : out std_logic_vector(g_width-1 downto 0));
-
-end mpm_fifo_mem_cell;
-
-architecture rtl of mpm_fifo_mem_cell is
-  type t_mem_array is array(0 to g_size-1) of std_logic_vector(g_width-1 downto 0);
-
-  signal mem : t_mem_array;
-begin  -- rtl
-
-  rd_o <= mem(to_integer(unsigned(ra_i)));
-  p_write : process(clk_i)
-  begin
-    if rising_edge(clk_i) then
-      if(we_i = '1') then
-        mem(to_integer(unsigned(wa_i))) <= wd_i;
-      end if;
-    end if;
-  end process;
-end rtl;
diff --git a/modules/wrsw_swcore/async/Manifest.py b/modules/wrsw_swcore/mpm/Manifest.py
similarity index 82%
rename from modules/wrsw_swcore/async/Manifest.py
rename to modules/wrsw_swcore/mpm/Manifest.py
index 0f4e3ce07f27b26afe1fc13a36fabd802d1bc9f6..ddd07206530979a8b16bc07062d03845370f9d2b 100644
--- a/modules/wrsw_swcore/async/Manifest.py
+++ b/modules/wrsw_swcore/mpm/Manifest.py
@@ -8,6 +8,7 @@ files = ["mpm_async_grow_fifo.vhd",
          "mpm_write_path.vhd",
          "mpm_read_path.vhd",
          "mpm_async_fifo.vhd",
-         "mpm_rpath_io_block.vhd"]
+         "mpm_rpath_io_block.vhd",
+         "mpm_rpath_core_block.vhd"]
 
 
diff --git a/modules/wrsw_swcore/async/mpm_async_fifo.vhd b/modules/wrsw_swcore/mpm/mpm_async_fifo.vhd
similarity index 55%
rename from modules/wrsw_swcore/async/mpm_async_fifo.vhd
rename to modules/wrsw_swcore/mpm/mpm_async_fifo.vhd
index fb2d9f33c69f1394b955c2c2211f690875bca43d..f9986cfdb4e982a7168e9387ec1983d739a37918 100644
--- a/modules/wrsw_swcore/async/mpm_async_fifo.vhd
+++ b/modules/wrsw_swcore/mpm/mpm_async_fifo.vhd
@@ -1,3 +1,44 @@
+-------------------------------------------------------------------------------
+-- Title        : Dual clock (asynchronous) symmetric FIFO 
+-- Project      : White Rabbit Switch
+-------------------------------------------------------------------------------
+-- File         : mpm_async_fifo.vhd
+-- Author       : Tomasz Włostowski
+-- Company      : CERN BE-CO-HT
+-- Created      : 2012-01-30
+-- Last update  : 2012-01-30
+-- Platform     : FPGA-generic
+-- Standard     : VHDL'93
+-- Dependencies : genram_pkg, mpm_fifo_mem_cell, mpm_async_fifo_ctrl.
+-------------------------------------------------------------------------------
+-- Description: Simple, gray-encoded dual clock symmetric FIFO (input and
+-- output have same widths).
+-------------------------------------------------------------------------------
+--
+-- Copyright (c) 2012 CERN
+--
+-- This source file is free software; you can redistribute it   
+-- and/or modify it under the terms of the GNU Lesser General   
+-- Public License as published by the Free Software Foundation; 
+-- either version 2.1 of the License, or (at your option) any   
+-- later version.                                               
+--
+-- This source is distributed in the hope that it will be       
+-- useful, but WITHOUT ANY WARRANTY; without even the implied   
+-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      
+-- PURPOSE.  See the GNU Lesser General Public License for more 
+-- details.                                                     
+--
+-- You should have received a copy of the GNU Lesser General    
+-- Public License along with this source; if not, download it   
+-- from http://www.gnu.org/licenses/lgpl-2.1.html
+--
+-------------------------------------------------------------------------------
+-- Revisions  :
+-- Date        Version  Author          Description
+-- 2012-01-30  1.0      twlostow        Created
+-------------------------------------------------------------------------------
+
 library ieee;
 use ieee.std_logic_1164.all;
 use ieee.numeric_std.all;
diff --git a/modules/wrsw_swcore/async/mpm_async_fifo_ctrl.vhd b/modules/wrsw_swcore/mpm/mpm_async_fifo_ctrl.vhd
similarity index 97%
rename from modules/wrsw_swcore/async/mpm_async_fifo_ctrl.vhd
rename to modules/wrsw_swcore/mpm/mpm_async_fifo_ctrl.vhd
index 645cd0d43aafeba3eda0f0ab2cc087d3df9f876c..d7f51702ee59a3dc3ea25b1278d6daa60c797656 100644
--- a/modules/wrsw_swcore/async/mpm_async_fifo_ctrl.vhd
+++ b/modules/wrsw_swcore/mpm/mpm_async_fifo_ctrl.vhd
@@ -166,11 +166,11 @@ begin  -- rtl
 
   p_gen_going_full : process(wr_i, wcb, rcb)
   begin
-    if ((wcb.bin (wcb.bin'left-2 downto 0) = rcb.bin_x(rcb.bin_x'left-2 downto 0))
+    if ((wcb.bin (wcb.bin'left-1 downto 0) = rcb.bin_x(rcb.bin_x'left-1 downto 0))
         and (wcb.bin(wcb.bin'left) /= rcb.bin_x(wcb.bin_x'left))) then
       going_full <= '1';
     elsif (wr_i = '1'
-           and (wcb.bin_next(wcb.bin'left-2 downto 0) = rcb.bin_x(rcb.bin_x'left-2 downto 0))
+           and (wcb.bin_next(wcb.bin'left-1 downto 0) = rcb.bin_x(rcb.bin_x'left-1 downto 0))
            and (wcb.bin_next(wcb.bin'left) /= rcb.bin_x(rcb.bin_x'left))) then
       going_full <= '1';
     else
diff --git a/modules/wrsw_swcore/async/mpm_async_grow_fifo.vhd b/modules/wrsw_swcore/mpm/mpm_async_grow_fifo.vhd
similarity index 99%
rename from modules/wrsw_swcore/async/mpm_async_grow_fifo.vhd
rename to modules/wrsw_swcore/mpm/mpm_async_grow_fifo.vhd
index bae9df4de20d735ceced09de8c9465bd6e241a60..85a81b1a870ed642d7891ae2f87c5e731518abf4 100644
--- a/modules/wrsw_swcore/async/mpm_async_grow_fifo.vhd
+++ b/modules/wrsw_swcore/mpm/mpm_async_grow_fifo.vhd
@@ -3,7 +3,7 @@
 -- Title        : Dual clock (asynchronous) asymmetric (1:N) FIFO
 -- Project      : White Rabbit Switch
 -------------------------------------------------------------------------------
--- File         : swc_async_grow_fifo.vhd
+-- File         : mpm_async_grow_fifo.vhd
 -- Author       : Tomasz WÅ‚ostowski
 -- Company      : CERN BE-CO-HT
 -- Created      : 2012-01-30
diff --git a/modules/wrsw_swcore/async/mpm_async_shrink_fifo.vhd b/modules/wrsw_swcore/mpm/mpm_async_shrink_fifo.vhd
similarity index 62%
rename from modules/wrsw_swcore/async/mpm_async_shrink_fifo.vhd
rename to modules/wrsw_swcore/mpm/mpm_async_shrink_fifo.vhd
index 0af580255a8f42a53419973b3e243328a51cf66e..8c46ef7596235834265ff3464e83247cf0acf409 100644
--- a/modules/wrsw_swcore/async/mpm_async_shrink_fifo.vhd
+++ b/modules/wrsw_swcore/mpm/mpm_async_shrink_fifo.vhd
@@ -1,3 +1,46 @@
+-------------------------------------------------------------------------------
+-- Title        : Dual clock (asynchronous) asymmetric (N:1) FIFO
+-- Project      : White Rabbit Switch
+-------------------------------------------------------------------------------
+-- File         : mpm_async_shrink_fifo.vhd
+-- Author       : Tomasz Włostowski
+-- Company      : CERN BE-CO-HT
+-- Created      : 2012-01-30
+-- Last update  : 2012-01-30
+-- Platform     : FPGA-generic
+-- Standard     : VHDL'93
+-- Dependencies : mpm_fifo_mem_cell, mpm_async_fifo_ctrl, genram_pkg
+-------------------------------------------------------------------------------
+-- Description: Asynchronous FIFO with asymmetric (serializing) read/write
+-- ports. Single (g_ratio * g_width)-wide word written to input port d_i produces
+-- a sequence of g_ratio words (g_width wide) on the output port q_o.
+-- An additional sideband channel (side_i/side_o) is provided for passing auxiliary data.
+-------------------------------------------------------------------------------
+--
+-- Copyright (c) 2012 CERN
+--
+-- This source file is free software; you can redistribute it   
+-- and/or modify it under the terms of the GNU Lesser General   
+-- Public License as published by the Free Software Foundation; 
+-- either version 2.1 of the License, or (at your option) any   
+-- later version.                                               
+--
+-- This source is distributed in the hope that it will be       
+-- useful, but WITHOUT ANY WARRANTY; without even the implied   
+-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      
+-- PURPOSE.  See the GNU Lesser General Public License for more 
+-- details.                                                     
+--
+-- You should have received a copy of the GNU Lesser General    
+-- Public License along with this source; if not, download it   
+-- from http://www.gnu.org/licenses/lgpl-2.1.html
+--
+-------------------------------------------------------------------------------
+-- Revisions  :
+-- Date        Version  Author          Description
+-- 2012-01-30  1.0      twlostow        Created
+-------------------------------------------------------------------------------
+
 library ieee;
 use ieee.std_logic_1164.all;
 use ieee.numeric_std.all;
@@ -11,22 +54,30 @@ entity mpm_async_shrink_fifo is
     g_ratio          : integer;
     g_size           : integer;
     g_sideband_width : integer);
-
   port (
     rst_n_a_i : in std_logic;
     clk_wr_i  : in std_logic;
     clk_rd_i  : in std_logic;
 
+    -- 1: write word available on (d_i) to the FIFO
     we_i : in std_logic;
+     -- data input
     d_i  : in std_logic_vector(g_width*g_ratio-1 downto 0);
 
+    -- 1: performs a read of a single narrow (g_width-bit) word, output on q_o
     rd_i : in  std_logic;
+    -- registered data output
     q_o  : out std_logic_vector(g_width-1 downto 0);
 
+    -- "Sideband" channel (for passing auxiliary data, such as page indices)
     side_i : in  std_logic_vector(g_sideband_width-1 downto 0);
     side_o : out std_logic_vector(g_sideband_width-1 downto 0);
 
+    -- Flush input. When 1, flushes the remaining narrow words of the currently
+    -- processed wide word and proceeds immediately to the next wide word.
+    -- Used usually for flushing rubbish at the end of the last page of a packet.
     flush_i : in  std_logic := '0';
+   
     full_o  : out std_logic;
     empty_o : out std_logic);
 
@@ -121,7 +172,7 @@ begin  -- rtl
     if rst_n_a_i = '0' then
       rd_count     <= (others => '0');
       q_reg        <= (others => '0');
-      line_flushed <= real_rd;
+      line_flushed <= '1';--real_rd;
       empty_narrow <= '1';
     elsif rising_edge(clk_rd_i) then
 
@@ -151,7 +202,7 @@ begin  -- rtl
   end process;
 
 
-  real_rd <= '1'                       when (rd_count = 0) and rd_i = '1' else '0';
+  real_rd <= '1'                       when (rd_count = 0) and rd_i = '1'  else '0';
   q_o     <= q_reg(g_width-1 downto 0) when line_flushed = '1'            else q_muxed;
   empty_o <= empty_narrow;
   
diff --git a/modules/wrsw_swcore/mpm/mpm_fifo_mem_cell.vhd b/modules/wrsw_swcore/mpm/mpm_fifo_mem_cell.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..a7b783ca0dcf0fbee916ed6103789400de6dac1d
--- /dev/null
+++ b/modules/wrsw_swcore/mpm/mpm_fifo_mem_cell.vhd
@@ -0,0 +1,82 @@
+-------------------------------------------------------------------------------
+-- Title        : Distributed RAM/LUTRAM FIFO Memory Cell
+-- Project      : White Rabbit Switch
+-------------------------------------------------------------------------------
+-- File         : mpm_fifo_mem_cell.vhd
+-- Author       : Tomasz Włostowski
+-- Company      : CERN BE-CO-HT
+-- Created      : 2012-01-30
+-- Last update  : 2012-01-30
+-- Platform     : FPGA-generic
+-- Standard     : VHDL'93
+-- Dependencies : genram_pkg
+-------------------------------------------------------------------------------
+-- Description: Small RAM block inferrable as Distributed RAM.
+-------------------------------------------------------------------------------
+--
+-- Copyright (c) 2012 CERN
+--
+-- This source file is free software; you can redistribute it   
+-- and/or modify it under the terms of the GNU Lesser General   
+-- Public License as published by the Free Software Foundation; 
+-- either version 2.1 of the License, or (at your option) any   
+-- later version.                                               
+--
+-- This source is distributed in the hope that it will be       
+-- useful, but WITHOUT ANY WARRANTY; without even the implied   
+-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      
+-- PURPOSE.  See the GNU Lesser General Public License for more 
+-- details.                                                     
+--
+-- You should have received a copy of the GNU Lesser General    
+-- Public License along with this source; if not, download it   
+-- from http://www.gnu.org/licenses/lgpl-2.1.html
+--
+-------------------------------------------------------------------------------
+-- Revisions  :
+-- Date        Version  Author          Description
+-- 2012-01-30  1.0      twlostow        Created
+-------------------------------------------------------------------------------
+
+
+library ieee;
+use ieee.std_logic_1164.all;
+use IEEE.numeric_std.all;
+
+use work.genram_pkg.all;
+
+entity mpm_fifo_mem_cell is
+  
+  generic (
+    g_width : integer;                  -- width of one memory word, in bits
+    g_size  : integer);                 -- number of words in the memory array
+
+  port (
+    -- write port clock
+    clk_i : in std_logic;
+    wa_i  : in std_logic_vector(f_log2_size(g_size)-1 downto 0);  -- write address (index into mem)
+    wd_i  : in std_logic_vector(g_width-1 downto 0);              -- write data
+    we_i  : in std_logic;               -- write enable, evaluated on the rising edge of clk_i
+
+    -- combinatorial read port (rd_o <= mem[ra_i])
+    ra_i : in  std_logic_vector(f_log2_size(g_size)-1 downto 0);  -- read address (index into mem)
+    rd_o : out std_logic_vector(g_width-1 downto 0));             -- read data
+
+end mpm_fifo_mem_cell;
+
+architecture rtl of mpm_fifo_mem_cell is
+  type t_mem_array is array(0 to g_size-1) of std_logic_vector(g_width-1 downto 0);
+
+  signal mem : t_mem_array;             -- storage array; no reset or initial value is given
+begin  -- rtl
+
+  rd_o <= mem(to_integer(unsigned(ra_i)));  -- unclocked read: rd_o follows ra_i and mem
+  p_write : process(clk_i)              -- clocked write port: stores wd_i at wa_i when we_i = '1'
+  begin
+    if rising_edge(clk_i) then
+      if(we_i = '1') then
+        mem(to_integer(unsigned(wa_i))) <= wd_i;
+      end if;
+    end if;
+  end process;
+end rtl;
diff --git a/modules/wrsw_swcore/async/mpm_pipelined_mux.vhd b/modules/wrsw_swcore/mpm/mpm_pipelined_mux.vhd
similarity index 72%
rename from modules/wrsw_swcore/async/mpm_pipelined_mux.vhd
rename to modules/wrsw_swcore/mpm/mpm_pipelined_mux.vhd
index de51b48eabad140886e2efc3a2e8a0d6bc93538f..225ca05f17c350d9cabe1c56efc9ba20d8296a86 100644
--- a/modules/wrsw_swcore/async/mpm_pipelined_mux.vhd
+++ b/modules/wrsw_swcore/mpm/mpm_pipelined_mux.vhd
@@ -30,11 +30,22 @@ architecture rtl of mpm_pipelined_mux is
   type t_generic_slv_array is array (integer range <>, integer range <>) of std_logic;
 
   constant c_first_stage_muxes : integer := (g_inputs+2)/3;
+  constant c_num_inputs_floor3 : integer := ((g_inputs+2)/3) * 3;
 
   signal first_stage : t_generic_slv_array(0 to c_first_stage_muxes-1, g_width-1 downto 0);
 
+
+
+  signal d_extended   : std_logic_vector(c_num_inputs_floor3 * g_width - 1 downto 0);
+  signal sel_extended : std_logic_vector(c_num_inputs_floor3 - 1 downto 0) := (others => '0');
+  
+  
 begin  -- rtl
 
+  d_extended (d_i'left downto 0) <= d_i;
+  sel_extended (sel_i'left downto 0) <= sel_i;
+
+  
   -- 1st stage, optimized for 5-input LUTs: mux each 3-input groups or 0
   -- if (sel == 11)
   gen_1st_stage : for i in 0 to c_first_stage_muxes-1 generate
@@ -45,12 +56,12 @@ begin  -- rtl
           if rst_n_i = '0' then
             first_stage(i, j) <= '0';
           else
-            if(sel_i(3*i + 2 downto 3*i) = "001") then
-              first_stage(i, j) <= d_i(i * 3 * g_width + j);
-            elsif (sel_i(3*i + 2 downto 3*i) = "010") then
-              first_stage(i, j) <= d_i(i * 3 * g_width + g_width + j);
-            elsif (sel_i(3*i + 2 downto 3*i) = "100") then
-              first_stage(i, j) <= d_i(i * 3 * g_width + 2*g_width + j);
+            if(sel_extended(3*i + 2 downto 3*i) = "001") then
+              first_stage(i, j) <= d_extended(i * 3 * g_width + j);
+            elsif (sel_extended(3*i + 2 downto 3*i) = "010") then
+              first_stage(i, j) <= d_extended(i * 3 * g_width + g_width + j);
+            elsif (sel_extended(3*i + 2 downto 3*i) = "100") then
+              first_stage(i, j) <= d_extended(i * 3 * g_width + 2*g_width + j);
             else
               first_stage(i, j) <= '0';
             end if;
diff --git a/modules/wrsw_swcore/async/mpm_private_pkg.vhd b/modules/wrsw_swcore/mpm/mpm_private_pkg.vhd
similarity index 100%
rename from modules/wrsw_swcore/async/mpm_private_pkg.vhd
rename to modules/wrsw_swcore/mpm/mpm_private_pkg.vhd
diff --git a/modules/wrsw_swcore/async/mpm_read_path.vhd b/modules/wrsw_swcore/mpm/mpm_read_path.vhd
similarity index 76%
rename from modules/wrsw_swcore/async/mpm_read_path.vhd
rename to modules/wrsw_swcore/mpm/mpm_read_path.vhd
index 1f12d34313c320133cd8a9e258e1536500ca0680..4df2af381ab6ef2ebbb14c1f6742dac744b61d72 100644
--- a/modules/wrsw_swcore/async/mpm_read_path.vhd
+++ b/modules/wrsw_swcore/mpm/mpm_read_path.vhd
@@ -33,7 +33,7 @@ entity mpm_read_path is
     rport_d_o        : out std_logic_vector (g_num_ports * g_data_width -1 downto 0);
     rport_dvalid_o   : out std_logic_vector (g_num_ports-1 downto 0);
     rport_dlast_o    : out std_logic_vector (g_num_ports-1 downto 0);
-    rport_dsel_o     : out std_logic_vector(g_partial_select_width -1 downto 0);
+    rport_dsel_o     : out std_logic_vector (g_num_ports * g_partial_select_width -1 downto 0);
     rport_dreq_i     : in  std_logic_vector (g_num_ports-1 downto 0);
     rport_abort_i    : in  std_logic_vector (g_num_ports-1 downto 0);
     rport_pg_addr_i  : in  std_logic_vector (g_num_ports * g_page_addr_width -1 downto 0);
@@ -52,9 +52,9 @@ entity mpm_read_path is
 end mpm_read_path;
 
 architecture rtl of mpm_read_path is
-
   component mpm_rpath_io_block
     generic (
+      g_num_pages            : integer;
       g_data_width           : integer;
       g_page_addr_width      : integer;
       g_page_size            : integer;
@@ -64,7 +64,7 @@ architecture rtl of mpm_read_path is
     port (
       clk_io_i         : in  std_logic;
       rst_n_io_i       : in  std_logic;
-      rport_d_o        : out std_logic_vector(g_page_addr_width-1 downto 0);
+      rport_d_o        : out std_logic_vector(g_data_width-1 downto 0);
       rport_dvalid_o   : out std_logic;
       rport_dlast_o    : out std_logic;
       rport_dsel_o     : out std_logic_vector(g_partial_select_width-1 downto 0);
@@ -79,17 +79,37 @@ architecture rtl of mpm_read_path is
       ll_data_i        : in  std_logic_vector(g_page_addr_width + 1 downto 0);
       pf_full_i        : in  std_logic;
       pf_we_o          : out std_logic;
-      pf_pg_addr_o     : out std_logic_vector(g_page_addr_width-1 downto 0);
-      pf_pg_lines_o    : out std_logic_vector(f_log2_size(g_page_size / g_ratio)-1 downto 0);
+      pf_fbm_addr_o    : out std_logic_vector(f_log2_size(g_num_pages * g_page_size / g_ratio) - 1 downto 0);
+      pf_pg_lines_o    : out std_logic_vector(f_log2_size(g_page_size / g_ratio + 1)-1 downto 0);
       df_empty_i       : in  std_logic;
       df_flush_o       : out std_logic;
       df_rd_o          : out std_logic;
       df_d_i           : in  std_logic_vector(g_data_width-1 downto 0));
   end component;
 
+  component mpm_rpath_core_block
+    generic (
+      g_num_pages       : integer;
+      g_data_width      : integer;
+      g_page_addr_width : integer;
+      g_page_size       : integer;
+      g_ratio           : integer);
+    port (
+      clk_core_i    : in  std_logic;
+      rst_n_core_i  : in  std_logic;
+      fbm_req_o     : out std_logic;
+      fbm_grant_i   : in  std_logic;
+      fbm_addr_o    : out std_logic_vector(f_log2_size(g_num_pages * g_page_size / g_ratio)-1 downto 0);
+      df_full_i     : in  std_logic;
+      df_we_o       : out std_logic;
+      pf_fbm_addr_i : in  std_logic_vector(f_log2_size(g_num_pages * g_page_size / g_ratio) - 1 downto 0);
+      pf_pg_lines_i : in  std_logic_vector(f_log2_size(g_page_size / g_ratio + 1)-1 downto 0);
+      pf_empty_i    : in  std_logic;
+      pf_rd_o       : out std_logic);
+  end component;
 
   constant c_page_count_width    : integer := f_log2_size(g_max_packet_size / g_page_size + 1);
-  constant c_line_size_width     : integer := f_log2_size(g_page_size / g_ratio);
+  constant c_line_size_width     : integer := f_log2_size(g_page_size / g_ratio + 1);
   constant c_fbm_data_width      : integer := g_ratio * g_data_width;
   constant c_fbm_entries         : integer := g_num_pages * g_page_size / g_ratio;
   constant c_fbm_addr_width      : integer := f_log2_size(c_fbm_entries);
@@ -115,18 +135,18 @@ architecture rtl of mpm_read_path is
     df_rd    : std_logic;
     df_empty : std_logic;
     df_flush : std_logic;
-    df_q     : std_logic_vector(c_fbm_data_width -1 downto 0);
+    df_q     : std_logic_vector(g_data_width -1 downto 0);
 
     -- Page FIFO input port
     pf_full     : std_logic;
     pf_we       : std_logic;
-    pf_pg_addr  : std_logic_vector(g_page_addr_width-1 downto 0);
+    pf_fbm_addr : std_logic_vector(c_fbm_addr_width-1 downto 0);
     pf_pg_lines : std_logic_vector(c_line_size_width-1 downto 0);
-    pf_d        : std_logic_vector(c_line_size_width + g_page_addr_width -1 downto 0);
+    pf_d        : std_logic_vector(c_line_size_width + c_fbm_addr_width -1 downto 0);
 
     -- Linked list address & arbitration
     ll_req     : std_logic;
-    ll_grant_d : std_logic_vector(2 downto 0);
+    ll_grant : std_logic;
     ll_addr    : std_logic_vector(g_page_addr_width-1 downto 0);
   end record;
 
@@ -135,13 +155,13 @@ architecture rtl of mpm_read_path is
     df_full : std_logic;
     df_we   : std_logic;
 
-
-    
-    pf_q         : std_logic_vector(g_page_addr_width + c_line_size_width-1 downto 0);
+    pf_q         : std_logic_vector(c_fbm_addr_width + c_line_size_width-1 downto 0);
     pf_rd        : std_logic;
     pf_empty     : std_logic;
-    pg_remaining : std_logic_vector(c_line_size_width - 1 downto 0);
+    pf_pg_lines : std_logic_vector(c_line_size_width - 1 downto 0);
+    pf_fbm_addr     : std_logic_vector(c_fbm_addr_width - 1 downto 0);
     fbm_addr     : std_logic_vector(c_fbm_addr_width - 1 downto 0);
+
     mem_req      : std_logic;
     mem_grant_d  : std_logic_vector(3 downto 0);
   end record;
@@ -161,7 +181,7 @@ architecture rtl of mpm_read_path is
 
   signal rport : t_mpm_read_port_array(g_num_ports-1 downto 0);
 
-  signal rd_mux_a_in : std_logic_vector(g_num_ports * c_fbm_data_width -1 downto 0);
+  signal rd_mux_a_in : std_logic_vector(g_num_ports * c_fbm_addr_width -1 downto 0);
   signal rd_mux_sel  : std_logic_vector(g_num_ports-1 downto 0);
 
   signal fbm_data_reg : std_logic_vector(c_fbm_data_width-1 downto 0);
@@ -177,6 +197,7 @@ begin  -- rtl
       rport(i).d_valid;
     rport_dlast_o(i) <=
       rport(i).d_last;
+    
     rport_dsel_o(g_partial_select_width * (i+1) - 1 downto g_partial_select_width * i) <=
       rport(i).d_sel;
 
@@ -202,29 +223,10 @@ begin  -- rtl
     end if;
   end process;
 
-  gen_mem_access_arbiter : for i in 0 to g_num_ports-1 generate
-
-    mem_req(i)             <= core(i).mem_req and not core(i).mem_grant_d(0);
-    core(i).mem_grant_d(0) <= mem_grant(i);
-
-    -- Delay the grant signal to generate enables for each pipeline stage
-    process(clk_core_i)
-    begin
-      if rising_edge(clk_core_i) then
-        if rst_n_core_i = '0' then
-          core(i).mem_grant_d(3 downto 1) <= (others => '0');
-        else
-          core(i).mem_grant_d(3) <= core(i).mem_grant_d(2);
-          core(i).mem_grant_d(2) <= core(i).mem_grant_d(1);
-          core(i).mem_grant_d(1) <= core(i).mem_grant_d(0);
-        end if;
-      end if;
-    end process;
-  end generate gen_mem_access_arbiter;
 
   gen_mux_inputs : for i in 0 to g_num_ports-1 generate
     rd_mux_a_in(c_fbm_addr_width * (i + 1) - 1 downto c_fbm_addr_width * i) <= core(i).fbm_addr;
-    rd_mux_sel(i)                                                           <= core(i).mem_grant_d(0);
+    rd_mux_sel(i)                                                           <= mem_grant(i);    
   end generate gen_mux_inputs;
 
   U_Rd_Address_Mux : mpm_pipelined_mux
@@ -249,7 +251,7 @@ begin  -- rtl
   gen_fifos : for i in 0 to g_num_ports-1 generate
     U_Page_Fifo : mpm_async_fifo
       generic map (
-        g_width => g_page_addr_width + 1,
+        g_width => c_line_size_width + c_fbm_addr_width,
         g_size  => 8)
       port map (
         rst_n_a_i => rst_n_core_i,
@@ -276,7 +278,7 @@ begin  -- rtl
         d_i       => fbm_data_reg,
         rd_i      => io(i).df_rd,
         q_o       => io(i).df_q,
-        side_i => "",
+        side_i    => "",
         flush_i   => io(i).df_flush,
         full_o    => core(i).df_full,
         empty_o   => io(i).df_empty);
@@ -297,10 +299,12 @@ begin  -- rtl
   end process;
 
   gen_ll_access_arbiter : for i in 0 to g_num_ports-1 generate
-    ll_req(i)           <= io(i).ll_req and not io(i).ll_grant_d(0);
-    io(i).ll_grant_d(0) <= ll_grant(i);
+    ll_req(i)           <= io(i).ll_req and not io(i).ll_grant;
+    io(i).ll_grant <= ll_grant(i);
   end generate gen_ll_access_arbiter;
 
+
+
   p_ll_mux_addr : process(clk_io_i)
     variable muxed : std_logic_vector(g_page_addr_width-1 downto 0);
   begin
@@ -309,7 +313,7 @@ begin  -- rtl
         ll_addr_o <= (others => '0');
       else
         for i in 0 to g_num_ports-1 loop
-          if(io(i).ll_grant_d(0) = '1') then
+          if(io(i).ll_grant = '1') then
             muxed := io(i).ll_addr;
           end if;
         end loop;  -- i
@@ -319,9 +323,32 @@ begin  -- rtl
   end process;
 
 
-  gen_output_controllers : for i in 0 to g_num_ports-1 generate
-    U_Output_Ctrl : mpm_rpath_io_block
+  gen_io_core_blocks : for i in 0 to g_num_ports-1 generate
+
+
+    U_Core_Block: mpm_rpath_core_block
+      generic map (
+        g_num_pages       => g_num_pages,
+        g_data_width      => g_data_width,
+        g_page_addr_width => g_page_addr_width,
+        g_page_size       => g_page_size,
+        g_ratio           => g_ratio)
+      port map (
+        clk_core_i    => clk_core_i,
+        rst_n_core_i  => rst_n_core_i,
+        fbm_req_o     => mem_req(i),
+        fbm_grant_i   => mem_grant(i),
+        fbm_addr_o    => core(i).fbm_addr,
+        df_full_i     => core(i).df_full,
+        df_we_o       => core(i).df_we,
+        pf_fbm_addr_i => core(i).pf_fbm_addr,
+        pf_pg_lines_i => core(i).pf_pg_lines,
+        pf_empty_i    => core(i).pf_empty,
+        pf_rd_o       => core(i).pf_rd);
+    
+    U_IO_Block : mpm_rpath_io_block
       generic map (
+        g_num_pages            => g_num_pages,
         g_data_width           => g_data_width,
         g_page_addr_width      => g_page_addr_width,
         g_page_size            => g_page_size,
@@ -341,20 +368,22 @@ begin  -- rtl
         rport_pg_valid_i => rport(i).pg_valid,
         rport_pg_addr_i  => rport(i).pg_addr,
         ll_req_o         => io(i).ll_req,
-        ll_grant_i       => io(i).ll_grant_d(2),
+        ll_grant_i       => io(i).ll_grant,
         ll_addr_o        => io(i).ll_addr,
         ll_data_i        => ll_data_i,
         pf_full_i        => io(i).pf_full,
         pf_we_o          => io(i).pf_we,
-        pf_pg_addr_o     => io(i).pf_pg_addr,
+        pf_fbm_addr_o    => io(i).pf_fbm_addr,
         pf_pg_lines_o    => io(i).pf_pg_lines,
         df_empty_i       => io(i).df_empty,
         df_flush_o       => io(i).df_flush,
         df_rd_o          => io(i).df_rd,
         df_d_i           => io(i).df_q);
 
-    io(i).pf_d <= io(i).pf_pg_lines & io(i).pf_pg_addr;
-  end generate gen_output_controllers;
+    io(i).pf_d <= io(i).pf_pg_lines & io(i).pf_fbm_addr;
+    core(i).pf_fbm_addr <= core(i).pf_q(c_fbm_addr_width-1 downto 0);
+    core(i).pf_pg_lines <= core(i).pf_q(c_fbm_addr_width + c_line_size_width-1 downto c_fbm_addr_width);
+  end generate gen_io_core_blocks;
   
 
 end rtl;
diff --git a/modules/wrsw_swcore/mpm/mpm_rpath_core_block.vhd b/modules/wrsw_swcore/mpm/mpm_rpath_core_block.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..c6e2e0610a683a1842f4aae9ef198a7f5ec40114
--- /dev/null
+++ b/modules/wrsw_swcore/mpm/mpm_rpath_core_block.vhd
@@ -0,0 +1,134 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+use work.gencores_pkg.all;
+use work.genram_pkg.all;
+
+entity mpm_rpath_core_block is
+  
+  generic (
+    g_num_pages       : integer;        -- used (with g_page_size, g_ratio) to size the F.B. memory address
+    g_data_width      : integer;        -- not referenced inside this entity/architecture
+    g_page_addr_width : integer;        -- not referenced inside this entity/architecture
+    g_page_size       : integer;        -- divided by g_ratio to get the number of lines per page
+    g_ratio           : integer         -- divisor applied to g_page_size (see c_lines_per_page)
+    );
+
+  port(
+    clk_core_i   : in std_logic;        -- clock for every process in this block
+    rst_n_core_i : in std_logic;        -- active-low reset, checked inside rising_edge(clk_core_i)
+
+    -- F. B. Memory I/F
+    fbm_req_o   : out std_logic;        -- fbm_req_int gated off while fbm_grant_d(0), (1) or (2) is high
+    fbm_grant_i : in  std_logic;        -- grant input; feeds tap 0 of the fbm_grant_d delay line
+    fbm_addr_o  : out std_logic_vector(f_log2_size(g_num_pages * g_page_size / g_ratio)-1 downto 0);  -- registered copy of fbm_addr
+
+    df_full_i : in  std_logic;          -- when '1', no new memory request is raised in READ_PAGE
+    df_we_o   : out std_logic;          -- registered; driven only in reset and in state READ_PAGE
+
+    pf_fbm_addr_i : in std_logic_vector(f_log2_size(g_num_pages * g_page_size / g_ratio) - 1 downto 0);  -- latched into fbm_addr in GET_ADDR
+    pf_pg_lines_i : in std_logic_vector(f_log2_size(g_page_size / g_ratio + 1)-1 downto 0);  -- latched into fbm_remaining_lines in GET_ADDR
+    pf_empty_i : in  std_logic;         -- '0' moves the FSM from IDLE to GET_ADDR
+    pf_rd_o    : out std_logic          -- '1' while in IDLE with pf_empty_i = '0'
+    );
+end mpm_rpath_core_block;
+
+architecture behavioral of mpm_rpath_core_block is
+
+  constant c_lines_per_page   : integer := g_page_size/g_ratio;
+  constant c_page_lines_width : integer := f_log2_size(c_lines_per_page + 1);
+  constant c_page_size_width  : integer := f_log2_size(g_page_size + 1);  -- NOTE(review): not referenced in this architecture
+  constant c_fbm_entries         : integer := g_num_pages * g_page_size / g_ratio;
+  constant c_fbm_addr_width      : integer := f_log2_size(c_fbm_entries);
+
+  -- fbm_grant_d(k) is fbm_grant_i delayed by k clk_core_i cycles. Taps used by p_fsm:
+  -- 0: increment fbm_addr, decrement fbm_remaining_lines
+  -- 3: df_we_o is registered high on this clock edge (low otherwise)
+  -- 4: if fbm_remaining_lines = 0, return to IDLE
+
+  type t_core_state is (IDLE, GET_ADDR, READ_PAGE);
+
+  signal fbm_grant_d : std_logic_vector(4 downto 0);  -- bit 0 is combinatorial, bits 4..1 are registered
+  signal state       : t_core_state;
+
+  signal fbm_addr            : unsigned(c_fbm_addr_width-1 downto 0);    -- next address to request; no reset value
+  signal fbm_remaining_lines : unsigned(c_page_lines_width-1 downto 0);  -- lines still to be granted; no reset value
+  signal page_read           : std_logic;  -- NOTE(review): only cleared in reset, never read in this file
+  signal fbm_req_int         : std_logic;  -- registered request, gated by recent grants to form fbm_req_o
+  
+  
+begin  -- behavioral
+
+
+  fbm_grant_d(0) <= fbm_grant_i;  -- tap 0 is the undelayed grant
+  
+  p_delay_grant : process(clk_core_i)  -- shift register producing delayed copies of the grant
+  begin
+    if rising_edge(clk_core_i) then
+      if rst_n_core_i = '0' then
+        fbm_grant_d (fbm_grant_d'left downto 1) <= (others => '0');
+      else
+        fbm_grant_d (fbm_grant_d'left downto 1) <= fbm_grant_d(fbm_grant_d'left-1 downto 0) ;
+      end if;
+    end if;
+  end process;
+
+  p_fsm : process(clk_core_i)  -- IDLE -> GET_ADDR -> READ_PAGE -> IDLE sequencer
+  begin
+    if rising_edge(clk_core_i) then
+      if rst_n_core_i = '0' then
+        state       <= IDLE;  -- fbm_addr, fbm_remaining_lines and fbm_addr_o are not reset here
+        fbm_req_int <= '0';
+        page_read   <= '0';
+        df_we_o <= '0';
+      else
+        case state is
+          when IDLE =>  -- wait for the page FIFO to become non-empty (pf_rd_o is high meanwhile)
+            if(pf_empty_i = '0') then
+              state <= GET_ADDR;
+            end if;
+
+          when GET_ADDR =>  -- latch the start address and line count presented on the pf_* inputs
+            fbm_addr            <= unsigned(pf_fbm_addr_i);
+            fbm_remaining_lines <= unsigned(pf_pg_lines_i);
+            state               <= READ_PAGE;
+
+          when READ_PAGE =>  -- one memory request per line, until fbm_remaining_lines reaches 0
+
+            if(unsigned(fbm_grant_d) = 0 and df_full_i = '0') then  -- no grant in any tap and FIFO not full
+              fbm_addr_o  <= std_logic_vector(fbm_addr);
+              fbm_req_int <= '1';
+            else
+              fbm_req_int <= '0';
+            end if;
+
+            if(fbm_grant_d(0) = '1') then  -- request granted: advance to the next line
+              fbm_addr         <= fbm_addr + 1;
+              fbm_remaining_lines <= fbm_remaining_lines - 1;
+            end if;
+
+
+          if(fbm_grant_d(3) = '1') then  -- three cycles after the grant: raise the registered write enable
+            df_we_o <= '1';
+            else
+              df_we_o <='0';
+            end if;
+            
+            if(fbm_grant_d(4) = '1' and fbm_remaining_lines = 0) then  -- last line done: back to IDLE
+              state <= IDLE;
+            end if;
+        end case;
+      end if;
+    end if;
+  end process;
+
+  fbm_req_o <= fbm_req_int and not (fbm_grant_d(0) or fbm_grant_d(1) or fbm_grant_d(2));  -- drop request right after a grant
+--  fbm_addr_o <= std_logic_vector(fbm_addr);
+  pf_rd_o <= '1' when (state = IDLE and pf_empty_i = '0') else '0';  -- combinatorial page FIFO read strobe
+--  df_we_o <= fbm_grant_d(4);
+  
+end behavioral;
+
+
+
diff --git a/modules/wrsw_swcore/async/mpm_rpath_io_block.vhd b/modules/wrsw_swcore/mpm/mpm_rpath_io_block.vhd
similarity index 68%
rename from modules/wrsw_swcore/async/mpm_rpath_io_block.vhd
rename to modules/wrsw_swcore/mpm/mpm_rpath_io_block.vhd
index 37e723a92902872a9aeeafeeb180eb88c11113d1..d0d537f8d0c9e861b4fb4d43ac3514f8541e1acc 100644
--- a/modules/wrsw_swcore/async/mpm_rpath_io_block.vhd
+++ b/modules/wrsw_swcore/mpm/mpm_rpath_io_block.vhd
@@ -8,6 +8,7 @@ use work.genram_pkg.all;
 entity mpm_rpath_io_block is
   
   generic (
+    g_num_pages            : integer;
     g_data_width           : integer;
     g_page_addr_width      : integer;
     g_page_size            : integer;
@@ -16,11 +17,11 @@ entity mpm_rpath_io_block is
     g_max_packet_size      : integer);
 
   port (
-    clk_io_i : in std_logic;
-    rst_n_io_i  : in std_logic;
+    clk_io_i   : in std_logic;
+    rst_n_io_i : in std_logic;
 
 -- Read Port Interface
-    rport_d_o        : out std_logic_vector(g_page_addr_width-1 downto 0);
+    rport_d_o        : out std_logic_vector(g_data_width-1 downto 0);
     rport_dvalid_o   : out std_logic;
     rport_dlast_o    : out std_logic;
     rport_dsel_o     : out std_logic_vector(g_partial_select_width-1 downto 0);
@@ -39,8 +40,8 @@ entity mpm_rpath_io_block is
 -- Page FIFO interface
     pf_full_i     : in  std_logic;
     pf_we_o       : out std_logic;
-    pf_pg_addr_o  : out std_logic_vector(g_page_addr_width-1 downto 0);
-    pf_pg_lines_o : out std_logic_vector(f_log2_size(g_page_size / g_ratio)-1 downto 0);
+    pf_fbm_addr_o : out std_logic_vector(f_log2_size(g_num_pages * g_page_size / g_ratio) - 1 downto 0);
+    pf_pg_lines_o : out std_logic_vector(f_log2_size(g_page_size / g_ratio + 1)-1 downto 0);
 
 -- Data FIFO interface
     df_empty_i : in  std_logic;
@@ -63,35 +64,37 @@ architecture behavioral of mpm_rpath_io_block is
      y : integer) return unsigned is
 
     type t_div_factor is record
-      inc   : boolean;
-      mul   : integer;
-      shift : integer;
+      adjust : integer range 0 to 15;
+      mul    : integer range 0 to 4095;
+      shift  : integer range 0 to 12;
     end record;
 
-    type t_div_factor_array is array (1 to 10) of t_div_factor;
-
-    constant c_div_factors : t_div_factor_array :=
-      ((false, 1, 0),
-       (false, 1, 1),
-       (true, 85, 8),
-       (false, 1, 2),
-       (true, 51, 8),
-       (true, 85, 9),
-       (true, 73, 9),
-       (false, 1, 3),
-       (true, 227, 11),
-       (true, 51, 9));
-
-    variable tmp    : unsigned(x'left + 11 downto 0);
+    type t_div_factor_array is array (1 to 16) of t_div_factor;
+
+    constant c_div_factors : t_div_factor_array := (
+      (0, 1, 0),                        -- ratio == 1
+      (1, 1, 1),                        -- ratio == 2
+      (3, 85, 8),                       -- ratio == 3
+      (3, 1, 2),                        -- ratio == 4
+      (5, 51, 8),                       -- ratio == 5
+      (6, 341, 11),                     -- ratio == 6
+      (7, 73, 9),                       -- ratio == 7
+      (7, 1, 3),                        -- ratio == 8
+      (9, 227, 11),                     -- ratio == 9
+      (10, 409, 12),                    -- ratio == 10
+      (11, 93, 10),                     -- ratio == 11
+      (12, 341, 12),                    -- ratio == 12
+      (13, 157, 11),                    -- ratio == 13
+      (14, 73, 10),                     -- ratio == 14
+      (15, 17, 8),                      -- ratio == 15
+      (15, 1, 4));                      -- ratio == 16
+
+    variable tmp    : unsigned(x'left + 12 downto 0);
     variable result : unsigned(c_page_lines_width-1 downto 0);
 
   begin
 
-    if(c_div_factors(y).inc) then
-      tmp := (x+1) * to_unsigned(c_div_factors(y).mul, 11);
-    else
-      tmp := x * to_unsigned(c_div_factors(y).mul, 11);
-    end if;
+    tmp := (x+to_unsigned(c_div_factors(y).adjust, 4)) * to_unsigned(c_div_factors(y).mul, 12);
 
     return tmp(c_page_lines_width - 1 + c_div_factors(y).shift downto c_div_factors(y).shift);
   end f_fast_div_pagesize;
@@ -102,7 +105,7 @@ architecture behavioral of mpm_rpath_io_block is
     eof       : std_logic;
     next_page : std_logic_vector(g_page_addr_width-1 downto 0);
     dsel      : std_logic_vector(g_partial_select_width-1 downto 0);
-    size      : std_logic_vector(f_log2_size(g_page_size)-1 downto 0);
+    size      : std_logic_vector(f_log2_size(g_page_size + 1)-1 downto 0);
   end record;
 
 
@@ -112,13 +115,14 @@ architecture behavioral of mpm_rpath_io_block is
   signal page_state : t_page_fetch_state;
   signal cur_page   : std_logic_vector(g_page_addr_width-1 downto 0);
   signal cur_ll     : t_ll_entry;
+  signal fvalid_int : std_logic;
 
   -- Page fetch <> FIFO / output FSM signals
 
   -- Address of the current page
   signal fetch_pg_addr  : std_logic_vector(g_page_addr_width-1 downto 0);
   -- Number of words in the page (1 = 1 word...g_page_size-1 == full page)
-  signal fetch_pg_words : unsigned(c_page_lines_width-1 downto 0);
+  signal fetch_pg_words : unsigned(f_log2_size(g_page_size+1)-1 downto 0);
   -- Number of FBM lines used by this page (1 = 1 line, etc.)
   signal fetch_pg_lines : unsigned(c_page_lines_width-1 downto 0);
   -- Partial select bits for the last word of the packet
@@ -145,11 +149,16 @@ architecture behavioral of mpm_rpath_io_block is
   signal words_xmitted : unsigned(c_word_count_width-1 downto 0);
 
   signal d_last_int, d_valid_int, df_rd_int : std_logic;
-  signal pf_we_int : std_logic;
+  signal pf_we_int                          : std_logic;
+
+  signal ll_req_int, ll_grant_d0, ll_grant_d1 : std_logic;
+  signal counters_equal : std_logic;
   
 begin  -- behavioral
 
 
+
+  
   fetch_abort <= '0';                   -- FIXME: add support for ABORT
 
   p_gen_page_ack : process(clk_io_i)
@@ -164,16 +173,19 @@ begin  -- behavioral
   end process;
 
   pf_we_int     <= fetch_valid and not pf_full_i;
-  pf_we_o <= pf_we_int;
-  pf_pg_addr_o  <= fetch_pg_addr;
+  pf_we_o       <= pf_we_int;  -- pf_we_int = fetch_valid and not pf_full_i
+  pf_fbm_addr_o <= std_logic_vector(resize(unsigned(fetch_pg_addr) * to_unsigned(c_lines_per_page, c_page_lines_width), pf_fbm_addr_o'length));  -- fetch_pg_addr scaled by c_lines_per_page, resized to the port width (presumably the FBM line address of the page start - TODO confirm)
   pf_pg_lines_o <= std_logic_vector(fetch_pg_lines);
 
+  counters_equal <= '1' when (words_total = words_xmitted) else '0';  -- combinatorial compare; p_count_words registers it into d_last_int
+  
   p_count_words : process(clk_io_i)
   begin
     if rising_edge(clk_io_i) then
       if rst_n_io_i = '0' or (d_last_int = '1' and d_valid_int = '1') then
         words_total   <= (others => '0');
         words_xmitted <= to_unsigned(1, words_xmitted'length);
+        d_last_int <= '0';
       else
 
         if(fetch_last = '1' and fetch_ack = '1') then
@@ -186,19 +198,21 @@ begin  -- behavioral
 
         if(fetch_ack = '1') then
           if(fetch_first = '1') then
-            words_total <= fetch_pg_words;
+            words_total <= resize(fetch_pg_words, words_total'length);
           else
             words_total <= words_total + fetch_pg_words;
           end if;
         end if;
+
+        d_last_int <= counters_equal;
+        
       end if;
     end if;
   end process;
 
-  d_last_int <= '1' when (words_total = words_xmitted) else '0';
 
 
-  df_rd_int <= rport_dreq_i and not df_empty_i;
+  df_rd_int <= rport_dreq_i and not (df_empty_i or d_last_int);  -- no read is issued while df_empty_i or d_last_int is set
   df_rd_o   <= df_rd_int;
 
   p_gen_d_valid : process(clk_io_i)
@@ -207,18 +221,19 @@ begin  -- behavioral
       if rst_n_io_i = '0' then
         d_valid_int <= '0';
       else
+        
         d_valid_int <= df_rd_int;
       end if;
     end if;
   end process;
 
-  df_flush_o <= df_rd_int and d_last_int;
+  df_flush_o <= d_last_int;  -- uses the registered d_last_int (counters_equal sampled in p_count_words), not counters_equal directly
 
   rport_dvalid_o <= d_valid_int;
   rport_dlast_o  <= d_last_int;
   rport_d_o      <= df_d_i;
-  rport_dsel_o <= saved_dsel when (words_total = words_xmitted) else (others => '1');
-  
+  rport_dsel_o   <= saved_dsel when d_last_int = '1' else (others => '1');  -- saved_dsel while d_last_int (= rport_dlast_o) is set, all ones otherwise
+
 
 -------------------------------------------------------------------------------
 -- Page fetcher logic
@@ -236,17 +251,25 @@ begin  -- behavioral
   -- 16-bit datapath: 0 = 1 byte, 1 = 2 bytes, etc.)
   cur_ll.dsel      <= ll_data_i(g_page_addr_width-1 downto g_page_addr_width-g_partial_select_width);
 
+  fetch_valid <= fvalid_int and not fetch_ack;  -- drops in the same cycle fetch_ack is set; the FSM clears fvalid_int on the following clock edge
+
   p_page_fsm : process(clk_io_i)
   begin
     if rising_edge(clk_io_i) then
       if rst_n_io_i = '0' then
         page_state <= FIRST_PAGE;
 
-        fetch_valid    <= '0';
+        fvalid_int     <= '0';
         rport_pg_req_o <= '0';
-        ll_req_o       <= '0';
-
+        ll_req_int       <= '0';
+        ll_grant_d0 <= '0';
+        ll_grant_d1 <= '0';
+        
       else
+
+        ll_grant_d0 <= ll_grant_i;
+        ll_grant_d1 <= ll_grant_d0;
+
         case page_state is
 -- request the 1st page of the packet from the Read port interface. Once got
 -- the 1st address, go to FIRST_LL state
@@ -255,7 +278,7 @@ begin  -- behavioral
             if(rport_pg_valid_i = '1') then
               rport_pg_req_o <= '0';
               cur_page       <= rport_pg_addr_i;
-              ll_req_o       <= '1';
+              ll_req_int       <= '1';
               ll_addr_o      <= rport_pg_addr_i;
               page_state     <= NEXT_LINK;
               fetch_first    <= '1';
@@ -266,43 +289,54 @@ begin  -- behavioral
 -- fetch the length (or the link to the next packet) from the LL for the
 -- current page
           when NEXT_LINK =>
+            
             if(fetch_abort = '1') then
               page_state <= FIRST_PAGE;
-              ll_req_o   <= '0';
-            elsif(ll_grant_i = '1' and cur_ll.valid = '1') then
-              cur_page <= cur_ll.next_page;
-
+              ll_req_int   <= '0';
+            elsif(ll_grant_d1 = '1' and cur_ll.valid = '1') then
+              cur_page  <= cur_ll.next_page;
+              ll_addr_o <= cur_ll.next_page;
               if(cur_ll.eof = '1') then
                 page_state     <= WAIT_LAST_ACK;
                 fetch_pg_words <= unsigned(cur_ll.size);
                 fetch_pg_lines <= f_fast_div_pagesize(unsigned(cur_ll.size), g_ratio);
+                fetch_pg_addr  <= cur_page;
                 fetch_dsel     <= cur_ll.dsel;
-                fetch_valid    <= '1';
+                fvalid_int     <= '1';
                 fetch_last     <= '1';
+
               else
-                page_state    <= WAIT_ACK;
-                fetch_pg_addr <= cur_page;
-                fetch_valid   <= '1';
-                fetch_last    <= '0';
+                page_state <= WAIT_ACK;
+
+                fetch_pg_words <= to_unsigned(g_page_size, fetch_pg_words'length);
+                fetch_pg_lines <= to_unsigned(c_lines_per_page, fetch_pg_lines'length);
+                fetch_pg_addr  <= cur_page;
+                fetch_pg_addr  <= cur_page;
+                fvalid_int     <= '1';
+                fetch_last     <= '0';
               end if;
-              ll_req_o <= '0';
+              ll_req_int <= '0';
             else
-              ll_req_o <= '1';
+              ll_req_int <= '1';
             end if;
 
           when WAIT_ACK =>
             if(fetch_abort = '1') then
               page_state <= FIRST_PAGE;
             elsif(fetch_ack = '1') then
-              ll_req_o    <= '1';
+              ll_req_int    <= '1';
               fetch_first <= '0';
-              page_state  <= NEXT_LINK;
+              fvalid_int  <= '0';
+
+
+              page_state <= NEXT_LINK;
             end if;
 
           when WAIT_LAST_ACK =>
             if(fetch_ack = '1') then
               rport_pg_req_o <= '1';
               fetch_first    <= '0';
+              fvalid_int     <= '0';
               page_state     <= FIRST_PAGE;
             end if;
             
@@ -311,5 +345,7 @@ begin  -- behavioral
     end if;
   end process;
 
+  ll_req_o <= ll_req_int and not (ll_grant_i or ll_grant_d0 or ll_grant_d1);  -- ll_req_int masked while ll_grant_i or either of its delayed copies (ll_grant_d0, ll_grant_d1) is set
 
+  
 end behavioral;
diff --git a/modules/wrsw_swcore/async/mpm_top.vhd b/modules/wrsw_swcore/mpm/mpm_top.vhd
similarity index 93%
rename from modules/wrsw_swcore/async/mpm_top.vhd
rename to modules/wrsw_swcore/mpm/mpm_top.vhd
index c68ec3eeec58307c8fce2b12c5dab345118f3fff..e2ea5119712c9196f067a04266c34afa5970863b 100644
--- a/modules/wrsw_swcore/async/mpm_top.vhd
+++ b/modules/wrsw_swcore/mpm/mpm_top.vhd
@@ -10,11 +10,11 @@ use work.mpm_private_pkg.all;
 entity mpm_top is
   generic (
     g_data_width           : integer := 18;
-    g_ratio                : integer := 8;
+    g_ratio                : integer := 2;
     g_page_size            : integer := 64;
     g_num_pages            : integer := 2048;
-    g_num_ports            : integer := 18;
-    g_fifo_size            : integer := 8;
+    g_num_ports            : integer := 8;
+    g_fifo_size            : integer := 4;
     g_page_addr_width      : integer := 11;
     g_partial_select_width : integer := 1;
     g_max_packet_size      : integer := 10000
@@ -39,7 +39,7 @@ entity mpm_top is
     rport_d_o        : out std_logic_vector (g_num_ports * g_data_width -1 downto 0);
     rport_dvalid_o   : out std_logic_vector (g_num_ports-1 downto 0);
     rport_dlast_o    : out std_logic_vector (g_num_ports-1 downto 0);
-    rport_dsel_o     : out std_logic_vector(g_partial_select_width -1 downto 0);
+    rport_dsel_o     : out std_logic_vector (g_num_ports * g_partial_select_width -1 downto 0);
     rport_dreq_i     : in  std_logic_vector (g_num_ports-1 downto 0);
     rport_abort_i    : in  std_logic_vector (g_num_ports-1 downto 0);
     rport_pg_addr_i  : in  std_logic_vector (g_num_ports * g_page_addr_width -1 downto 0);
@@ -97,11 +97,12 @@ architecture rtl of mpm_top is
     port (
       clk_io_i         : in  std_logic;
       clk_core_i       : in  std_logic;
-      rst_n_i          : in  std_logic;
+      rst_n_io_i       : in  std_logic;
+      rst_n_core_i     : in  std_logic;
       rport_d_o        : out std_logic_vector (g_num_ports * g_data_width -1 downto 0);
       rport_dvalid_o   : out std_logic_vector (g_num_ports-1 downto 0);
       rport_dlast_o    : out std_logic_vector (g_num_ports-1 downto 0);
-      rport_dsel_o     : out std_logic_vector(g_partial_select_width -1 downto 0);
+      rport_dsel_o     : out std_logic_vector(g_num_ports * g_partial_select_width -1 downto 0);
       rport_dreq_i     : in  std_logic_vector (g_num_ports-1 downto 0);
       rport_abort_i    : in  std_logic_vector (g_num_ports-1 downto 0);
       rport_pg_addr_i  : in  std_logic_vector (g_num_ports * g_page_addr_width -1 downto 0);
@@ -170,7 +171,8 @@ begin  -- rtl
     port map (
       clk_io_i         => clk_io_i,
       clk_core_i       => clk_core_i,
-      rst_n_i          => rst_n_i,
+      rst_n_core_i          => rst_n_core,
+      rst_n_io_i => rst_n_io,
       rport_d_o        => rport_d_o,
       rport_dvalid_o   => rport_dvalid_o,
       rport_dlast_o    => rport_dlast_o,
@@ -186,7 +188,7 @@ begin  -- rtl
       fbm_data_i       => fbm_rd_data);
 
 
-  -- The memory itself.
+  -- The Frame Buffer Memory (F.B.M.), formerly known as F.... Big Memory: a generic_dpram instance
   U_F_B_Memory : generic_dpram
     generic map (
       g_data_width => g_data_width * g_ratio,
diff --git a/modules/wrsw_swcore/async/mpm_write_path.vhd b/modules/wrsw_swcore/mpm/mpm_write_path.vhd
similarity index 100%
rename from modules/wrsw_swcore/async/mpm_write_path.vhd
rename to modules/wrsw_swcore/mpm/mpm_write_path.vhd