diff --git a/modules/wishbone/wb_async_bridge/wb_cpu_bridge.vhd b/modules/wishbone/wb_async_bridge/wb_async_bridge.vhd
similarity index 54%
rename from modules/wishbone/wb_async_bridge/wb_cpu_bridge.vhd
rename to modules/wishbone/wb_async_bridge/wb_async_bridge.vhd
index c9f0f3eede38d8d8f683d313ab29c8e342120608..59846bdbb75a8dba9cd741983529608ab7f038d7 100644
--- a/modules/wishbone/wb_async_bridge/wb_cpu_bridge.vhd
+++ b/modules/wishbone/wb_async_bridge/wb_async_bridge.vhd
@@ -5,7 +5,7 @@
 -- Author     : Tomasz Wlostowski
 -- Company    : CERN BE-Co-HT
 -- Created    : 2010-05-18
--- Last update: 2011-03-16
+-- Last update: 2011-09-21
 -- Platform   : FPGA-generic
 -- Standard   : VHDL'87
 -------------------------------------------------------------------------------
@@ -21,9 +21,9 @@
 -- Revisions  :
 -- Date        Version  Author          Description
 -- 2010-05-18  1.0      twlostow        Created
+-- 2011-09-21  1.1      twlostow        Added support for pipelined mode
 -------------------------------------------------------------------------------
 
-
 library ieee;
 
 use ieee.std_logic_1164.all;
@@ -31,24 +31,22 @@ use ieee.numeric_std.all;
 use ieee.math_real.log2;
 use ieee.math_real.ceil;
 
-
 use work.gencores_pkg.all;
 use work.wishbone_pkg.all;
 
-entity wb_cpu_bridge is
+entity wb_async_bridge is
   generic (
-    g_simulation           : integer := 0;
-    g_wishbone_num_masters : integer);
-
+    g_simulation          : integer := 0;
+    g_interface_mode      : t_wishbone_interface_mode;
+    g_address_granularity : t_wishbone_address_granularity);
   port(
-    sys_rst_n_i : in std_logic;         -- global reset
+    rst_n_i   : in std_logic;           -- global reset
+    clk_sys_i : in std_logic;           -- system clock
 
 -------------------------------------------------------------------------------
 -- Atmel EBI bus
 -------------------------------------------------------------------------------
 
-    cpu_clk_i  : in std_logic;          -- clock (not used now)
--- async chip select, active LOW
     cpu_cs_n_i : in std_logic;
 -- async write, active LOW
     cpu_wr_n_i : in std_logic;
@@ -71,59 +69,69 @@ entity wb_cpu_bridge is
 -- Wishbone master I/F 
 -------------------------------------------------------------------------------
 
--- wishbone clock input (refclk/2)
-    wb_clk_i  : in  std_logic;
 -- wishbone master address output (m->s, common for all slaves)
-    wb_addr_o : out std_logic_vector(c_wishbone_addr_width - 1 downto 0);
--- wishbone master data output (m->s, common for all slaves)
-    wb_data_o : out std_logic_vector(31 downto 0);
+    wb_adr_o : out std_logic_vector(c_wishbone_addr_width - 1 downto 0);
+-- wishbone master data output (m->s common for all slaves)
+    wb_dat_o : out std_logic_vector(31 downto 0);
 -- wishbone cycle strobe (m->s, common for all slaves)
-    wb_stb_o  : out std_logic;
+    wb_stb_o : out std_logic;
 -- wishbone write enable (m->s, common for all slaves)
-    wb_we_o   : out std_logic;
+    wb_we_o  : out std_logic;
 -- wishbone byte select output (m->s, common for all slaves)
-    wb_sel_o  : out std_logic_vector(3 downto 0);
-
+    wb_sel_o : out std_logic_vector(3 downto 0);
 
 -- wishbone cycle select (m->s, individual)
-    wb_cyc_o  : out std_logic_vector (g_wishbone_num_masters - 1 downto 0);
+    wb_cyc_o : out std_logic;                        -- single cycle line, driven from the adapter's ma_cyc_o
 -- wishbone master data input (s->m, individual)
-    wb_data_i : in  std_logic_vector (32 * g_wishbone_num_masters-1 downto 0);
+    wb_dat_i : in  std_logic_vector(31 downto 0);    -- one 32-bit read-data word, same width as wb_dat_o
 -- wishbone ACK input (s->m, individual)
-    wb_ack_i  : in  std_logic_vector(g_wishbone_num_masters-1 downto 0)
+    wb_ack_i : in  std_logic                         -- single acknowledge; g_wishbone_num_masters no longer exists
 
     );
 
-end wb_cpu_bridge;
-
-architecture behavioral of wb_cpu_bridge is
-
-  constant c_periph_addr_bits : integer := c_cpu_addr_width - c_wishbone_addr_width;
-
-  signal periph_addr     : std_logic_vector(c_periph_addr_bits - 1 downto 0);
-  signal periph_addr_reg : std_logic_vector(c_periph_addr_bits - 1 downto 0);
-
-  signal periph_sel     : std_logic_vector(g_wishbone_num_masters - 1 downto 0);
-  signal periph_sel_reg : std_logic_vector(g_wishbone_num_masters - 1 downto 0);
+end wb_async_bridge;
 
+architecture behavioral of wb_async_bridge is
 
   signal rw_sel, cycle_in_progress, cs_synced, rd_pulse, wr_pulse : std_logic;
   signal cpu_data_reg                                             : std_logic_vector(31 downto 0);
-  signal ack_muxed                                                : std_logic;
-  signal data_in_muxed                                            : std_logic_vector(31 downto 0);
   signal long_cycle                                               : std_logic;
-  signal wb_cyc_int                                               : std_logic;
-  
+
+
+  signal wb_in  : t_wishbone_master_in;
+  signal wb_out : t_wishbone_master_out;
 begin
 
+  U_Adapter : wb_slave_adapter  -- converts the internal record interface to the external wb_* ports
+    generic map (
+      g_master_use_struct  => false,             -- external side uses the individual ma_* ports
+      g_master_mode        => g_interface_mode,  -- (the separating comma was missing here)
+      g_master_granularity => g_address_granularity,
+      g_slave_use_struct   => true,              -- internal side uses the wb_out / wb_in records
+      g_slave_mode         => CLASSIC,
+      g_slave_granularity  => WORD)
+    port map (
+      clk_sys_i => clk_sys_i,
+      rst_n_i   => rst_n_i,
+      slave_i   => wb_out,                       -- requests generated by the bridge process
+      slave_o   => wb_in,                        -- replies (ack / read data) back to the bridge process
+      ma_adr_o  => wb_adr_o,
+      ma_dat_o  => wb_dat_o,
+      ma_sel_o  => wb_sel_o,
+      ma_cyc_o  => wb_cyc_o,
+      ma_stb_o  => wb_stb_o,
+      ma_we_o   => wb_we_o,
+      ma_dat_i  => wb_dat_i,
+      ma_ack_i  => wb_ack_i);  -- NOTE(review): ma_err_i/ma_rty_i/ma_stall_i/ma_int_i are unassociated - confirm the component declares defaults
+
   gen_sync_chains_nosim : if(g_simulation = 0) generate
 
     sync_ffs_cs : gc_sync_ffs
       generic map (
         g_sync_edge => "positive")
       port map
-      (rst_n_i  => sys_rst_n_i,
-       clk_i    => wb_clk_i,
+      (rst_n_i  => rst_n_i,
+       clk_i    => clk_sys_i,
        data_i   => cpu_cs_n_i,
        synced_o => cs_synced,
        npulse_o => open
@@ -133,8 +141,8 @@ begin
       generic map (
         g_sync_edge => "positive")
       port map (
-        rst_n_i  => sys_rst_n_i,
-        clk_i    => wb_clk_i,
+        rst_n_i  => rst_n_i,
+        clk_i    => clk_sys_i,
         data_i   => cpu_wr_n_i,
         synced_o => open,
         npulse_o => wr_pulse
@@ -144,8 +152,8 @@ begin
       generic map (
         g_sync_edge => "positive")
       port map (
-        rst_n_i  => sys_rst_n_i,
-        clk_i    => wb_clk_i,
+        rst_n_i  => rst_n_i,
+        clk_i    => clk_sys_i,
         data_i   => cpu_rd_n_i,
         synced_o => open,
         npulse_o => rd_pulse
@@ -159,85 +167,43 @@ begin
     cs_synced <= cpu_cs_n_i;
   end generate gen_sim;
 
-
-
-  periph_addr <= cpu_addr_i (c_cpu_addr_width - 1 downto c_wishbone_addr_width);
-
-  onehot_decode : process (periph_addr)  -- periph_sel <= onehot_decode(periph_addr)
-    variable temp1 : std_logic_vector (periph_sel'high downto 0);
-    variable temp2 : integer range 0 to periph_sel'high;
+  process(clk_sys_i)
   begin
-    temp1 := (others => '0');
-    temp2 := 0;
-    for i in periph_addr'range loop
-      if (periph_addr(i) = '1') then
-        temp2 := 2*temp2+1;
-      else
-        temp2 := 2*temp2;
-      end if;
-    end loop;
-    temp1(temp2) := '1';
-    periph_sel   <= temp1;
-  end process;
-
-
-  ACK_MUX : process (periph_addr_reg, wb_ack_i)
-  begin
-    if(to_integer(unsigned(periph_addr_reg)) < g_wishbone_num_masters) then
-      ack_muxed <= wb_ack_i(to_integer(unsigned(periph_addr_reg)));
-    else
-      ack_muxed <= '0';
-    end if;
-  end process;
-
-
-  DIN_MUX : process (periph_addr_reg, wb_data_i)
-  begin
-    if(to_integer(unsigned(periph_addr_reg)) < g_wishbone_num_masters) then
-      data_in_muxed <= wb_data_i(32*to_integer(unsigned(periph_addr_reg)) + 31 downto 32 * to_integer(unsigned(periph_addr_reg)));
-    else
-      data_in_muxed <= (others => 'X');
-    end if;
-  end process;
-
-  process(wb_clk_i)
-  begin
-    if(rising_edge(wb_clk_i)) then
-      if(sys_rst_n_i = '0') then
+    if(rising_edge(clk_sys_i)) then
+      if(rst_n_i = '0') then
         cpu_data_reg      <= (others => '0');
         cycle_in_progress <= '0';
         rw_sel            <= '0';
         cpu_nwait_o       <= '1';
         long_cycle        <= '0';
 
-        wb_addr_o  <= (others => '0');
-        wb_data_o  <= (others => '0');
-        wb_sel_o   <= (others => '1');
-        wb_stb_o   <= '0';
-        wb_we_o    <= '0';
-        wb_cyc_int <= '0';
+        wb_out.adr <= (others => '0');  -- reset the request record; U_Adapter drives the wb_* ports from it
+        wb_out.dat <= (others => '0');
+        wb_out.sel <= (others => '1');
+        wb_out.stb <= '0';
+        wb_out.we  <= '0';
+        wb_out.cyc <= '0';
 
-        periph_sel_reg  <= (others => '0');
-        periph_addr_reg <= (others => '0');
+        -- periph_sel_reg / periph_addr_reg are no longer declared in this
+        -- architecture, so their reset assignments are dropped.
       else
-        
 
         if(cs_synced = '0') then
 
-          wb_addr_o <= cpu_addr_i(c_wishbone_addr_width-1 downto 0);
+          wb_out.adr <= std_logic_vector(resize(unsigned(cpu_addr_i), wb_out.adr'length));  -- numeric_std resize needs unsigned; keeps the low address bits
 
           if(cycle_in_progress = '1') then
-            if(ack_muxed = '1') then
+            if(wb_in.ack = '1') then
 
               if(rw_sel = '0') then
-                cpu_data_reg <= data_in_muxed;
+                cpu_data_reg <= wb_in.dat;
               end if;
 
               cycle_in_progress <= '0';
-              wb_cyc_int        <= '0';
-              wb_sel_o          <= (others => '1');
-              wb_stb_o          <= '0';
-              wb_we_o           <= '0';
+              wb_out.cyc        <= '0';              -- acknowledged: close the cycle (signal is wb_out, not wb_o)
+              wb_out.sel        <= (others => '1');
+              wb_out.stb        <= '0';
+              wb_out.we         <= '0';
               cpu_nwait_o       <= '1';
               long_cycle        <= '0';
               
@@ -247,19 +213,15 @@ begin
             end if;
             
           elsif(rd_pulse = '1' or wr_pulse = '1') then
-            wb_we_o <= wr_pulse;
-            rw_sel  <= wr_pulse;
+            wb_out.cyc <= '1';       -- rd/wr pulse seen: open a cycle (signal is wb_out, not wb_o)
+            wb_out.stb <= '1';
+            wb_out.we  <= wr_pulse;  -- '1' for a write, '0' for a read
 
-            wb_cyc_int <= '1';
-            wb_stb_o   <= '1';
-            wb_addr_o  <= cpu_addr_i(c_wishbone_addr_width-1 downto 0);
             long_cycle <= '0';
-
-            periph_addr_reg <= cpu_addr_i (c_cpu_addr_width-1 downto c_wishbone_addr_width);
-            periph_sel_reg  <= periph_sel;
+            rw_sel     <= wr_pulse;
 
             if(wr_pulse = '1') then
-              wb_data_o <= cpu_data_b;
+              wb_out.dat <= cpu_data_b;
             end if;
 
             cycle_in_progress <= '1';
@@ -278,10 +240,4 @@ begin
     end if;
   end process;
 
-  gen_cyc_outputs : for i in 0 to g_wishbone_num_masters-1 generate
-    wb_cyc_o(i) <= wb_cyc_int and periph_sel_reg(i);
-  end generate gen_cyc_outputs;
-  
-  
-
 end behavioral;
diff --git a/modules/wishbone/wb_async_bridge/xwb_async_bridge.vhd b/modules/wishbone/wb_async_bridge/xwb_async_bridge.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..6ddbdda255608215ceb492e76be7bd4bc69d4c80
--- /dev/null
+++ b/modules/wishbone/wb_async_bridge/xwb_async_bridge.vhd
@@ -0,0 +1,73 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use work.wishbone_pkg.all;
+
+entity xwb_async_bridge is
+  generic (
+    g_simulation          : integer := 0;  -- passed to wb_async_bridge (0 instantiates its input synchronisers)
+    g_interface_mode      : t_wishbone_interface_mode;        -- passed unchanged to wb_async_bridge
+    g_address_granularity : t_wishbone_address_granularity);  -- passed unchanged to wb_async_bridge
+  port(
+    rst_n_i   : in std_logic;           -- global reset
+    clk_sys_i : in std_logic;           -- system clock
+
+-------------------------------------------------------------------------------
+-- Atmel EBI bus
+-------------------------------------------------------------------------------
+-- async chip select, active LOW
+    cpu_cs_n_i : in std_logic;
+-- async write, active LOW
+    cpu_wr_n_i : in std_logic;
+-- async read, active LOW
+    cpu_rd_n_i : in std_logic;
+-- byte select, active LOW (not used due to weird CPU pin layout - NBS2 line is
+-- shared with 100 Mbps Ethernet PHY)
+    cpu_bs_n_i : in std_logic_vector(3 downto 0);
+
+-- address input
+    cpu_addr_i : in std_logic_vector(c_cpu_addr_width-1 downto 0);
+
+-- data bus (bidirectional)
+    cpu_data_b : inout std_logic_vector(31 downto 0);
+
+-- async wait, active LOW
+    cpu_nwait_o : out std_logic;
+
+-------------------------------------------------------------------------------
+-- Wishbone master I/F (record ports wrapping the bridge's individual wb_* ports)
+-------------------------------------------------------------------------------
+    master_o : out t_wishbone_master_out;  -- adr, dat, stb, we, sel and cyc are driven by the wrapped bridge
+    master_i : in  t_wishbone_master_in    -- only the dat and ack fields are forwarded to the wrapped bridge
+    );
+
+end xwb_async_bridge;
+
+architecture wrapper of xwb_async_bridge is
+-- Pure structural wrapper: no local declarations are needed.
+begin
+  -- Record-port front end for wb_async_bridge; every generic is forwarded as-is.
+  U_Wrapped_Bridge : wb_async_bridge
+    generic map (
+      g_address_granularity => g_address_granularity,
+      g_interface_mode      => g_interface_mode,
+      g_simulation          => g_simulation)
+    port map (
+      clk_sys_i   => clk_sys_i,
+      rst_n_i     => rst_n_i,
+      cpu_addr_i  => cpu_addr_i,
+      cpu_data_b  => cpu_data_b,
+      cpu_bs_n_i  => cpu_bs_n_i,
+      cpu_cs_n_i  => cpu_cs_n_i,
+      cpu_rd_n_i  => cpu_rd_n_i,
+      cpu_wr_n_i  => cpu_wr_n_i,
+      cpu_nwait_o => cpu_nwait_o,
+      wb_cyc_o    => master_o.cyc,
+      wb_stb_o    => master_o.stb,
+      wb_we_o     => master_o.we,
+      wb_sel_o    => master_o.sel,
+      wb_adr_o    => master_o.adr,
+      wb_dat_o    => master_o.dat,
+      wb_ack_i    => master_i.ack,
+      wb_dat_i    => master_i.dat);
+
+end wrapper;
diff --git a/modules/wishbone/wb_slave_adapter.vhd b/modules/wishbone/wb_slave_adapter.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..1dcfc4aa0bc8c382d9dc2ff9090ba72c1ad3dfa6
--- /dev/null
+++ b/modules/wishbone/wb_slave_adapter.vhd
@@ -0,0 +1,201 @@
+-- universal "adapter"
+-- pipelined <> classic
+-- word-aligned/byte-aligned address
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+use work.wishbone_pkg.all;
+
+entity wb_slave_adapter is
+
+  generic (
+    g_master_use_struct  : boolean;                         -- true: master-side inputs come from master_i; false: from the ma_*_i ports
+    g_master_mode        : t_wishbone_interface_mode;       -- handshake style of the master port (compared against CLASSIC / PIPELINED)
+    g_master_granularity : t_wishbone_address_granularity;  -- address unit of the master port (compared against BYTE)
+    g_slave_use_struct   : boolean;                         -- true: slave-side inputs come from slave_i; false: from the sl_*_i ports
+    g_slave_mode         : t_wishbone_interface_mode;       -- handshake style of the slave port
+    g_slave_granularity  : t_wishbone_address_granularity   -- address unit of the slave port
+    );
+
+  port (
+    clk_sys_i : in std_logic;           -- clock of the request-tracking state machine
+    rst_n_i   : in std_logic;           -- synchronous reset, active low
+
+-- slave port (i.e. wb_slave_adapter is slave)
+    sl_adr_i : in std_logic_vector(c_wishbone_address_width-1 downto 0);
+    sl_dat_i : in std_logic_vector(c_wishbone_data_width-1 downto 0);
+    sl_sel_i : in std_logic_vector(c_wishbone_data_width/8-1 downto 0);
+    sl_cyc_i : in std_logic;
+    sl_stb_i : in std_logic;
+    sl_we_i  : in std_logic;
+
+-- slave port replies (individual signals)
+    sl_dat_o   : out std_logic_vector(c_wishbone_data_width-1 downto 0);
+    sl_err_o   : out std_logic;
+    sl_rty_o   : out std_logic;
+    sl_ack_o   : out std_logic;
+    sl_stall_o : out std_logic;
+    sl_int_o   : out std_logic;
+
+    slave_i : in  t_wishbone_slave_in;   -- record form of the sl_*_i inputs
+    slave_o : out t_wishbone_slave_out;  -- record form of the sl_*_o outputs
+
+-- master port (i.e. wb_slave_adapter is master)
+    ma_adr_o : out std_logic_vector(c_wishbone_address_width-1 downto 0);
+    ma_dat_o : out std_logic_vector(c_wishbone_data_width-1 downto 0);
+    ma_sel_o : out std_logic_vector(c_wishbone_data_width/8-1 downto 0);
+    ma_cyc_o : out std_logic;
+    ma_stb_o : out std_logic;
+    ma_we_o  : out std_logic;
+
+    ma_dat_i   : in std_logic_vector(c_wishbone_data_width-1 downto 0);
+    ma_err_i   : in std_logic;
+    ma_rty_i   : in std_logic;
+    ma_ack_i   : in std_logic;
+    ma_stall_i : in std_logic;
+    ma_int_i   : in std_logic;
+
+    master_i : in  t_wishbone_master_in;  -- record form of the ma_*_i inputs
+    master_o : out t_wishbone_master_out  -- record form of the master-side outputs
+    );
+end wb_slave_adapter;
+
+architecture rtl of wb_slave_adapter is
+
+  function f_num_byte_address_bits
+    return integer is
+  begin
+    -- Number of address bits that select a byte inside one data word (8 -> 0, 16 -> 1, 32 -> 2, 64 -> 3).
+    for i in 0 to 3 loop
+      if c_wishbone_data_width = 8 * (2 ** i) then
+        return i;
+      end if;
+    end loop;
+    -- Any other bus width is unsupported.
+    report "wb_slave_adapter: invalid c_wishbone_data_width (we support 8, 16, 32 and 64)" severity failure;
+    return 0;
+  end f_num_byte_address_bits;
+
+  function f_zeros(size : integer) return std_logic_vector is
+    constant c_cleared : std_logic_vector(natural'(size)-1 downto 0) := (others => '0');  -- 'size' zero bits, indexed size-1 downto 0
+  begin
+    return c_cleared;
+  end f_zeros;
+
+  type t_fsm_state is (IDLE, WAIT4ACK);
+
+  signal fsm_state : t_fsm_state;
+
+  signal master_in  : t_wishbone_master_in;
+  signal master_out : t_wishbone_master_out;
+  signal slave_in   : t_wishbone_slave_in;
+  signal slave_out  : t_wishbone_slave_out;
+  
+begin  -- rtl
+
+  -- Slave-side inputs: taken from the record port or from the individual sl_*_i ports.
+  gen_slave_use_struct : if (g_slave_use_struct) generate
+    slave_in <= slave_i;
+  end generate gen_slave_use_struct;
+  gen_slave_use_slv : if (not g_slave_use_struct) generate
+    slave_in.cyc <= sl_cyc_i;
+    slave_in.stb <= sl_stb_i;
+    slave_in.we  <= sl_we_i;
+    slave_in.dat <= sl_dat_i;
+    slave_in.sel <= sl_sel_i;
+    slave_in.adr <= sl_adr_i;
+  end generate gen_slave_use_slv;
+  -- Slave-side outputs are always driven on both the record and the sl_*_o ports.
+  slave_o    <= slave_out;
+  sl_ack_o   <= slave_out.ack;
+  sl_rty_o   <= slave_out.rty;
+  sl_err_o   <= slave_out.err;
+  sl_stall_o <= slave_out.stall;
+  sl_dat_o   <= slave_out.dat;
+  sl_int_o   <= slave_out.int;
+  -- Master-side inputs: taken from the record port or from the individual ma_*_i ports.
+  gen_master_use_struct : if (g_master_use_struct) generate
+    master_in <= master_i;
+  end generate gen_master_use_struct;
+  gen_master_use_slv : if (not g_master_use_struct) generate
+    master_in <= (
+      ack => ma_ack_i, rty => ma_rty_i, err => ma_err_i,
+      dat => ma_dat_i, stall => ma_stall_i, int => ma_int_i);
+  end generate gen_master_use_slv;
+  -- Master-side outputs: the ma_*_o ports were previously left undriven; mirror the record onto them.
+  master_o <= master_out;
+  ma_adr_o <= master_out.adr;
+  ma_dat_o <= master_out.dat;
+  ma_sel_o <= master_out.sel;
+  ma_cyc_o <= master_out.cyc;
+  ma_stb_o <= master_out.stb;
+  ma_we_o  <= master_out.we;
+
+  -- Address translation between the slave-port and master-port address units.
+  -- The shift distance is the number of byte-select address bits of one data word.
+  p_gen_address : process(slave_in, master_out)
+  begin
+    if(g_slave_granularity = g_master_granularity) then
+      master_out.adr <= slave_in.adr;  -- same unit on both sides: pass through
+    elsif(g_master_granularity = BYTE) then  -- word address in, byte address out
+      master_out.adr <= std_logic_vector(shift_left(unsigned(slave_in.adr), f_num_byte_address_bits));
+    else  -- byte address in, word address out
+      master_out.adr <= std_logic_vector(shift_right(unsigned(slave_in.adr), f_num_byte_address_bits));
+    end if;
+  end process;
+
+  -- Request tracker: stays in IDLE until the slave side strobes, then sits in
+  -- WAIT4ACK until the acknowledge comes back. Reset is synchronous, active low.
+  p_fsm : process(clk_sys_i)
+  begin
+    if rising_edge(clk_sys_i) then
+      if rst_n_i = '0' then
+        fsm_state <= IDLE;
+      elsif fsm_state = IDLE then
+        -- a strobe opens a request
+        if slave_in.stb = '1' then
+          fsm_state <= WAIT4ACK;
+        end if;
+      elsif slave_out.ack = '1' then
+        -- only WAIT4ACK reaches this branch (two-state enumeration):
+        -- the acknowledge closes the request
+        fsm_state <= IDLE;
+      end if;
+    end if;
+  end process;
+  -- Strobe/stall translation between the classic and pipelined handshakes.
+  -- fsm_state is in the sensitivity list because the strobe gating reads it.
+  p_gen_control : process(slave_in, slave_out, master_in, master_out, fsm_state)
+  begin
+    if(g_slave_mode = PIPELINED and g_master_mode = CLASSIC) then
+      master_out.stb <= slave_in.stb;
+      if(master_out.cyc = '1') then
+        slave_out.stall <= not master_in.ack;  -- stall the pipelined master until the classic slave acknowledges
+      else
+        slave_out.stall <= '0';
+      end if;
+    elsif(g_slave_mode = CLASSIC and g_master_mode = PIPELINED) then
+      if(fsm_state = IDLE) then
+        master_out.stb <= slave_in.stb;  -- forward the classic strobe only while no request is pending
+      else
+        master_out.stb <= '0';  -- WAIT4ACK: the held classic strobe must not look like new requests
+      end if;
+      slave_out.stall <= '0';  -- NOTE(review): master_in.stall is not honoured in this mode - confirm slaves never stall
+    else
+      master_out.stb  <= slave_in.stb;
+      slave_out.stall <= master_in.stall;
+    end if;
+  end process;
+  -- Signals that need no mode or granularity translation are passed straight through.
+  master_out.dat <= slave_in.dat;  -- write data (was wrongly fed from slave_in.adr)
+  master_out.cyc <= slave_in.cyc;
+  master_out.sel <= slave_in.sel;
+  master_out.we  <= slave_in.we;
+  slave_out.ack <= master_in.ack;  -- replies travel from the master port back to the slave port
+  slave_out.err <= master_in.err;
+  slave_out.rty <= master_in.rty;
+  slave_out.dat <= master_in.dat;
+  slave_out.int <= master_in.int;  -- previously undriven, which left slave_o.int / sl_int_o at 'U'
+end rtl;