diff --git a/modules/wr_endpoint/ep_tx_framer.vhd b/modules/wr_endpoint/ep_tx_framer.vhd
index e1cef4a6883ff4664f985186fd74bb027d147950..31b462505013de853bb493a9871b515bd4384b36 100644
--- a/modules/wr_endpoint/ep_tx_framer.vhd
+++ b/modules/wr_endpoint/ep_tx_framer.vhd
@@ -6,7 +6,7 @@
 -- Author     : Tomasz Wlostowski
 -- Company    : CERN BE-CO-HT
 -- Created    : 2009-06-22
--- Last update: 2011-07-12
+-- Last update: 2011-08-15
 -- Platform   : FPGA-generic
 -- Standard   : VHDL'87
 -------------------------------------------------------------------------------
@@ -32,6 +32,7 @@ use ieee.numeric_std.all;
 
 library work;
 use work.gencores_pkg.all;
+use work.genram_pkg.all;
 use work.wr_fabric_pkg.all;
 use work.endpoint_private_pkg.all;
 use work.ep_wbgen2_pkg.all;
@@ -102,7 +103,7 @@ architecture behavioral of ep_tx_framer is
 
   constant c_IFG_LENGTH : integer := 8;
 
-  type t_tx_framer_state is (TXF_IDLE, TXF_HEADER, TXF_DATA, TXF_OOB, TXF_WAIT_CRC, TXF_EMBED_CRC1, TXF_EMBED_CRC2, TXF_EMBED_CRC3, TXF_GAP, TXF_PAD, TXF_ABORT);
+  type t_tx_framer_state is (TXF_IDLE, TXF_ADDR, TXF_PAUSE, TXF_QHEADER, TXF_DATA, TXF_OOB, TXF_WAIT_CRC, TXF_EMBED_CRC1, TXF_EMBED_CRC2, TXF_EMBED_CRC3, TXF_GAP, TXF_PAD, TXF_ABORT);
 
 -- general signals
   signal state    : t_tx_framer_state;
@@ -146,13 +147,10 @@ architecture behavioral of ep_tx_framer is
   signal snk_out : t_wrf_sink_out;
 
   signal abort_now : std_logic;
-  
-  type t_header_processor_rval is record
-    next_state : t_tx_framer_state;
-    sof        : std_logic;
-    data       : std_logic_vector(15 downto 0);
-    valid      : std_logic;
-  end record;
+  signal stall_int : std_logic;
+  signal stall_int_d0: std_logic;
+  signal untagging : std_logic;
+
 
 
   function b2s (x : boolean)
@@ -164,92 +162,46 @@ architecture behavioral of ep_tx_framer is
       return '0';
     end if;
   end function;
+
+
+
+  signal vut_rd_vid    : integer;
+  signal vut_wr_vid    : integer;
+  signal vut_untag     : std_logic;
+  signal vut_is_802_1q : std_logic;
+
+  signal vut_stored_tag       : std_logic_vector(15 downto 0);
+  signal vut_stored_ethertype : std_logic_vector(15 downto 0);
   
   
-  impure function f_process_packet_header(
-    snk         : t_wrf_sink_in;
-    offset      : unsigned(3 downto 0);
-    has_mac     : std_logic;
-    is_pause    : std_logic;
-    pause_delay : std_logic_vector(15 downto 0)
-    ) return t_header_processor_rval is
-    variable rv : t_header_processor_rval;
-  begin
+begin  -- behavioral
 
-    rv.next_state := TXF_HEADER;
-    -- pause frame generation
-    if(is_pause = '1') then
-      rv.valid := '1';
-      case offset is
-        when x"0" =>
-          rv.sof  := '1';
-          rv.data := x"0180";
-        when x"1"=>
-          rv.sof  := '0';
-          rv.data := x"c200";
-        when x"2" =>
-          rv.data := x"0001";
-        when x"3" =>
-          rv.data := regs_b.mach_o;
-        when x"4" =>
-          rv.data := regs_b.macl_o(31 downto 16);
-        when x"5" =>
-          rv.data := regs_b.macl_o(15 downto 0);
-        when x"6" =>
-          rv.data := x"8808";
-        when x"7" =>
-          rv.data       := pause_delay;
-          rv.next_state := TXF_PAD;
-        when others => null;
-      end case;
-    else
-      -- SRC MAC insertion, VLAN stuff
-      rv.valid := '1';
-
-      case offset is
-        when x"0" =>
-          rv.sof  := '1';
-          rv.data := snk.dat;
-        when x"1" =>
-          rv.sof  := '1';
-          rv.data := snk.dat;
-        when x"2" =>
-          rv.data := snk.dat;
-        when x"3" =>
-          if(has_mac = '0') then
-            rv.data := regs_b.mach_o;
-          else
-            rv.data := snk.dat;
-          end if;
-          
-        when x"4" =>
-          if(has_mac = '0') then
-            rv.data := regs_b.macl_o(31 downto 16);
-          else
-            rv.data := snk.dat;
-          end if;
-
-        when x"5" =>
-          if(has_mac = '0') then
-            rv.data := regs_b.macl_o(15 downto 0);
-          else
-            rv.data := snk.dat;
-          end if;
-
-          -- no VLAN tagging support - just treat the eventual 802.1q header as
-          -- a payload
---          if (not g_with_vlans) then
-          rv.next_state := TXF_DATA;
---          end if;
-        when others => null;
-      end case;
-    end if;
-    return rv;
-  end function;
+  -----------------------------------------------------------------------------
+  -- VLAN Functionality
+  -----------------------------------------------------------------------------
 
+  gen_with_vlans : if(g_with_vlans) generate
 
-begin  -- behavioral
+    vut_rd_vid <= to_integer(unsigned(snk_i.dat(11 downto 0)));
+    vut_wr_vid <= to_integer(unsigned(regs_b.vcr1_vid_o));
 
+    p_untagged_set_access : process(clk_sys_i)
+      variable vut_set : std_logic_vector(4095 downto 0);
+    begin
+      if rising_edge(clk_sys_i) then
+        if(regs_b.vcr1_value_wr_o = '1') then
+          vut_set(vut_wr_vid) := regs_b.vcr1_value_o;
+        end if;
+
+        if(snk_valid = '1') then
+          vut_untag <= vut_set(vut_rd_vid);
+        end if;
+      end if;
+    end process;
+
+  end generate gen_with_vlans;
+
+  regs_b <= c_ep_registers_init_value;
 
   crc_gen_reset <= '1' when rst_n_i = '0' else ((sof_p1 and (not tx_pause_mode)) or crc_gen_force_reset);
 
@@ -274,6 +226,7 @@ begin  -- behavioral
       match_o => open,
       crc_o   => crc_value);
 
+  
   p_detect_frame : process(clk_sys_i)
   begin
     if rising_edge(clk_sys_i) then
@@ -287,7 +240,7 @@ begin  -- behavioral
 
   sof_p1    <= not snk_cyc_d0 and snk_i.cyc;
   eof_p1    <= snk_cyc_d0 and not snk_i.cyc;
-  snk_valid <= (snk_i.cyc and snk_i.stb and snk_i.we and not snk_out.stall);
+  snk_valid <= (snk_i.cyc and snk_i.stb and snk_i.we) and not stall_int;
 
   decoded_status <= f_unmarshall_wrf_status(snk_i.dat);
 
@@ -302,8 +255,8 @@ begin  -- behavioral
       
       if rst_n_i = '0' then
         stored_status.has_smac <= '0';
-        stored_status.is_hp   <= '0';
-        stored_status.has_crc <= '0';
+        stored_status.is_hp    <= '0';
+        stored_status.has_crc  <= '0';
       else
         if(snk_valid = '1' and snk_i.adr = c_WRF_STATUS) then
           stored_status <= decoded_status;
@@ -328,7 +281,7 @@ begin  -- behavioral
                 oob_state <= OOB_1;
               end if;
             when OOB_1 =>
-              if(snk_valid = '1' and  snk_i.adr = c_WRF_OOB) then
+              if(snk_valid = '1' and snk_i.adr = c_WRF_OOB) then
                 oob.oob_type <= snk_i.dat(15 downto 12);
                 oob_state    <= OOB_2;
               end if;
@@ -345,7 +298,6 @@ begin  -- behavioral
   end generate gen_ts_supported;
 
   p_tx_fsm : process (clk_sys_i)
-    variable phdr : t_header_processor_rval;
   begin  -- process
     if rising_edge(clk_sys_i) then
       if(rst_n_i = '0') then
@@ -364,7 +316,6 @@ begin  -- behavioral
 
         snk_out.err <= '0';
         snk_out.rty <= '0';
-        snk_out.ack <= '0';
 
         tx_pause_ack_o <= '0';
 
@@ -372,6 +323,7 @@ begin  -- behavioral
         crc_gen_force_reset <= '0';
 
         oob_fid_stb_o <= '0';
+
       else
 
         -- we are in the middle of the frame and the framer has got suddenly
@@ -385,10 +337,10 @@ begin  -- behavioral
           q_valid  <= '0';
           
         elsif (state /= TXF_IDLE and pcs_error_i = '1') then
-          tx_ready  <= '0';
-          counter   <= (others => '0');
-          snk_o.err <= '1';
-          state     <= TXF_GAP;
+          tx_ready    <= '0';
+          counter     <= (others => '0');
+          snk_out.err <= '1';
+          state       <= TXF_GAP;
         else
 
           case state is
@@ -399,21 +351,23 @@ begin  -- behavioral
 
             when TXF_IDLE =>            -- idle state - wait for the next frame
 
-              snk_o.err <= '0';
-              snk_o.rty <= '0';
+              snk_out.err <= '0';
+              snk_out.rty <= '0';
 
               q_abort <= '0';
 
               tx_ready <= tx_flow_enable_i;
 
-              -- Check start-of-frame and send-pause signals and eventually
-              -- commence frame transmission
+                                        -- Check start-of-frame and send-pause signals and eventually
+                                        -- commence frame transmission
 
-              if((sof_p1 = '1' or tx_pause_i = '1') and regs_b.ecr_tx_en_o = '1') then
-                -- enable writing to PCS FIFO
+              if(pcs_dreq_i = '1' and (sof_p1 = '1' or tx_pause_i = '1') and regs_b.ecr_tx_en_o = '1') then
+                                        -- enable writing to PCS FIFO
+                q_sof      <= '1';
                 q_eof      <= '0';
                 write_mask <= '1';
 
+
                 tx_pause_ack_o <= tx_pause_i;
                 tx_pause_mode  <= tx_pause_i;
                 tx_pause_delay <= tx_pause_delay_i;
@@ -422,7 +376,12 @@ begin  -- behavioral
                 crc_gen_enable_mask <= '1';
 
                 counter <= (others => '0');
-                state   <= TXF_HEADER;
+                if(tx_pause_i = '1') then
+                  state <= TXF_PAUSE;
+                else
+                  state <= TXF_ADDR;
+                end if;
+                
               else
                 write_mask <= '0';
                 q_valid    <= '0';
@@ -434,30 +393,145 @@ begin  -- behavioral
 -------------------------------------------------------------------------------
 -- TX FSM state HEADER: processes the frame header
 -------------------------------------------------------------------------------
-
-            when TXF_HEADER =>
-              tx_pause_ack_o <= '0';
-              tx_ready       <= not tx_pause_mode;
-
+            when TXF_PAUSE =>
+              tx_ready            <= '0';
+              tx_pause_ack_o      <= '0';
+              q_sof               <= '0';
               crc_gen_force_reset <= '0';
 
-              if((snk_valid = '1' and snk_i.adr = c_WRF_DATA) or tx_pause_mode = '1') then
-
-                phdr := f_process_packet_header(snk_i, counter(3 downto 0), stored_status.has_smac, tx_pause_mode, tx_pause_delay);
-
+              if(pcs_dreq_i = '1') then
+                q_valid <= '1';
+                case counter(3 downto 0) is
+                  when x"0" => q_data <= x"0180";
+                  when x"1" => q_data <= x"c200";
+                  when x"2" => q_data <= x"0001";
+                  when x"3" => q_data <= regs_b.mach_o;
+                  when x"4" => q_data <= regs_b.macl_o(31 downto 16);
+                  when x"5" => q_data <= regs_b.macl_o(15 downto 0);
+                  when x"6" => q_data <= x"8808";
+                  when x"7" => q_data <= tx_pause_delay;
+                               state <= TXF_PAD;
+                  when others => null;
+                end case;
                 counter <= counter + 1;
+              else
+                q_valid <= '0';
+              end if;
 
-                q_sof   <= phdr.sof;
-                q_data  <= phdr.data;
-                q_valid <= phdr.valid;
+            when TXF_ADDR =>
+              q_sof               <= '0';
+              crc_gen_force_reset <= '0';
 
-                state <= phdr.next_state;
+              if (pcs_dreq_i = '1' and snk_valid = '1' and snk_i.adr = c_WRF_DATA) then
+                counter <= counter + 1;
+                case counter(3 downto 0) is
+                  when x"0" => q_data <= snk_i.dat; q_valid <= '1';
+                  when x"1" => q_data <= snk_i.dat; q_valid <= '1';
+                  when x"2" => q_data <= snk_i.dat; q_valid <= '1';
+                  when x"3" =>
+                    if(stored_status.has_smac = '1') then
+                      q_data <= snk_i.dat;
+                    else
+                      q_data <= regs_b.mach_o;
+                    end if;
+                    q_valid <= '1';
+                  when x"4" =>
+                    if(stored_status.has_smac = '1') then
+                      q_data <= snk_i.dat;
+                    else
+                      q_data <= regs_b.macl_o(31 downto 16);
+                    end if;
+                    q_valid <= '1';
+                  when x"5" =>
+                    if(stored_status.has_smac = '1') then
+                      q_data <= snk_i.dat;
+                    else
+                      q_data <= regs_b.macl_o(15 downto 0);
+                    end if;
+                    q_valid <= '1';
+                  when x"6" =>
+                    if(g_with_vlans and snk_i.dat = x"8100") then  -- 802.1q frame
+                      q_data  <= (others => 'X');
+                      q_valid <= '0';
+                      counter <= (others => '0');
+                      state   <= TXF_QHEADER;
+                    else
+                      q_data  <= snk_i.dat;
+                      q_valid <= '1';
+                      state   <= TXF_DATA;
+                    end if;
+                  when others => null;
+                end case;
                 
               else
-                q_sof   <= '0';         -- tx_valid == 0
                 q_valid <= '0';
+                q_data  <= (others => 'X');
               end if;
 
+            when TXF_QHEADER =>
+              if(g_with_vlans and pcs_dreq_i = '1') then
+                case counter(3 downto 0) is
+                  when x"0" =>          -- TPID
+                    if(snk_valid = '1' and snk_i.adr = c_WRF_DATA) then
+                      counter <= counter + 1;
+                      vut_stored_ethertype <= snk_i.dat;
+                    end if;
+
+                  when x"1" =>          -- VLAN Tag
+                    if(snk_valid = '1' and snk_i.adr = c_WRF_DATA) then
+                      counter             <= counter + 1;
+                      vut_stored_tag <= snk_i.dat;
+                      tx_ready <= '0';
+                    end if;
+
+                  when x"2" =>
+
+                    if(vut_untag = '1' or untagging = '1') then  -- VID is in the untagged set
+                      untagging           <= '1';
+                      tx_ready <= '1';
+                      q_data              <= vut_stored_ethertype;
+                      q_valid             <= '1';
+                      counter  <= counter  +1;
+                    else
+                      untagging           <= '0';
+                      q_data              <= x"8100";
+                      q_valid             <= '1';
+                      counter             <= counter + 1;
+                    end if;
+
+                  when x"3" =>
+                    if(untagging = '1') then
+                      if(snk_valid = '1') then
+                        q_data <= snk_i.dat;
+                        q_valid <= '1';
+                        state <= TXF_DATA;
+                      else
+                        q_valid <= '0';
+                      end if;
+                    elsif(untagging = '0') then
+                      q_data  <= vut_stored_ethertype;
+                      q_valid <= '1';
+                      counter <= counter + 1;
+                    else
+                      q_valid  <= '0';
+                      tx_ready <= '1';
+                    end if;
+                    
+                  when x"4" =>
+                    if(untagging = '0') then
+                      q_data   <= vut_stored_tag;
+                      q_valid  <= '1';
+                      tx_ready <= '1';
+                      state <= TXF_DATA;
+                    end if;
+                  when others => null;
+                end case;
+              else
+                q_valid <= '0';
+              end if;
+
+
+
 -------------------------------------------------------------------------------
 -- TX FSM state PAD: pads a pause frame to 64 bytes.
 -------------------------------------------------------------------------------
@@ -470,29 +544,31 @@ begin  -- behavioral
                 state   <= TXF_WAIT_CRC;
                 q_valid <= '0';
               else
-                q_data <= x"0000";
+                q_data  <= x"0000";
                 q_valid <= '1';
               end if;
 
 -------------------------------------------------------------------------------
 -- TX FSM state DATA: trasmits frame payload
 -------------------------------------------------------------------------------
-              
+
             when TXF_DATA =>
 
+              tx_ready <= '1';
+              
               if(eof_p1 = '1') then
 
                 if(stored_status.has_crc = '0') then
                   state <= TXF_WAIT_CRC;
                 else
-                  q_eof <= '1';
+                  q_eof   <= '1';
                   counter <= (others => '0');
                   state   <= TXF_GAP;
                 end if;
 
                 tx_ready <= '0';
 
-                -- check if we have an OOB block
+                                        -- check if we have an OOB block
                 if(oob.valid = '1' and oob.oob_type = c_WRF_OOB_TYPE_TX and g_with_timestamper) then
                   oob_fid_stb_o <= '1';
                 end if;
@@ -501,9 +577,9 @@ begin  -- behavioral
               if(snk_valid = '1' and snk_i.adr = c_WRF_DATA) then
                 q_data     <= snk_i.dat;
                 q_valid    <= '1';
-                q_bytesel  <= not snk_i.sel(1);  --tx_bytesel_i;
-                write_mask <= snk_i.sel(1);
-                odd_length <= not snk_i.sel(1);  --tx_bytesel_i;
+                q_bytesel  <= not snk_i.sel(0);  --tx_bytesel_i;
+                write_mask <= snk_i.sel(0);
+                odd_length <= not snk_i.sel(0);  --tx_bytesel_i;
               else
                 q_valid <= '0';
               end if;
@@ -511,13 +587,13 @@ begin  -- behavioral
 -------------------------------------------------------------------------------
 -- TX FSM states: WAIT_CRC, EMBED_CRC: dealing with frame checksum field
 -------------------------------------------------------------------------------            
-              
+
             when TXF_WAIT_CRC =>
               oob_fid_stb_o       <= '0';
               q_valid             <= '0';
               state               <= TXF_EMBED_CRC1;
               crc_gen_enable_mask <= '0';
-              
+
             when TXF_EMBED_CRC1 =>
               if(odd_length = '1') then  -- CRC at odd position
                 q_data(7 downto 0) <= crc_value(31 downto 24);
@@ -529,7 +605,7 @@ begin  -- behavioral
               write_mask <= '1';
               q_bytesel  <= '0';
               state      <= TXF_EMBED_CRC2;
-              
+
             when TXF_EMBED_CRC2 =>
               if(odd_length = '1') then  -- CRC at odd position
                 q_data(15 downto 0) <= crc_value(23 downto 8);
@@ -555,13 +631,14 @@ begin  -- behavioral
 -------------------------------------------------------------------------------
 -- TX FSM states: WAIT_CRC, EMBED_CRC: dealing with frame checksum field
 -------------------------------------------------------------------------------            
-              
+
             when TXF_GAP =>
-              q_abort   <= '0';
-              q_valid   <= '0';
-              snk_o.err <= '0';
-              snk_o.rty <= '0';
-              q_bytesel <= '0';
+              q_eof       <= '0';
+              q_abort     <= '0';
+              q_valid     <= '0';
+              snk_out.err <= '0';
+              snk_out.rty <= '0';
+              q_bytesel   <= '0';
 
               if(counter = c_IFG_LENGTH) then
                 if(pcs_busy_i = '0') then
@@ -592,7 +669,10 @@ begin  -- behavioral
 
   oob_fid_value_o <= oob.frame_id;
 
-  snk_out.stall <= not (tx_ready or (not regs_b.ecr_tx_en_o));  -- /dev/null if disabled
+
+  stall_int <= not (pcs_dreq_i and tx_ready) and regs_b.ecr_tx_en_o;  -- /dev/null if disabled
+
+  snk_out.stall <= stall_int;
 
   p_gen_ack : process(clk_sys_i)
   begin
@@ -610,7 +690,7 @@ begin  -- behavioral
     q_bytesel,
     q_abort);
 
-  pcs_valid_o <= q_sof or (q_valid and write_mask);
+  pcs_valid_o <= q_sof or q_eof or (q_valid and write_mask);
 
 end behavioral;