diff --git a/hdl/pcie_altera.vhd b/hdl/pcie_altera.vhd
index cf1e92f1a3bbd89801fb203e0aece2f3ad33ef8f..321cf4eb3725b73fa23d3cb338bbf5a23f6e9ca8 100644
--- a/hdl/pcie_altera.vhd
+++ b/hdl/pcie_altera.vhd
@@ -31,7 +31,8 @@ entity pcie_altera is
     -- push TX data
     tx_en_i       : in  std_logic; -- may never exceed alloc_i
     tx_dat_i      : in  std_logic_vector(31 downto 0);
-    tx_eop_i      : in  std_logic); -- Mark last strobe
+    tx_eop_i      : in  std_logic; -- Mark last strobe
+    tx_pad_i      : in  std_logic); -- ORed into s_64to32_full, so a tx_en_i strobe with this set asserts s_queue_wen
 end pcie_altera;
 
 architecture rtl of pcie_altera is
@@ -256,7 +257,7 @@ architecture rtl of pcie_altera is
   signal r_delay_ready : std_logic_vector(1 downto 0); -- length must equal the latency of the Avalon TX bus
   
   signal s_queue_wdat : std_logic_vector(63 downto 0);
-  signal s_queue_wen, s_64to32_full, r_tx32_full, r_pad : std_logic;
+  signal s_queue_wen, s_64to32_full, r_tx32_full : std_logic;
   
   constant zero32 : std_logic_vector(31 downto 0) := (others => '0');
   signal r_tx_dat0 : std_logic_vector(31 downto 0);
@@ -630,7 +631,7 @@ begin
     (zero32 & tx_dat_i) when r_tx32_full = '0' else
     (tx_dat_i & r_tx_dat0);
   
-  s_64to32_full <= r_tx32_full or r_pad or tx_eop_i;
+  s_64to32_full <= r_tx32_full or tx_pad_i or tx_eop_i; -- any of: r_tx32_full set, pad requested via port, last strobe
   s_queue_wen <= tx_en_i and s_64to32_full;
   
   tx_data32 : process(core_clk_out)
@@ -639,7 +640,6 @@ begin
       if rstn = '0' then
         r_tx_dat0 <= (others => '0');
         r_tx32_full <= '0';
-        r_pad <= '0';
       else
         if tx_en_i = '1' then
           r_tx_dat0 <= tx_dat_i;
diff --git a/hdl/pcie_tlp.vhd b/hdl/pcie_tlp.vhd
index 9c18236bb88c1f60f4c7528335015e6c7515642e..0f324029f8e2d3cfded3431d786716f0a9fb95f2 100644
--- a/hdl/pcie_tlp.vhd
+++ b/hdl/pcie_tlp.vhd
@@ -17,6 +17,7 @@ entity pcie_tlp is
     tx_en_o       : out std_logic;
     tx_dat_o      : out std_logic_vector(31 downto 0);
     tx_eop_o      : out std_logic;
+    tx_pad_o      : out std_logic; -- defaults to '0'; one state branch drives it with not r_address(2)
     
     cfg_busdev_i  : in  std_logic_vector(12 downto 0);
     
@@ -383,6 +384,7 @@ begin
         r_tx_en <= '0';
         r_tx_alloc <= '0';
         tx_eop_o <= '0';
+        tx_pad_o <= '0'; -- default; NOTE(review): presumably overridden by the later not r_address(2) assignment - confirm same process
         tx_dat_o <= (others => '-');
         
         tx_state <= next_state;
@@ -411,6 +413,7 @@ begin
               r_tx_alloc <= '1';
               r_tx_en <= '1';
             end if;
+            tx_pad_o <= not r_address(2); -- high when address bit 2 is clear; NOTE(review): presumably a 64-bit-aligned address - confirm
           when c_block => 
             null;
           when c_queue => 
diff --git a/hdl/pcie_wb.vhd b/hdl/pcie_wb.vhd
index 6e01b2db4583cbf0ec1e2c86d85d37b8d68b0f27..180dc2f3f3fb8875e40185b7d10f55ab991a76e5 100644
--- a/hdl/pcie_wb.vhd
+++ b/hdl/pcie_wb.vhd
@@ -48,7 +48,7 @@ architecture rtl of pcie_wb is
   signal rx_wb_dat : std_logic_vector(31 downto 0);
   signal rx_wb_bar : std_logic_vector(2 downto 0);
   
-  signal tx_rdy, tx_alloc, tx_en, tx_eop : std_logic;
+  signal tx_rdy, tx_alloc, tx_en, tx_eop, tx_pad : std_logic; -- tx_pad carries pcie_tlp's tx_pad_o to the tx_pad_i port
   signal tx_dat : std_logic_vector(31 downto 0);
   
   signal wb_stb_o, wb_we_o, wb_ack_i : std_logic;
@@ -97,7 +97,8 @@ begin
     tx_alloc_i    => tx_alloc,
     tx_en_i       => tx_en,
     tx_dat_i      => tx_dat,
-    tx_eop_i      => tx_eop);
+    tx_eop_i      => tx_eop,
+    tx_pad_i      => tx_pad);
   
   pcie_logic : pcie_tlp port map(
     clk_i         => wb_clk,
@@ -113,6 +114,7 @@ begin
     tx_en_o       => tx_en,
     tx_dat_o      => tx_dat,
     tx_eop_o      => tx_eop,
+    tx_pad_o      => tx_pad,
     
     cfg_busdev_i  => cfg_busdev,
       
diff --git a/hdl/pcie_wb_pkg.vhd b/hdl/pcie_wb_pkg.vhd
index 45fe9ab6f3a393c3a703cd4415ee05246a69e183..6480349dc81148682cd8b9f967df5e25bba58fd8 100644
--- a/hdl/pcie_wb_pkg.vhd
+++ b/hdl/pcie_wb_pkg.vhd
@@ -32,7 +32,8 @@ package pcie_wb_pkg is
       -- push TX data
       tx_en_i       : in  std_logic; -- may never exceed alloc_i
       tx_dat_i      : in  std_logic_vector(31 downto 0);
-      tx_eop_i      : in  std_logic); -- Mark last strobe
+      tx_eop_i      : in  std_logic; -- Mark last strobe
+      tx_pad_i      : in  std_logic); -- Is the data misaligned?
   end component;
   
   component pcie_tlp is
@@ -50,6 +51,7 @@ package pcie_wb_pkg is
       tx_en_o       : out std_logic;
       tx_dat_o      : out std_logic_vector(31 downto 0);
       tx_eop_o      : out std_logic;
+      tx_pad_o      : out std_logic;
       
       cfg_busdev_i  : in  std_logic_vector(12 downto 0);