From 49548c2c77fd50631ccded6ce572d4e2c3720754 Mon Sep 17 00:00:00 2001
From: David Sidler <david.sidler@inf.ethz.ch>
Date: Sun, 8 Sep 2019 12:14:02 +0200
Subject: [PATCH] fixing 10G tcp & udp

---
 hdl/common/tcp_stack.sv                       | 77 +++++++++++--------
 hdl/common/udp_stack.sv                       | 76 +++++++++++++++---
 hls/hash_table/hash_table.hpp                 |  4 +-
 hls/ip_handler/ip_handler.cpp                 |  2 +-
 hls/toe/dummy_memory.hpp                      | 10 ++-
 hls/toe/make.tcl.in                           | 47 +++++------
 hls/toe/rx_engine/rx_engine.cpp               | 18 ++---
 hls/toe/toe.cpp                               |  8 +-
 hls/toe/toe_config.hpp.in                     |  6 ++
 hls/toe/toe_tb.cpp                            | 27 +++----
 hls/toe/tx_app_stream_if/tx_app_stream_if.cpp |  9 +--
 hls/udp/udp.cpp                               |  1 +
 12 files changed, 174 insertions(+), 111 deletions(-)

diff --git a/hdl/common/tcp_stack.sv b/hdl/common/tcp_stack.sv
index c98586b..436c9a7 100755
--- a/hdl/common/tcp_stack.sv
+++ b/hdl/common/tcp_stack.sv
@@ -122,8 +122,11 @@ axis_meta #(.WIDTH(16))     axis_listen_port();
 axis_meta #(.WIDTH(8))      axis_listen_port_status();
 axis_meta #(.WIDTH(48))     axis_open_connection();
 axis_meta #(.WIDTH(24))     axis_open_status();
+axis_meta #(.WIDTH(16))     axis_close_connection();
 
+axis_meta #(.WIDTH(88))     axis_notifications();
 axis_meta #(.WIDTH(32))     axis_read_package();
+axis_meta #(.WIDTH(16))     axis_rx_metadata();
 axis_meta #(.WIDTH(32))     axis_tx_metadata();
 
 
@@ -269,9 +272,9 @@ toe_ip toe_inst (
 .m_axis_listen_port_rsp_V_TDATA(axis_listen_port_status.data),
 
 // notification & read request
-.m_axis_notification_V_TVALID(m_axis_notifications.valid),
-.m_axis_notification_V_TREADY(m_axis_notifications.ready),
-.m_axis_notification_V_TDATA(m_axis_notifications.data),
+.m_axis_notification_V_TVALID(axis_notifications.valid),
+.m_axis_notification_V_TREADY(axis_notifications.ready),
+.m_axis_notification_V_TDATA(axis_notifications.data),
 .s_axis_rx_data_req_V_TVALID(axis_read_package.valid),
 .s_axis_rx_data_req_V_TREADY(axis_read_package.ready),
 .s_axis_rx_data_req_V_TDATA(axis_read_package.data),
@@ -283,14 +286,14 @@ toe_ip toe_inst (
 .m_axis_open_conn_rsp_V_TVALID(axis_open_status.valid),
 .m_axis_open_conn_rsp_V_TREADY(axis_open_status.ready),
 .m_axis_open_conn_rsp_V_TDATA(axis_open_status.data),
-.s_axis_close_conn_req_V_V_TVALID(s_axis_close_connection.valid),
-.s_axis_close_conn_req_V_V_TREADY(s_axis_close_connection.ready),
-.s_axis_close_conn_req_V_V_TDATA(s_axis_close_connection.data),
+.s_axis_close_conn_req_V_V_TVALID(axis_close_connection.valid),
+.s_axis_close_conn_req_V_V_TREADY(axis_close_connection.ready),
+.s_axis_close_conn_req_V_V_TDATA(axis_close_connection.data),
 
 // rx data
-.m_axis_rx_data_rsp_metadata_V_V_TVALID(m_axis_rx_metadata.valid),
-.m_axis_rx_data_rsp_metadata_V_V_TREADY(m_axis_rx_metadata.ready),
-.m_axis_rx_data_rsp_metadata_V_V_TDATA(m_axis_rx_metadata.data),
+.m_axis_rx_data_rsp_metadata_V_V_TVALID(axis_rx_metadata.valid),
+.m_axis_rx_data_rsp_metadata_V_V_TREADY(axis_rx_metadata.ready),
+.m_axis_rx_data_rsp_metadata_V_V_TDATA(axis_rx_metadata.data),
 .m_axis_rx_data_rsp_TVALID(m_axis_rx_data.valid),
 .m_axis_rx_data_rsp_TREADY(m_axis_rx_data.ready),
 .m_axis_rx_data_rsp_TDATA(m_axis_rx_data.data),
@@ -685,7 +688,7 @@ end
 
 
 
-// Register slices to avoid combinatorial loops created by HLS
+// Register slices to avoid combinatorial loops created by HLS due to the new axis INTERFACE (enforced since 19.1)
 
 axis_register_slice_16 listen_port_slice (
   .aclk(net_clk),                    // input wire aclk
@@ -731,6 +734,28 @@ axis_register_slice_24 open_status_slice (
   .m_axis_tdata(m_axis_open_status.data)    // output wire [7 : 0] m_axis_tdata
 );
 
+axis_register_slice_16 close_connection_slice (  // register stage between the module's s_axis_close_connection port and toe_inst's s_axis_close_conn_req input
+  .aclk(net_clk),                    // input wire aclk
+  .aresetn(net_aresetn),              // input wire aresetn
+  .s_axis_tvalid(s_axis_close_connection.valid),  // input wire s_axis_tvalid
+  .s_axis_tready(s_axis_close_connection.ready),  // output wire s_axis_tready
+  .s_axis_tdata(s_axis_close_connection.data),    // input wire [15 : 0] s_axis_tdata
+  .m_axis_tvalid(axis_close_connection.valid),  // output wire m_axis_tvalid
+  .m_axis_tready(axis_close_connection.ready),  // input wire m_axis_tready
+  .m_axis_tdata(axis_close_connection.data)    // output wire [15 : 0] m_axis_tdata
+);
+
+axis_register_slice_88 notification_slice (  // register stage between toe_inst's m_axis_notification output and the module's m_axis_notifications port
+  .aclk(net_clk),                    // input wire aclk
+  .aresetn(net_aresetn),              // input wire aresetn
+  .s_axis_tvalid(axis_notifications.valid),  // input wire s_axis_tvalid
+  .s_axis_tready(axis_notifications.ready),  // output wire s_axis_tready
+  .s_axis_tdata(axis_notifications.data),    // input wire [87 : 0] s_axis_tdata
+  .m_axis_tvalid(m_axis_notifications.valid),  // output wire m_axis_tvalid
+  .m_axis_tready(m_axis_notifications.ready),  // input wire m_axis_tready
+  .m_axis_tdata(m_axis_notifications.data)    // output wire [87 : 0] m_axis_tdata
+);
+
 axis_register_slice_32 read_package_slice (
   .aclk(net_clk),                    // input wire aclk
   .aresetn(net_aresetn),              // input wire aresetn
@@ -742,6 +767,16 @@ axis_register_slice_32 read_package_slice (
   .m_axis_tdata(axis_read_package.data)    // output wire [7 : 0] m_axis_tdata
 );
 
+axis_register_slice_16 axis_rx_metadata_slice (  // register stage between toe_inst's m_axis_rx_data_rsp_metadata output and the module's m_axis_rx_metadata port
+  .aclk(net_clk),                    // input wire aclk
+  .aresetn(net_aresetn),              // input wire aresetn
+  .s_axis_tvalid(axis_rx_metadata.valid),  // input wire s_axis_tvalid
+  .s_axis_tready(axis_rx_metadata.ready),  // output wire s_axis_tready
+  .s_axis_tdata(axis_rx_metadata.data),    // input wire [15 : 0] s_axis_tdata
+  .m_axis_tvalid(m_axis_rx_metadata.valid),  // output wire m_axis_tvalid
+  .m_axis_tready(m_axis_rx_metadata.ready),  // input wire m_axis_tready
+  .m_axis_tdata(m_axis_rx_metadata.data)    // output wire [15 : 0] m_axis_tdata
+);
 axis_register_slice_32 axis_tx_metadata_slice (
   .aclk(net_clk),                    // input wire aclk
   .aresetn(net_aresetn),              // input wire aresetn
@@ -771,28 +806,6 @@ always @(posedge net_clk) begin
     end
 end
 
-/*ila_mixed tco_debug (
-	.clk(net_clk), // input wire clk
-
-	.probe0(s_axis_mem_read_data[ddrPortNetworkTx].valid), // input wire [0:0]  probe0  
-	.probe1(s_axis_mem_read_data[ddrPortNetworkTx].ready), // input wire [0:0]  probe1 
-	.probe2(m_axis_tx_data.valid), // input wire [0:0]  probe2 
-	.probe3(m_axis_tx_data.ready), // input wire [0:0]  probe3 
-	.probe4(m_axis_mem_read_cmd[ddrPortNetworkTx].valid), // input wire [0:0]  probe4 
-	.probe5(m_axis_mem_read_cmd[ddrPortNetworkTx].ready), // input wire [0:0]  probe5 
-	.probe6(m_axis_rx_metadata.valid), // input wire [0:0]  probe6 
-	.probe7(s_axis_mem_read_data[ddrPortNetworkTx].last), // input wire [0:0]  probe7 
-	.probe8(read_cmd_counter), // input wire [15:0]  probe8 
-	.probe9(read_pkg_counter), // input wire [15:0]  probe9 
-	.probe10(s_axis_mem_read_data[ddrPortNetworkTx].keep[31:16]), // input wire [15:0]  probe10 
-	.probe11(m_axis_mem_read_cmd[ddrPortNetworkTx].address[20:16]),// input wire [15:0]  probe11 
-	.probe12(s_axis_mem_read_data[ddrPortNetworkTx].keep[15:0]),
-	//.probe12({s_axis_tx_data.ready, s_axis_tx_data.valid, s_axis_tx_metadata.ready, s_axis_tx_metadata.valid, m_axis_open_status.ready, m_axis_open_status.valid, m_axis_rx_data.last, m_axis_rx_data.ready, m_axis_rx_data.valid, m_axis_rx_metadata.ready, m_axis_rx_metadata.valid, s_axis_read_package.ready, s_axis_read_package.valid, m_axis_notifications.ready, m_axis_notifications.valid, s_axis_rx_data.last, m_axis_tx_data.last}), // input wire [15:0]  probe12 
-	.probe13({m_axis_tx_data.last, m_axis_mem_write_data[ddrPortNetworkTx].last, m_axis_mem_write_data[ddrPortNetworkTx].ready, m_axis_mem_write_data[ddrPortNetworkTx].valid, s_axis_mem_read_sts[ddrPortNetworkTx].ready, s_axis_mem_read_sts[ddrPortNetworkTx].valid, s_axis_mem_write_sts[ddrPortNetworkTx].ready, s_axis_mem_write_sts[ddrPortNetworkTx].valid, axis_rxwrite_data.last, axis_rxwrite_data.ready, axis_rxwrite_data.valid, axis_rxread_data.last, axis_rxread_data.ready, axis_rxread_data.valid, m_axis_mem_write_cmd[ddrPortNetworkTx].ready, m_axis_mem_write_cmd[ddrPortNetworkTx].valid, m_axis_mem_read_cmd[ddrPortNetworkTx].ready, m_axis_mem_read_cmd[ddrPortNetworkTx].valid}), // input wire [15:0]  probe13 
-	.probe14(m_axis_mem_read_cmd[ddrPortNetworkTx].address[15:0]), // input wire [15:0]  probe14 
-	.probe15(m_axis_mem_read_cmd[ddrPortNetworkTx].length[15:0]) // input wire [15:0]  probe15
-);*/
-
 end
 else begin
 assign s_axis_rx_data.ready = 1'b1;
diff --git a/hdl/common/udp_stack.sv b/hdl/common/udp_stack.sv
index 75ccf9c..b661912 100755
--- a/hdl/common/udp_stack.sv
+++ b/hdl/common/udp_stack.sv
@@ -54,11 +54,16 @@ module udp_stack #(
 generate
 if (UDP_EN == 1) begin
 
+axis_meta #(.WIDTH(48))         axis_ip_to_udp_slice_meta();
 axis_meta #(.WIDTH(48))         axis_ip_to_udp_meta();
+axis_meta #(.WIDTH(48))         axis_udp_to_ip_slice_meta();
 axis_meta #(.WIDTH(48))         axis_udp_to_ip_meta();
 
 axi_stream #(.WIDTH(WIDTH))       axis_ip_to_udp_data();
 axi_stream #(.WIDTH(WIDTH))       axis_udp_to_ip_data();
+
+axis_meta #(.WIDTH(176))    axis_udp_rx_metadata();
+axis_meta #(.WIDTH(176))    axis_udp_tx_metadata();
  
  
 ipv4_ip ipv4_inst (
@@ -70,9 +75,9 @@ ipv4_ip ipv4_inst (
    .s_axis_rx_data_TDATA(s_axis_rx_data.data),
    .s_axis_rx_data_TKEEP(s_axis_rx_data.keep),
    .s_axis_rx_data_TLAST(s_axis_rx_data.last),
-   .m_axis_rx_meta_V_TVALID(axis_ip_to_udp_meta.valid),
-   .m_axis_rx_meta_V_TREADY(axis_ip_to_udp_meta.ready),
-   .m_axis_rx_meta_V_TDATA(axis_ip_to_udp_meta.data),
+   .m_axis_rx_meta_V_TVALID(axis_ip_to_udp_slice_meta.valid),
+   .m_axis_rx_meta_V_TREADY(axis_ip_to_udp_slice_meta.ready),
+   .m_axis_rx_meta_V_TDATA(axis_ip_to_udp_slice_meta.data),
    .m_axis_rx_data_TVALID(axis_ip_to_udp_data.valid),
    .m_axis_rx_data_TREADY(axis_ip_to_udp_data.ready),
    .m_axis_rx_data_TDATA(axis_ip_to_udp_data.data),
@@ -96,6 +101,28 @@ ipv4_ip ipv4_inst (
    .ap_clk(net_clk),
    .ap_rst_n(net_aresetn)
  );
+
+axis_register_slice_48 rx_ip_meta_slice(  // register stage on the 48-bit RX metadata stream produced by ipv4_inst (m_axis_rx_meta)
+ .aclk(net_clk),  // same clock that drives the HLS cores' ap_clk
+ .aresetn(net_aresetn),  // same reset that drives the HLS cores' ap_rst_n
+ .s_axis_tvalid(axis_ip_to_udp_slice_meta.valid),  // slave side: driven by ipv4_inst
+ .s_axis_tready(axis_ip_to_udp_slice_meta.ready),
+ .s_axis_tdata(axis_ip_to_udp_slice_meta.data),
+ .m_axis_tvalid(axis_ip_to_udp_meta.valid),  // master side: registered stream on axis_ip_to_udp_meta (consumer not visible in this hunk; presumably udp_inst - confirm)
+ .m_axis_tready(axis_ip_to_udp_meta.ready),
+ .m_axis_tdata(axis_ip_to_udp_meta.data)
+);
+axis_register_slice_48 tx_ip_meta_slice(  // register stage on the 48-bit TX metadata stream produced by udp_inst (m_axis_tx_meta)
+ .aclk(net_clk),  // same clock that drives the HLS cores' ap_clk
+ .aresetn(net_aresetn),  // same reset that drives the HLS cores' ap_rst_n
+ .s_axis_tvalid(axis_udp_to_ip_slice_meta.valid),  // slave side: driven by udp_inst
+ .s_axis_tready(axis_udp_to_ip_slice_meta.ready),
+ .s_axis_tdata(axis_udp_to_ip_slice_meta.data),
+ .m_axis_tvalid(axis_udp_to_ip_meta.valid),  // master side: registered stream on axis_udp_to_ip_meta (consumer not visible in this hunk; presumably ipv4_inst - confirm)
+ .m_axis_tready(axis_udp_to_ip_meta.ready),
+ .m_axis_tdata(axis_udp_to_ip_meta.data)
+);
+
  
  udp_ip udp_inst (
    .reg_listen_port_V(listen_port),
@@ -109,26 +136,26 @@ ipv4_ip ipv4_inst (
    .s_axis_rx_data_TDATA(axis_ip_to_udp_data.data),
    .s_axis_rx_data_TKEEP(axis_ip_to_udp_data.keep),
    .s_axis_rx_data_TLAST(axis_ip_to_udp_data.last),
-   .m_axis_rx_meta_V_TVALID(m_axis_udp_rx_metadata.valid),
-   .m_axis_rx_meta_V_TREADY(m_axis_udp_rx_metadata.ready),
-   .m_axis_rx_meta_V_TDATA(m_axis_udp_rx_metadata.data),
+   .m_axis_rx_meta_V_TVALID(axis_udp_rx_metadata.valid),
+   .m_axis_rx_meta_V_TREADY(axis_udp_rx_metadata.ready),
+   .m_axis_rx_meta_V_TDATA(axis_udp_rx_metadata.data),
    .m_axis_rx_data_TVALID(m_axis_udp_rx_data.valid),
    .m_axis_rx_data_TREADY(m_axis_udp_rx_data.ready),
    .m_axis_rx_data_TDATA(m_axis_udp_rx_data.data),
    .m_axis_rx_data_TKEEP(m_axis_udp_rx_data.keep),
    .m_axis_rx_data_TLAST(m_axis_udp_rx_data.last),
    //TX
-   .s_axis_tx_meta_V_TVALID(s_axis_udp_tx_metadata.valid),
-   .s_axis_tx_meta_V_TREADY(s_axis_udp_tx_metadata.ready),
-   .s_axis_tx_meta_V_TDATA(s_axis_udp_tx_metadata.data),
+   .s_axis_tx_meta_V_TVALID(axis_udp_tx_metadata.valid),
+   .s_axis_tx_meta_V_TREADY(axis_udp_tx_metadata.ready),
+   .s_axis_tx_meta_V_TDATA(axis_udp_tx_metadata.data),
    .s_axis_tx_data_TVALID(s_axis_udp_tx_data.valid),
    .s_axis_tx_data_TREADY(s_axis_udp_tx_data.ready),
    .s_axis_tx_data_TDATA(s_axis_udp_tx_data.data),
    .s_axis_tx_data_TKEEP(s_axis_udp_tx_data.keep),
    .s_axis_tx_data_TLAST(s_axis_udp_tx_data.last),
-   .m_axis_tx_meta_V_TVALID(axis_udp_to_ip_meta.valid),
-   .m_axis_tx_meta_V_TREADY(axis_udp_to_ip_meta.ready),
-   .m_axis_tx_meta_V_TDATA(axis_udp_to_ip_meta.data),
+   .m_axis_tx_meta_V_TVALID(axis_udp_to_ip_slice_meta.valid),
+   .m_axis_tx_meta_V_TREADY(axis_udp_to_ip_slice_meta.ready),
+   .m_axis_tx_meta_V_TDATA(axis_udp_to_ip_slice_meta.data),
    .m_axis_tx_data_TVALID(axis_udp_to_ip_data.valid),
    .m_axis_tx_data_TREADY(axis_udp_to_ip_data.ready),
    .m_axis_tx_data_TDATA(axis_udp_to_ip_data.data),
@@ -138,6 +165,31 @@ ipv4_ip ipv4_inst (
    .ap_clk(net_clk),
    .ap_rst_n(net_aresetn)
  );
+ 
+ // Register slices to avoid combinatorial loops created by HLS due to the new axis INTERFACE (enforced since 19.1)
+
+ axis_register_slice_176 rx_udp_meta_slice(  // register stage between udp_inst's m_axis_rx_meta output and the module's m_axis_udp_rx_metadata port (176-bit metadata)
+ .aclk(net_clk),  // same clock that drives the HLS cores' ap_clk
+ .aresetn(net_aresetn),  // same reset that drives the HLS cores' ap_rst_n
+ .s_axis_tvalid(axis_udp_rx_metadata.valid),  // slave side: driven by udp_inst
+ .s_axis_tready(axis_udp_rx_metadata.ready),
+ .s_axis_tdata(axis_udp_rx_metadata.data),
+ .m_axis_tvalid(m_axis_udp_rx_metadata.valid),  // master side: module-level RX metadata output
+ .m_axis_tready(m_axis_udp_rx_metadata.ready),
+ .m_axis_tdata(m_axis_udp_rx_metadata.data)
+);
+
+axis_register_slice_176 tx_udp_meta_slice(  // register stage between the module's s_axis_udp_tx_metadata port and udp_inst's s_axis_tx_meta input (176-bit metadata)
+ .aclk(net_clk),  // same clock that drives the HLS cores' ap_clk
+ .aresetn(net_aresetn),  // same reset that drives the HLS cores' ap_rst_n
+ .s_axis_tvalid(s_axis_udp_tx_metadata.valid),  // slave side: module-level TX metadata input
+ .s_axis_tready(s_axis_udp_tx_metadata.ready),
+ .s_axis_tdata(s_axis_udp_tx_metadata.data),
+ .m_axis_tvalid(axis_udp_tx_metadata.valid),  // master side: consumed by udp_inst
+ .m_axis_tready(axis_udp_tx_metadata.ready),
+ .m_axis_tdata(axis_udp_tx_metadata.data)
+);
+
 end
 else begin
 
diff --git a/hls/hash_table/hash_table.hpp b/hls/hash_table/hash_table.hpp
index 0f8ccce..ec63948 100644
--- a/hls/hash_table/hash_table.hpp
+++ b/hls/hash_table/hash_table.hpp
@@ -32,8 +32,8 @@ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <math.h>
 
 //Copied from hlslib by Johannes de Fine Licht https://github.com/definelicht/hlslib/blob/master/include/hlslib/xilinx/Utility.h
-constexpr unsigned char ConstLog2(unsigned long val) {
-  return val == 0 ? 0 : 1 + ConstLog2(val >> 1);
+constexpr unsigned long ConstLog2(unsigned long val) { // compile-time floor(log2(val)) for val >= 1; returns 0 for val == 0
+  return val <= 1 ? 0 : 1 + ConstLog2(val >> 1); // "<= 1" rather than "== 1": 0 >> 1 is still 0, so val == 0 would otherwise recurse forever
 }
 
 const uint32_t MAX_KEY_SIZE = 64;
diff --git a/hls/ip_handler/ip_handler.cpp b/hls/ip_handler/ip_handler.cpp
index 10ae6fb..750fcc7 100755
--- a/hls/ip_handler/ip_handler.cpp
+++ b/hls/ip_handler/ip_handler.cpp
@@ -454,7 +454,7 @@ void ip_handler(hls::stream<net_axis<WIDTH> >&		s_axis_raw,
 	#pragma HLS STREAM variable=ipDataCheckFifo depth=64 //8, must hold IP header for checksum checking, max. 15 x 32bit
 	#pragma HLS STREAM variable=ipDataDropFifo depth=2
 	#pragma HLS STREAM variable=ipDataCutFifo depth=2
-	#pragma HLS STREAM variable=udpDataFifo depth=1
+	#pragma HLS STREAM variable=udpDataFifo depth=2
 	#pragma HLS STREAM variable=iph_subSumsFifoOut depth=2
 	#pragma HLS STREAM variable=validChecksumFifo depth=4
 	#pragma HLS STREAM variable=validIpAddressFifo depth=32
diff --git a/hls/toe/dummy_memory.hpp b/hls/toe/dummy_memory.hpp
index c2f6acd..02aadcb 100755
--- a/hls/toe/dummy_memory.hpp
+++ b/hls/toe/dummy_memory.hpp
@@ -48,7 +48,7 @@ private:
 	int readLen;
 	ap_uint<16> writeAddr; //<8>
 	ap_uint<16> writeId;
-	//ap_uint<16> writeLen;
+	ap_uint<16> writeLen;
 	std::map<ap_uint<16>, ap_uint<8>*> storage;
 	std::map<ap_uint<16>, ap_uint<8>*>::iterator readStorageIt;
 	std::map<ap_uint<16>, ap_uint<8>*>::iterator writeStorageIt;
@@ -71,6 +71,9 @@ void dummyMemory<WIDTH>::setWriteCmd(mmCmd cmd)
 //	writeAddr = cmd.saddr(7, 0);
 	writeAddr = cmd.saddr(15, 0);
 	writeId = cmd.saddr(31, 16);
+	uint16_t tempLen = (uint16_t) cmd.bbt(15, 0);
+	writeLen = (int) tempLen;
+	//std::cout << "WRITE command: " << std::hex << cmd.saddr(15, 0) << " " << std::dec << cmd.bbt << std::endl;
 }
 
 template <int WIDTH>
@@ -119,12 +122,17 @@ void dummyMemory<WIDTH>::writeWord(net_axis<WIDTH>& word)
 		{
 			(writeStorageIt->second)[writeAddr] = word.data((i*8)+7, i*8);
 			writeAddr++;
+			writeLen--;
 		}
 		else
 		{
 			break;
 		}
 	}
+	if (word.last)
+	{
+		assert(writeLen == 0);
+	}
 }
 
 template <int WIDTH>
diff --git a/hls/toe/make.tcl.in b/hls/toe/make.tcl.in
index 7a8e234..d832f38 100644
--- a/hls/toe/make.tcl.in
+++ b/hls/toe/make.tcl.in
@@ -8,27 +8,29 @@ create_clock -period ${CLOCK_PERIOD} -name default
 set_top ${PROJECT_NAME}_top
 
 add_files ${CMAKE_CURRENT_SOURCE_DIR}/../axi_utils.cpp
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/ack_delay/ack_delay.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/close_timer/close_timer.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/event_engine/event_engine.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/port_table/port_table.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/probe_timer/probe_timer.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/retransmit_timer/retransmit_timer.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/rx_app_if/rx_app_if.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/rx_app_stream_if/rx_app_stream_if.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/rx_engine/rx_engine.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/rx_sar_table/rx_sar_table.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/session_lookup_controller/session_lookup_controller.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/state_table/state_table.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_app_if/tx_app_if.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_app_stream_if/tx_app_stream_if.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_engine/tx_engine.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_sar_table/tx_sar_table.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_app_interface/tx_app_interface.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-add_files ${CMAKE_CURRENT_SOURCE_DIR}/toe.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
-
-
-#add_files -tb test_toe.cpp
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/ack_delay/ack_delay.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/close_timer/close_timer.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/event_engine/event_engine.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/port_table/port_table.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/probe_timer/probe_timer.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/retransmit_timer/retransmit_timer.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/rx_app_if/rx_app_if.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/rx_app_stream_if/rx_app_stream_if.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/rx_engine/rx_engine.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/rx_sar_table/rx_sar_table.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/session_lookup_controller/session_lookup_controller.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/state_table/state_table.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_app_if/tx_app_if.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_app_stream_if/tx_app_stream_if.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_engine/tx_engine.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_sar_table/tx_sar_table.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/tx_app_interface/tx_app_interface.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+add_files ${CMAKE_CURRENT_SOURCE_DIR}/toe.cpp -cflags "-std=c++11 -I${CMAKE_CURRENT_BINARY_DIR}"
+
+
+add_files -tb ${CMAKE_CURRENT_SOURCE_DIR}/toe_tb.cpp
+
+config_rtl -disable_start_propagation
 
 
 #Check which command
@@ -37,7 +39,8 @@ set command [lindex $argv 2]
 if {$command == "synthesis"} {
    csynth_design
 } elseif {$command == "csim"} {
-   csim_design
+   csim_design -clean -argv {0 ${CMAKE_CURRENT_SOURCE_DIR}/testVectors/io_fin_5.dat ${CMAKE_CURRENT_SOURCE_DIR}/testVectors/rxOutput.dat ${CMAKE_CURRENT_SOURCE_DIR}/testVectors/txOutput.dat ${CMAKE_CURRENT_SOURCE_DIR}/testVectors/rx_io_fin_5.gold}
+#   csim_design -clean -argv {0 ${CMAKE_CURRENT_SOURCE_DIR}/testVectors/mysyn2.dat ${CMAKE_CURRENT_SOURCE_DIR}/testVectors/rxOutput.dat ${CMAKE_CURRENT_SOURCE_DIR}/testVectors/txOutput.dat ${CMAKE_CURRENT_SOURCE_DIR}/testVectors/rx_io_fin_5.gold}
 } elseif {$command == "ip"} {
    export_design -format ip_catalog -ipname "toe" -display_name "10G TCP Offload Engine" -description "TCP Offload Engine supporting 10Gbps line rate, up to 10K concurrent sessions." -vendor "ethz.systems" -version "1.6"
 } elseif {$command == "installip"} {
diff --git a/hls/toe/rx_engine/rx_engine.cpp b/hls/toe/rx_engine/rx_engine.cpp
index 7d7057c..291cee7 100755
--- a/hls/toe/rx_engine/rx_engine.cpp
+++ b/hls/toe/rx_engine/rx_engine.cpp
@@ -79,7 +79,7 @@ void process_ipv4(	stream<net_axis<WIDTH> >&		dataIn,
 //align remove options??
 //USE code from ipv4.hpp
 template <int WIDTH>
-void drop_optional_header(	stream<ap_uint<4> >&	process2dropLengthFifo,
+void drop_optional_ip_header(	stream<ap_uint<4> >&	process2dropLengthFifo,
 							stream<net_axis<WIDTH> >&	process2dropFifo,
 							stream<net_axis<WIDTH> >&	dataOut)
 {
@@ -706,7 +706,6 @@ void drop_optional_header_fields(	hls::stream<optionalFieldsMeta>&		metaIn,
 				state = 0;
 				if (dataOffset != 0 && (dataOffset*4 < WIDTH/8) && currWord.keep[dataOffset*4] != 0)
 				{
-					std::cout << "s0 -> s2" << std::endl;
 					state = 2;
 				}
 			}
@@ -734,24 +733,23 @@ void drop_optional_header_fields(	hls::stream<optionalFieldsMeta>&		metaIn,
 			else if (dataOffset == 0)
 			{
 				sendWord = currWord;
+				dataOut.write(sendWord);
 			}
 			else //if (dataOffset == 1)
 			{
-				std::cout << "AA" << std::endl;
 				sendWord.data(WIDTH - (dataOffset*32) -1, 0) = prevWord.data(WIDTH-1, dataOffset*32);
 				sendWord.keep((WIDTH/8) - (dataOffset*4) -1, 0) = prevWord.keep(WIDTH/8-1, dataOffset*4);
 				sendWord.data(WIDTH-1, WIDTH - (dataOffset*32)) = currWord.data(dataOffset*32-1, 0);
 				sendWord.keep(WIDTH/8-1, (WIDTH/8) - (dataOffset*4)) = currWord.keep(dataOffset*4-1, 0);
 				sendWord.last = (currWord.keep[dataOffset*4] == 0);
-				std::cout << "BB" << std::endl;
+				dataOut.write(sendWord);
 			}
 
-			dataOut.write(sendWord);
 			prevWord = currWord;
 			if (currWord.last)
 			{
 				state = 0;
-				if (!sendWord.last)
+				if (currWord.keep[dataOffset*4] != 0 && dataOffset != 0)
 				{
 					state = 2;
 				}
@@ -761,14 +759,12 @@ void drop_optional_header_fields(	hls::stream<optionalFieldsMeta>&		metaIn,
 	case 2:
 	{
 		net_axis<WIDTH> sendWord;
-		std::cout << "CC" << std::endl;
 		sendWord.data(WIDTH - (dataOffset*32) -1, 0) = prevWord.data(WIDTH-1, dataOffset*32);
 		sendWord.keep((WIDTH/8) - (dataOffset*4) -1, 0) = prevWord.keep(WIDTH/8-1, dataOffset*4);
 		sendWord.data(WIDTH-1, WIDTH - (dataOffset*32)) = 0;
 		sendWord.keep(WIDTH/8-1, (WIDTH/8) - (dataOffset*4)) = 0;
 		sendWord.last = 1;
 		dataOut.write(sendWord);
-		std::cout << "DD" << std::endl;
 		state = 0;
 		break;
 	}
@@ -1701,7 +1697,7 @@ void rxEngMemWrite(	hls::stream<net_axis<WIDTH> >& 	dataIn,
 			{
 				lengthFirstPkg = BUFFER_SIZE - cmd.saddr;
 				remainingLength = lengthFirstPkg;
-				offset = lengthFirstPkg(5, 0); //TODO use lengthFirstPkg(log2(WIDTH/8), 0)
+				offset = lengthFirstPkg(DATA_KEEP_BITS - 1, 0);
 
 				doubleAccess.write(true);
 				cmdOut.write(mmCmd(cmd.saddr, lengthFirstPkg));
@@ -1879,7 +1875,7 @@ void rx_engine(	stream<net_axis<WIDTH> >&					ipRxData,
 	#pragma HLS stream variable=rxEng_dataBuffer0 depth=8
 	#pragma HLS stream variable=rxEng_dataBuffer1 depth=8
 	#pragma HLS stream variable=rxEng_dataBuffer2 depth=256 //critical, tcp checksum computation
-	#pragma HLS stream variable=rxEng_dataBuffer3 depth=8
+	#pragma HLS stream variable=rxEng_dataBuffer3 depth=32
 	#pragma HLS stream variable=rxEng_dataBuffer3a depth=8
 	#pragma HLS stream variable=rxEng_dataBuffer3b depth=8
 	#pragma HLS DATA_PACK variable=rxEng_dataBuffer0
@@ -1946,7 +1942,7 @@ void rx_engine(	stream<net_axis<WIDTH> >&					ipRxData,
 
 
 	process_ipv4<WIDTH>(ipRxData, rx_process2dropLengthFifo, rxEng_ipMetaFifo, rxEng_dataBuffer0);
-	drop_optional_header<WIDTH>(rx_process2dropLengthFifo, rxEng_dataBuffer0, rxEng_dataBuffer4);
+	drop_optional_ip_header<WIDTH>(rx_process2dropLengthFifo, rxEng_dataBuffer0, rxEng_dataBuffer4);
 	//align
 	lshiftWordByOctet<WIDTH, 2>(((TCP_PSEUDO_HEADER_SIZE%WIDTH)/8), rxEng_dataBuffer4, rxEng_dataBuffer5);
 	//rxTcpLengthExtract(ipRxData, rxEng_dataBuffer0, rxEng_tcpLenFifo);
diff --git a/hls/toe/toe.cpp b/hls/toe/toe.cpp
index ba2a3bf..9fe68db 100755
--- a/hls/toe/toe.cpp
+++ b/hls/toe/toe.cpp
@@ -179,7 +179,7 @@ void rxAppMemDataRead(	stream<net_axis<WIDTH> >&	rxBufferReadData,
 			//rxAppMemRdOffset = 0;
 			rxAppDoubleAccessFlag = rxAppDoubleAccess.read();
 			rxBufferReadData.read(rxAppMemRdRxWord);
-			rxAppMemRdOffset = keepToLen<WIDTH>(rxAppMemRdRxWord.keep);						// Count the number of valid bytes in this data word
+			rxAppMemRdOffset = keepToLen(rxAppMemRdRxWord.keep);						// Count the number of valid bytes in this data word
 			if (rxAppMemRdRxWord.last == 1 && rxAppDoubleAccessFlag == 1) {		// If this is the last word and this access was broken down
 				rxAppMemRdRxWord.last = ~rxAppDoubleAccessFlag;					// Negate the last flag inn the axiWord and determine if there's an offset
 				if (rxAppMemRdOffset == (WIDTH/8)) // No need to offset anything
@@ -212,7 +212,7 @@ void rxAppMemDataRead(	stream<net_axis<WIDTH> >&	rxBufferReadData,
 		if (!rxBufferReadData.empty())
 		{
 			rxBufferReadData.read(rxAppMemRdRxWord);
-			rxAppMemRdOffset = keepToLen<WIDTH>(rxAppMemRdRxWord.keep);						// Count the number of valid bytes in this data word
+			rxAppMemRdOffset = keepToLen(rxAppMemRdRxWord.keep);						// Count the number of valid bytes in this data word
 
 			if (rxAppMemRdRxWord.last == 1 && rxAppDoubleAccessFlag == 1) {		// If this is the last word and this access was broken down
 				rxAppMemRdRxWord.last = ~rxAppDoubleAccessFlag;					// Negate the last flag inn the axiWord and determine if there's an offset
@@ -257,7 +257,7 @@ void rxAppMemDataRead(	stream<net_axis<WIDTH> >&	rxBufferReadData,
 			temp.data.range((rxAppMemRdOffset * 8) - 1, 0) = rxAppMemRdRxWord.data.range((rxAppMemRdOffset * 8) - 1, 0);	// In any case, insert the data of the new data word in the old one. Here we don't pay attention to the exact number of bytes in the new data word. In case they don't fill the entire remaining gap, there will be garbage in the output but it doesn't matter since the KEEP signal indicates which bytes are valid.
 			rxAppMemRdRxWord = rxBufferReadData.read();
 			temp.data.range(WIDTH-1, (rxAppMemRdOffset * 8)) = rxAppMemRdRxWord.data.range(((8 - rxAppMemRdOffset) * 8) - 1, 0);				// Buffer & realign temp into rxAppmemRdRxWord (which is a static variable)
-			ap_uint<8> tempCounter = keepToLen<WIDTH>(rxAppMemRdRxWord.keep);					// Determine how any bytes are valid in the new data word. It might be that this is the only data word of the 2nd segment
+			ap_uint<8> tempCounter = keepToLen(rxAppMemRdRxWord.keep);					// Determine how any bytes are valid in the new data word. It might be that this is the only data word of the 2nd segment
 			rxAppOffsetBuffer = tempCounter - ((WIDTH/8) - rxAppMemRdOffset);				// Calculate the number of bytes to go into the next & final data word
 			if (rxAppMemRdRxWord.last == 1) {
 				if ((tempCounter + rxAppMemRdOffset) <= (WIDTH/8)) {						// Check if the residue from the 1st segment and the data in the 1st data word of the 2nd segment fill this data word. If not...
@@ -285,7 +285,7 @@ void rxAppMemDataRead(	stream<net_axis<WIDTH> >&	rxBufferReadData,
 			temp.data.range((rxAppMemRdOffset * 8) - 1, 0) = rxAppMemRdRxWord.data.range(WIDTH-1, ((8 - rxAppMemRdOffset) * 8));
 			rxAppMemRdRxWord = rxBufferReadData.read();							// Read the new data word in
 			temp.data.range(WIDTH-1, (rxAppMemRdOffset * 8)) = rxAppMemRdRxWord.data.range(((8 - rxAppMemRdOffset) * 8) - 1, 0);
-			ap_uint<8> tempCounter = keepToLen<WIDTH>(rxAppMemRdRxWord.keep);			// Determine how any bytes are valid in the new data word. It might be that this is the only data word of the 2nd segment
+			ap_uint<8> tempCounter = keepToLen(rxAppMemRdRxWord.keep);			// Determine how any bytes are valid in the new data word. It might be that this is the only data word of the 2nd segment
 			rxAppOffsetBuffer = tempCounter - ((WIDTH/8) - rxAppMemRdOffset);				// Calculate the number of bytes to go into the next & final data word
 			if (rxAppMemRdRxWord.last == 1) {
 				if ((tempCounter + rxAppMemRdOffset) <= (WIDTH/8)) {							// Check if the residue from the 1st segment and the data in the 1st data word of the 2nd segment fill this data word. If not...
diff --git a/hls/toe/toe_config.hpp.in b/hls/toe/toe_config.hpp.in
index 29fd5d4..5fe0fb6 100644
--- a/hls/toe/toe_config.hpp.in
+++ b/hls/toe/toe_config.hpp.in
@@ -1,10 +1,16 @@
 #pragma once
 #include <stdint.h>
 
+//Copied from hlslib by Johannes de Fine Licht https://github.com/definelicht/hlslib/blob/master/include/hlslib/xilinx/Utility.h
+constexpr unsigned long ConstLog2(unsigned long val) { // compile-time floor(log2(val)) for val >= 1; returns 0 for val == 0
+  return val <= 1 ? 0 : 1 + ConstLog2(val >> 1); // "<= 1" rather than "== 1": 0 >> 1 is still 0, so val == 0 (e.g. DATA_WIDTH/8 with DATA_WIDTH < 8) would otherwise recurse forever
+}
 
 const uint16_t MSS = ${TCP_STACK_MSS};
 const uint16_t MAX_SESSIONS = ${TCP_STACK_MAX_SESSIONS};
 const unsigned DATA_WIDTH = ${DATA_WIDTH} * 8;
+const unsigned DATA_WIDTH_BITS = ConstLog2(DATA_WIDTH);
+const unsigned DATA_KEEP_BITS = ConstLog2(DATA_WIDTH/8);
 
 // TCP_NODELAY flag, to disable Nagle's Algorithm
 #define TCP_NODELAY ${TCP_STACK_NODELAY_EN}
diff --git a/hls/toe/toe_tb.cpp b/hls/toe/toe_tb.cpp
index 99e1a1a..9601e95 100755
--- a/hls/toe/toe_tb.cpp
+++ b/hls/toe/toe_tb.cpp
@@ -33,6 +33,7 @@ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.// Copyright (c) 2015 Xilinx,
 #include <map>
 #include <string>
 
+#define noOfTxSessions 0 // Number of Tx Sessions to open for testing
 #define totalSimCycles 2500000
 
 using namespace std;
@@ -320,25 +321,15 @@ string decodeApUint8(ap_uint<8> inputNumber) {
 
 ap_uint<64> encodeApUint64(string dataString){
 	ap_uint<64> tempOutput = 0;
-	unsigned short int	tempValue = 16;
-	static const char* const	lut = "0123456789ABCDEF";
 
-	for (unsigned short int i = 0; i<dataString.size();++i) {
-		for (unsigned short int j = 0;j<16;++j) {
-			if (lut[j] == dataString[i]) {
-				tempValue = j;
-				break;
-			}
-		}
-		if (tempValue != 16) {
-			for (short int k = 3;k>=0;--k) {
-				if (tempValue >= pow(2.0, k)) {
-					tempOutput.bit(63-(4*i+(3-k))) = 1;
-					tempValue -= static_cast <unsigned short int>(pow(2.0, k));
-				}
-			}
-		}
+	for (int i = 0; i < 64/8 && static_cast<size_t>(i)*2 < dataString.size(); i++) // one byte (two hex digits) per iteration; stop at the end of short input so substr() cannot throw std::out_of_range
+	{
+		uint16_t temp = 0; // stays 0 when the characters are not valid hex digits
+		std::stringstream parser(dataString.substr(i*2, 2)); // the i-th pair of hex characters
+		parser >> std::hex >> temp;
+		tempOutput(63-(i*8), 56-(i*8)) = temp; // first pair lands in the most significant byte; bytes without input remain 0
 	}
+
 	return tempOutput;
 }
 
@@ -472,7 +463,7 @@ bool parseOutputPacket(deque<net_axis<64> > &outputPacketizer, map<fourTuple, ap
 		outputPacketizer[4].data.bit(12) = 1;												// Set the ACK bit
 		ap_uint<16> tempChecksum = recalculateChecksum(outputPacketizer);
 		outputPacketizer[4].data.range(47, 32) = (tempChecksum.range(7, 0), tempChecksum(15, 8));
-		inputPacketizer.push_back(outputPacketizer[4]);
+		//inputPacketizer.push_back(outputPacketizer[4]);
 		/*cerr << hex << outputPacketizer[0].data << endl;
 		cerr << hex << outputPacketizer[1].data << endl;
 		cerr << hex << outputPacketizer[2].data << endl;
diff --git a/hls/toe/tx_app_stream_if/tx_app_stream_if.cpp b/hls/toe/tx_app_stream_if/tx_app_stream_if.cpp
index af6096f..d8901d5 100755
--- a/hls/toe/tx_app_stream_if/tx_app_stream_if.cpp
+++ b/hls/toe/tx_app_stream_if/tx_app_stream_if.cpp
@@ -156,7 +156,7 @@ void tasi_pkg_pusher(hls::stream<mmCmd>&					tasi_meta2pkgPushCmd,
 			{
 				lengthFirstPkg = BUFFER_SIZE - cmd.saddr;
 				remainingLength = lengthFirstPkg;
-				offset = lengthFirstPkg(5, 0); //TODO use lengthFirstPkg(log2(WIDTH/8), 0)
+				offset = lengthFirstPkg(DATA_KEEP_BITS - 1, 0);
 
 				txBufferWriteCmd.write(mmCmd(cmd.saddr, lengthFirstPkg));
 				tasiPkgPushState = CUT_FIRST;
@@ -228,10 +228,6 @@ void tasi_pkg_pusher(hls::stream<mmCmd>&					tasi_meta2pkgPushCmd,
 			net_axis<WIDTH> currWord = appTxDataIn.read();
 			net_axis<WIDTH> sendWord;
 			sendWord = alignWords<WIDTH>(offset, prevWord, currWord);
-			/*sendWord.data(WIDTH-1, WIDTH - (offset*8)) = currWord.data(offset*8-1, 0);
-			sendWord.data(WIDTH - (offset*8) -1, 0) = prevWord.data(WIDTH-1, offset*8);
-			sendWord.keep(WIDTH/8-1, WIDTH/8 - (offset)) = currWord.keep(offset-1, 0);
-			sendWord.keep(WIDTH/8 - (offset) -1, 0) = prevWord.keep(WIDTH/8-1, offset);*/
 			sendWord.last = (currWord.keep[offset] == 0);
 
 			txBufferWriteData.write(sendWord);
@@ -254,9 +250,6 @@ void tasi_pkg_pusher(hls::stream<mmCmd>&					tasi_meta2pkgPushCmd,
 #endif
 		net_axis<WIDTH> emptyWord;
 		sendWord = alignWords<WIDTH>(offset, prevWord, emptyWord);
-		/*sendWord.data(WIDTH - (offset*8) -1, 0) = prevWord.data(WIDTH-1, offset*8);
-		sendWord.keep(WIDTH/8-1, WIDTH/8 - (offset)) = 0;
-		sendWord.keep(WIDTH/8 - (offset) -1, 0) = prevWord.keep(WIDTH/8-1, offset);*/
 		sendWord.last = 1;
 		txBufferWriteData.write(sendWord);
 		tasiPkgPushState = IDLE;
diff --git a/hls/udp/udp.cpp b/hls/udp/udp.cpp
index 6c6308d..7923586 100644
--- a/hls/udp/udp.cpp
+++ b/hls/udp/udp.cpp
@@ -219,6 +219,7 @@ void udp(		hls::stream<ipMeta>&		s_axis_rx_meta,
 	static hls::stream<udpMeta>	rx_udpMetaFifo("rx_udpMetaFifo");
 	#pragma HLS STREAM depth=2 variable=rx_udp2shiftFifo
 	#pragma HLS STREAM depth=2 variable=rx_udpMetaFifo
+	#pragma HLS DATA_PACK variable=rx_udpMetaFifo
 
 	process_udp<WIDTH>(s_axis_rx_data, rx_udpMetaFifo, rx_udp2shiftFifo, reg_listen_port);
 	rshiftWordByOctet<net_axis<WIDTH>, WIDTH, 2>(((UDP_HEADER_SIZE%WIDTH)/8), rx_udp2shiftFifo, m_axis_rx_data);
-- 
GitLab