Merge branch 'spinler-feat-dpdk_toplevel_sim' into 'devel'

Introduce Top-level simulation example for external processes using libnfb-ext-grpc plugin. See merge request ndk/ndk-fpga!133
CESNET · Jan 10, 2025 · fc40cfe · fc40cfe
2 parents 9e857a3 + 6b4b9fc
commit fc40cfe
Show file tree

Hide file tree

Showing 11 changed files with 511 additions and 178 deletions.
diff --git a/apps/minimal/tests/cocotb/cocotb_grpc.py b/apps/minimal/tests/cocotb/cocotb_grpc.py
@@ -0,0 +1,80 @@
+#import sys
+import logging
+
+import cocotb
+from cocotb.triggers import Timer
+
+import scapy.all
+import scapy.utils
+import scapy.volatile
+import scapy.contrib.mpls
+
+from ndk_core import NFBDevice
+
+import cocotbext.ofm.utils.sim.modelsim as ms
+import cocotb.utils
+
+from cocotbext.ofm.utils.sim.bus import MfbBus, MiBus
+from cocotbext.nfb.ext.grpc import RAM, NfbDmaThreadedGrpcServer
+
+
+#logging.basicConfig(stream=sys.stderr, force=True)
+#logging.getLogger().setLevel(logging.DEBUG)
+
+# Module-level logger named after this test module
+logger = logging.getLogger(__name__)
+
+# Shortcuts
+e = cocotb.external
+st = cocotb.utils.get_sim_time
+
+
+async def get_dev(dut, init=True, **kwargs):
+    """Create an ``NFBDevice`` on top of ``dut`` and optionally initialize it.
+
+    Any extra keyword arguments are passed through to the ``NFBDevice``
+    constructor. When ``init`` is true, ``init()`` of the device is awaited
+    before returning.
+
+    Returns the tuple ``(device, device.nfb)``.
+    """
+    device = NFBDevice(dut, **kwargs)
+    if init:
+        await device.init()
+    return device, device.nfb
+
+
+@cocotb.test()
+async def test_grpc(dut):
+    """Run the design with a gRPC server attached for external processes.
+
+    Starts a packet generator on every RX Ethernet driver, registers a
+    logging callback on every TX Ethernet monitor and then keeps the gRPC
+    server running for 10 ms of simulation time.
+    """
+    ram = RAM()
+    dev, _ = await get_dev(dut, ram=ram)
+
+    # Generate packets on RX eth
+    async def rx_packet(eth, count):
+        # All packets are identical, so build and serialize the frame once
+        # instead of repeating the scapy build for each of the `count` packets.
+        pkt = scapy.all.Ether()/scapy.all.IP(dst="127.0.0.1")/scapy.all.TCP()/"GET /index.html HTTP/1.0 \n\n"
+        data = bytes(pkt)
+        for _ in range(count):
+            # Pass a fresh list on every call, as the original code did
+            await eth.write_packet(list(data))
+
+    for rx in dev._eth_rx_driver:
+        cocotb.start_soon(rx_packet(rx, 50000))
+
+    # Log packets on TX eth
+    for i, tx in enumerate(dev._eth_tx_monitor):
+        # Bind the current index as a default argument: a plain closure looks
+        # `i` up at call time, so every callback would report the last index.
+        def eth_tx_monitor_cb(p, i=i):
+            logger.debug(f"tx_eth{i} packet transmitted: len={len(p)}, data={bytes(p).hex()}")
+        tx.add_callback(eth_tx_monitor_cb)
+
+    # Run gRPC server with Nfb and Dma services usable for libnfb-ext-grpc
+    with NfbDmaThreadedGrpcServer(ram, dev):
+        await Timer(10, units='ms')
+
+
+# Module-level waveform setup: these statements run when the test module is
+# imported, not inside a test coroutine.
+core = NFBDevice.core_instance_from_top(cocotb.top)
+
+# NOTE(review): `pcic` is not referenced anywhere else in this module.
+pcic = core.pcie_i.pcie_core_i
+#ms.cmd(f"log -recursive {ms.cocotb2path(core)}/*")
+
+# MI bus of the PCIe module, together with its reset and clock
+ms.add_wave(core.pcie_i.MI_RESET)
+ms.add_wave(core.pcie_i.MI_CLK)
+# The third argument (0) is presumably the bus index - TODO confirm
+MiBus(core.pcie_i, 'MI', 0, label='MI_PCIe').add_wave()
+
+# MI bus of the application module, together with its clock
+ms.add_wave(core.app_i.MI_CLK)
+MiBus(core.app_i, 'MI', label='MI_APP').add_wave()
+
+# Ethernet RX/TX MFB buses of the application and the first Ethernet clock
+ms.add_wave(core.app_i.CLK_ETH[0])
+MfbBus(core.app_i, 'ETH_RX_MFB', 0).add_wave()
+MfbBus(core.app_i, 'ETH_TX_MFB', 0).add_wave()
+
+# DMA RX/TX MFB buses of the application and the DMA clock
+ms.add_wave(core.app_i.DMA_CLK)
+MfbBus(core.app_i, 'DMA_RX_MFB', 0).add_wave()
+MfbBus(core.app_i, 'DMA_TX_MFB', 0).add_wave()
diff --git a/apps/minimal/tests/cocotb/cocotb_test.py b/apps/minimal/tests/cocotb/cocotb_test.py
@@ -1,6 +1,6 @@
-import sys
+#import sys
+import logging
 import cocotb
-#import logging
 from cocotb.triggers import Timer
 
 from ndk_core import NFBDevice
@@ -11,15 +11,20 @@
 from cocotbext.ofm.utils.sim.bus import MfbBus, MiBus, DmaUpMvbBus, DmaDownMvbBus
 
 
+#logging.basicConfig(stream=sys.stderr, force=True)
+#logging.getLogger().setLevel(logging.INFO)
+
+logger = logging.getLogger(__name__)
+
+# Shortcuts
 e = cocotb.external
 st = cocotb.utils.get_sim_time
 
 
-async def get_dev(dut, init=True):
-    dev = NFBDevice(dut)
+async def get_dev(dut, init=True, **kwargs):
+    dev = NFBDevice(dut, **kwargs)
     if init:
         await dev.init()
-    # dev._servicer._log.setLevel(logging.DEBUG)
     return dev, dev.nfb
 
 
@@ -89,11 +94,10 @@ async def _test_ndp_sendmsg(dut, dev=None, nfb=None):
 
     pkt = bytes([i for i in range(72)])
 
-    def eth_tx_monitor_cb(p):
-        print(len(p), bytes(p).hex())
-        #assert bytes(p) == pkt
-
-    dev._eth_tx_monitor[0].add_callback(eth_tx_monitor_cb)
+    for i, tx in enumerate(dev._eth_tx_monitor):
+        # Bind the current index as a default argument: a plain closure looks
+        # `i` up at call time, when it no longer holds this monitor's index.
+        def eth_tx_monitor_cb(p, i=i):
+            logger.debug(f"tx_eth{i} packet transmitted: len={len(p)}, data={bytes(p).hex()}")
+        tx.add_callback(eth_tx_monitor_cb)
 
     count = 1
     for i in range(count):
@@ -113,11 +117,10 @@ async def _test_ndp_sendmsg_burst(dut, dev=None, nfb=None):
         await e(eth.txmac.reset_stats)()
         await e(eth.txmac.enable)()
 
-    def eth_tx_monitor_cb(p):
-        print(len(p), bytes(p).hex())
-        sys.stdout.flush()
-
-    dev._eth_tx_monitor[0].add_callback(eth_tx_monitor_cb)
+    for i, tx in enumerate(dev._eth_tx_monitor):
+        # Bind the current index as a default argument: a plain closure looks
+        # `i` up at call time, when it no longer holds this monitor's index.
+        def eth_tx_monitor_cb(p, i=i):
+            logger.debug(f"tx_eth{i} packet transmitted: len={len(p)}, data={bytes(p).hex()}")
+        tx.add_callback(eth_tx_monitor_cb)
 
     pkts = range(20, 28)
     for i in pkts:

diff --git a/apps/minimal/tests/cocotb/pyproject.toml b/apps/minimal/tests/cocotb/pyproject.toml
@@ -5,6 +5,12 @@ dependencies = [
 	"cocotbext-ofm[nfb] @ ${NDK_FPGA_COCOTBEXT_OFM_URL}",
 ]
 
+[project.optional-dependencies]
+grpc = [
+	"scapy",
+	"cocotbext-ofm[nfb_grpcio] @ ${NDK_FPGA_COCOTBEXT_OFM_URL}",
+]
+
 [build-system]
 requires = ["pdm-backend"]
 build-backend = "pdm.backend"
diff --git a/apps/minimal/tests/cocotb/readme.rst b/apps/minimal/tests/cocotb/readme.rst
@@ -0,0 +1,148 @@
+=====================
+Top-level simulations
+=====================
+
+The top-level simulations (TLS) are suitable for:
+
+-  software testing without access to a real acceleration card
+-  whole-design compilation and basic functionality checks
+-  address space debugging
+-  checks of resets, clocks, and clock domain crossings
+
+Basics
+======
+
+What simulation includes / excludes
+-----------------------------------
+
+The TLS doesn’t simulate IP cores but emulates their I/O signals. The
+primary effort is to emulate the Ethernet and PCIe I/O and DMA for the most used FPGA families
+(Xilinx US+, Intel P-TILE, Intel E-TILE, ...).
+
+Common tips for cocotb
+----------------------
+
+-  Use the ``sys.stderr`` stream for ModelSim / Questa so that log
+   messages are displayed immediately:
+
+   .. code:: python
+
+      logging.basicConfig(stream=sys.stderr, force=True)
+
+-  Enable verbose messages for all loggers except selected ones:
+
+   .. code:: python
+
+      logging.getLogger().setLevel(logging.INFO)
+      logging.getLogger("cocotbext.nfb.ext.grpc.server").setLevel(logging.WARNING)
+
+gRPC example
+============
+
+This variant enables the usage of external processes and is intended for
+manual interaction. The TLS doesn’t contain any real test cases. Instead, it
+starts the gRPC server, runs for a specified simulation time (e.g., 10ms), and
+expects the user to execute an application that uses the acceleration
+card through the ``libnfb`` gRPC extension. The application then generates
+I/O read and write requests and the simulator translates them to the bus
+interface and back.
+
+A feature in ``libnfb-ext-grpc`` enables simple handling of DMA requests
+from the simulated design. If the ``dma_vas`` tag is present in the device
+string, the library opens a reverse stream. As soon as the DMA request
+arrives in the process, ``libnfb-ext-grpc`` copies data from/to the virtual
+address space of the process. Just use the virtual address for
+descriptor values. There are no boundary checks, so a request can
+potentially harm the process, which will probably be killed by a ``SIGSEGV``
+signal in case of an error.
+
+The simple DMA request handling is most suitable for a DPDK application;
+see the section below.
+
+Prerequisites
+-------------
+
+libnfb-ext-grpc.so from the libnfb-ext-grpc package (RPM)
+
+Running
+-------
+
+1. Prepare a Python environment and install packages
+
+   .. code:: shell
+
+      . ./prepare.sh
+
+2. Install specific dependencies for the gRPC-based simulation
+
+   .. code:: shell
+
+      pip install .[grpc]
+
+3. Run the simulation
+
+   .. code:: shell
+
+      make COCOTB_MODULE=cocotb_grpc
+
+4. Wait until this message appears in the console
+
+   ``gRPC server started, listening on 50051. Device string: libnfb-ext-grpc.so:grpc:localhost:50051``
+
+5. Run your software application and specify the device string
+
+   .. code:: shell
+
+      $ nfb-eth -ri0 -d libnfb-ext-grpc.so:grpc:localhost:50051
+
+DPDK usage
+----------
+
+DPDK needs to be executed with the ``--vdev`` argument:
+
+.. code:: shell
+
+   sudo dpdk-testpmd --vdev=eth_vdev_nfb,dev=libnfb-ext-grpc.so:grpc+dma_vas:localhost:50051,queue_driver=native --iova-mode=va -- -i
+
+The ``queue_driver=native`` mode is currently the only supported one, and
+``--iova-mode=va`` is essential for it. The ``dma_vas`` tag must also
+be specified in the device string:
+``libnfb-ext-grpc.so:grpc+dma_vas:localhost:50051``.
+
+Do not forget to allocate hugepages.
+
+Tips
+----
+
+Concurrent processes
+^^^^^^^^^^^^^^^^^^^^
+
+The simulation environment can handle requests from multiple running
+applications at once. For example: start the ``dpdk-testpmd`` in
+interactive mode, enable MACs with ``nfb-eth -e1`` and then type
+``start`` in the DPDK app prompt. Be aware that only one application should use
+the ``dma_vas`` tag in the device string at a time.
+
+*There is an issue with nfb locks: nfb_comp_lock / nfb_comp_unlock are not
+implemented. Two processes mixing requests on one lock-aware component
+will probably break its function.*
+
+Locally built libnfb-ext-grpc.so
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the gRPC client library is not in the standard system path (``/usr/lib``),
+use the full path in the device parameter:
+
+.. code:: shell
+
+   nfb-info -d /home/joe/ndk-sw/cmake-build/ext/libnfb-ext-grpc/libnfb-ext-grpc.so:grpc:localhost:50051
+
+Remote access to TLS
+^^^^^^^^^^^^^^^^^^^^
+
+Listen on all IP addresses:
+
+``NfbDmaThreadedGrpcServer(ram, dev, addr='0.0.0.0')``
+
+and run the application on another machine with the ``target_addr:port`` string in the device parameter.
+
diff --git a/python/cocotbext/cocotbext/nfb/ext/grpc/__init__.py b/python/cocotbext/cocotbext/nfb/ext/grpc/__init__.py
@@ -1,3 +1,4 @@
-from .servicer import Servicer
+from .server import NfbDmaThreadedGrpcServer
+from .dma import RAM
 
-__all__ = ['Servicer']
+__all__ = ["NfbDmaThreadedGrpcServer", "RAM"]