Skip to content

Commit

Permalink
Merge branch 'spinler-feat-dpdk_toplevel_sim' into 'devel'
Browse files Browse the repository at this point in the history
Introduce Top-level simulation example for external processes using libnfb-ext-grpc plugin

See merge request ndk/ndk-fpga!133
  • Loading branch information
jakubcabal committed Jan 10, 2025
2 parents 9e857a3 + 6b4b9fc commit fc40cfe
Show file tree
Hide file tree
Showing 11 changed files with 511 additions and 178 deletions.
80 changes: 80 additions & 0 deletions apps/minimal/tests/cocotb/cocotb_grpc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#import sys
import logging

import cocotb
from cocotb.triggers import Timer

import scapy.all
import scapy.utils
import scapy.volatile
import scapy.contrib.mpls

from ndk_core import NFBDevice

import cocotbext.ofm.utils.sim.modelsim as ms
import cocotb.utils

from cocotbext.ofm.utils.sim.bus import MfbBus, MiBus
from cocotbext.nfb.ext.grpc import RAM, NfbDmaThreadedGrpcServer


#logging.basicConfig(stream=sys.stderr, force=True)
#logging.getLogger().setLevel(logging.DEBUG)

# Module-level logger named after this module
logger = logging.getLogger(__name__)

# Shortcuts
e = cocotb.external
st = cocotb.utils.get_sim_time


async def get_dev(dut, init=True, **kwargs):
    """Create an ``NFBDevice`` for *dut* and optionally initialize it.

    Args:
        dut: Design handle handed to the ``NFBDevice`` constructor.
        init: When true, ``NFBDevice.init()`` is awaited before returning.
        **kwargs: Extra keyword arguments forwarded unchanged to ``NFBDevice``.

    Returns:
        Tuple ``(device, device.nfb)``.
    """
    device = NFBDevice(dut, **kwargs)
    if not init:
        # Caller wants the raw, uninitialized device
        return device, device.nfb
    await device.init()
    return device, device.nfb


@cocotb.test()
async def test_grpc(dut):
    """Run the design with a gRPC server attached for external processes.

    The test feeds a stream of identical TCP/IP frames to every RX Ethernet
    driver, logs every frame seen by the TX Ethernet monitors, and keeps the
    gRPC server (Nfb and Dma services) alive for 10 ms of simulation time.

    Args:
        dut: Top-level design handle supplied by cocotb.
    """
    ram = RAM()
    dev, nfb = await get_dev(dut, ram=ram)

    # Generate packets on RX eth
    async def rx_packet(eth, count):
        # The frame content never changes, so build and serialize it once
        # instead of re-running scapy for every transmitted packet.
        pkt = scapy.all.Ether()/scapy.all.IP(dst="127.0.0.1")/scapy.all.TCP()/"GET /index.html HTTP/1.0 \n\n"
        data = bytes(pkt)
        for _ in range(count):
            # Hand the driver a fresh list each time, as the original code did
            await eth.write_packet(list(data))

    for rx in dev._eth_rx_driver:
        cocotb.start_soon(rx_packet(rx, 50000))

    # Log packets on TX eth
    def make_tx_logger(index):
        # Bind the port index per callback. A closure defined directly in the
        # loop would look up the loop variable only when called and therefore
        # report the last port index for every monitor.
        def eth_tx_monitor_cb(p):
            # Skip the hex dump entirely unless DEBUG output is enabled
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f"tx_eth{index} packet transmitted: len={len(p)}, data={bytes(p).hex()}")
        return eth_tx_monitor_cb

    for i, tx in enumerate(dev._eth_tx_monitor):
        tx.add_callback(make_tx_logger(i))

    # Run gRPC server with Nfb and Dma services usable for libnfb-ext-grpc
    with NfbDmaThreadedGrpcServer(ram, dev):
        await Timer(10, units='ms')


# Module-level waveform setup; these statements run when this module is imported.
# NOTE(review): presumably the add_wave() calls register signals/buses in the
# ModelSim/Questa wave window in the order written below -- confirm in cocotbext.ofm.
core = NFBDevice.core_instance_from_top(cocotb.top)

# NOTE(review): pcic is not referenced by the visible code -- confirm it is still needed.
pcic = core.pcie_i.pcie_core_i
#ms.cmd(f"log -recursive {ms.cocotb2path(core)}/*")

# PCIe part: MI reset and clock, then the MI bus (index 0) labelled 'MI_PCIe'
ms.add_wave(core.pcie_i.MI_RESET)
ms.add_wave(core.pcie_i.MI_CLK)
MiBus(core.pcie_i, 'MI', 0, label='MI_PCIe').add_wave()

# Application part: MI clock and the MI bus labelled 'MI_APP'
ms.add_wave(core.app_i.MI_CLK)
MiBus(core.app_i, 'MI', label='MI_APP').add_wave()

# Application part: first Ethernet clock and the RX/TX Ethernet MFB buses (index 0)
ms.add_wave(core.app_i.CLK_ETH[0])
MfbBus(core.app_i, 'ETH_RX_MFB', 0).add_wave()
MfbBus(core.app_i, 'ETH_TX_MFB', 0).add_wave()

# Application part: DMA clock and the RX/TX DMA MFB buses (index 0)
ms.add_wave(core.app_i.DMA_CLK)
MfbBus(core.app_i, 'DMA_RX_MFB', 0).add_wave()
MfbBus(core.app_i, 'DMA_TX_MFB', 0).add_wave()
33 changes: 18 additions & 15 deletions apps/minimal/tests/cocotb/cocotb_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sys
#import sys
import logging
import cocotb
#import logging
from cocotb.triggers import Timer

from ndk_core import NFBDevice
Expand All @@ -11,15 +11,20 @@
from cocotbext.ofm.utils.sim.bus import MfbBus, MiBus, DmaUpMvbBus, DmaDownMvbBus


#logging.basicConfig(stream=sys.stderr, force=True)
#logging.getLogger().setLevel(logging.INFO)

logger = logging.getLogger(__name__)

# Shortcuts
e = cocotb.external
st = cocotb.utils.get_sim_time


async def get_dev(dut, init=True):
dev = NFBDevice(dut)
async def get_dev(dut, init=True, **kwargs):
dev = NFBDevice(dut, **kwargs)
if init:
await dev.init()
# dev._servicer._log.setLevel(logging.DEBUG)
return dev, dev.nfb


Expand Down Expand Up @@ -89,11 +94,10 @@ async def _test_ndp_sendmsg(dut, dev=None, nfb=None):

pkt = bytes([i for i in range(72)])

def eth_tx_monitor_cb(p):
print(len(p), bytes(p).hex())
#assert bytes(p) == pkt

dev._eth_tx_monitor[0].add_callback(eth_tx_monitor_cb)
for i, tx in enumerate(dev._eth_tx_monitor):
def eth_tx_monitor_cb(p):
logger.debug(f"tx_eth{i} packet transmitted: len={len(p)}, data={bytes(p).hex()}")
tx.add_callback(eth_tx_monitor_cb)

count = 1
for i in range(count):
Expand All @@ -113,11 +117,10 @@ async def _test_ndp_sendmsg_burst(dut, dev=None, nfb=None):
await e(eth.txmac.reset_stats)()
await e(eth.txmac.enable)()

def eth_tx_monitor_cb(p):
print(len(p), bytes(p).hex())
sys.stdout.flush()

dev._eth_tx_monitor[0].add_callback(eth_tx_monitor_cb)
for i, tx in enumerate(dev._eth_tx_monitor):
def eth_tx_monitor_cb(p):
logger.debug(f"tx_eth{i} packet transmitted: len={len(p)}, data={bytes(p).hex()}")
tx.add_callback(eth_tx_monitor_cb)

pkts = range(20, 28)
for i in pkts:
Expand Down
6 changes: 6 additions & 0 deletions apps/minimal/tests/cocotb/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ dependencies = [
"cocotbext-ofm[nfb] @ ${NDK_FPGA_COCOTBEXT_OFM_URL}",
]

[project.optional-dependencies]
grpc = [
"scapy",
"cocotbext-ofm[nfb_grpcio] @ ${NDK_FPGA_COCOTBEXT_OFM_URL}",
]

[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
148 changes: 148 additions & 0 deletions apps/minimal/tests/cocotb/readme.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
=====================
Top-level simulations
=====================

The top-level simulations (TLS) are suitable for:

- software testing without access to a real acceleration card
- whole design compile and basic functionality check
- address space debugging
- resets, clocks, clock domain crossings check

Basics
======

What simulation includes / excludes
-----------------------------------

The TLS doesn’t simulate IP cores but emulates their I/O signals. The
primary effort is to emulate the Ethernet and PCIe I/O and DMA for the most used FPGA families
(Xilinx US+, Intel P-TILE, Intel E-TILE, ...).

Common tips for cocotb
----------------------

- Use the ``sys.stderr`` stream for ModelSim / Questa to achieve instant
  display of log messages:

  .. code:: python

     logging.basicConfig(stream=sys.stderr, force=True)

- Verbose messages for all loggers except some:

  .. code:: python

     logging.getLogger().setLevel(logging.INFO)
     logging.getLogger("cocotbext.nfb.ext.grpc.server").setLevel(logging.WARNING)

gRPC example
============

This variant enables the usage of external processes and is intended for
manual interaction. The TLS doesn’t contain any real test cases. Instead, it
starts the gRPC server, runs for a specified simulation time (e.g., 10ms), and
expects the user to execute an application that uses the acceleration
card through the ``libnfb`` gRPC extension. The application then generates
I/O read and write requests and the simulator translates them to the bus
interface and back.

A feature in ``libnfb-ext-grpc`` enables simple handling of DMA requests
from the simulated design. If the ``dma_vas`` tag is present in the device
string, the library opens a reverse stream. As soon as the DMA request
arrives in the process, ``libnfb-ext-grpc`` copies data from/to the virtual
address space of the process. Just use the virtual address for
descriptor values. There are no boundary checks and the request can
potentially harm the process, which will probably be killed by a ``SIGSEGV``
signal in case of an error.

The simple DMA request handling is most suitable for a DPDK application;
see the section below.

Prerequisites
-------------

The ``libnfb-ext-grpc.so`` library from the ``libnfb-ext-grpc`` package (RPM) must be installed.

Running
-------

1. Prepare a Python environment and install packages

   .. code:: shell

      . ./prepare.sh

2. Install specific dependencies for the gRPC-based simulation

   .. code:: shell

      pip install .[grpc]

3. Run the simulation

   .. code:: shell

      make COCOTB_MODULE=cocotb_grpc

4. Wait until this message appears in the console

   ``gRPC server started, listening on 50051. Device string: libnfb-ext-grpc.so:grpc:localhost:50051``

5. Run your software application and specify the device string

   .. code:: shell

      $ nfb-eth -ri0 -d libnfb-ext-grpc.so:grpc:localhost:50051

DPDK usage
----------

DPDK needs to be executed with the ``--vdev`` argument:

.. code:: shell

   sudo dpdk-testpmd --vdev=eth_vdev_nfb,dev=libnfb-ext-grpc.so:grpc+dma_vas:localhost:50051,queue_driver=native --iova-mode=va -- -i

The ``queue_driver=native`` is currently the only supported mode, for which the
``--iova-mode=va`` is essential. The ``dma_vas`` tag must also
be stated in the device string:
``libnfb-ext-grpc.so:grpc+dma_vas:localhost:50051``.

Do not forget to allocate hugepages.

Tips
----

Concurrent processes
^^^^^^^^^^^^^^^^^^^^

The simulation environment can handle requests from multiple running
applications at once. For example: start the ``dpdk-testpmd`` in
interactive mode, enable MACs with ``nfb-eth -e1`` and then type
``start`` in the DPDK app prompt. Be aware that only one application should use
the ``dma_vas`` tag in the device string at a time.

*There is an issue with nfb locks: nfb_comp_lock / nfb_comp_unlock is not
implemented. Two processes mixing requests on one lock-aware component
will probably break its function.*

Locally built libnfb-ext-grpc.so
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If the gRPC client library is not in the standard system path (``/usr/lib``),
use the full path in the device parameter:

.. code:: shell

   nfb-info -d /home/joe/ndk-sw/cmake-build/ext/libnfb-ext-grpc/libnfb-ext-grpc.so:grpc:localhost:50051

Remote access to TLS
^^^^^^^^^^^^^^^^^^^^

Listen on all IP addresses:

``NfbDmaThreadedGrpcServer(ram, dev, addr='0.0.0.0')``

and run the application on another machine with the ``target_addr:port`` string in the device parameter.

5 changes: 3 additions & 2 deletions python/cocotbext/cocotbext/nfb/ext/grpc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .servicer import Servicer
from .server import NfbDmaThreadedGrpcServer
from .dma import RAM

__all__ = ['Servicer']
__all__ = ["NfbDmaThreadedGrpcServer", "RAM"]
Loading

0 comments on commit fc40cfe

Please sign in to comment.