Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial polyphase channelizer operator #459

Merged
merged 2 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs_input/api/signalimage/filtering/channelize_poly.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
.. _channelize_poly_func:

channelize_poly
===============

Polyphase channelizer with a configurable number of channels

.. doxygenfunction:: matx::channelize_poly(const InType &in, const FilterType &f, index_t num_channels, index_t decimation_factor)

Examples
~~~~~~~~

.. literalinclude:: ../../../../test/00_transform/ChannelizePoly.cu
:language: cpp
:start-after: example-begin channelize_poly-test-1
:end-before: example-end channelize_poly-test-1
:dedent:

1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
set(examples
simple_radar_pipeline
recursive_filter
channelize_poly_bench
convolution
conv2d
cgsolve
Expand Down
143 changes: 143 additions & 0 deletions examples/channelize_poly_bench.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2021, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////

#include "matx.h"
#include <cassert>
#include <cstdio>
#include <cmath>
#include <memory>
#include <fstream>
#include <istream>
#include <cuda/std/complex>

using namespace matx;

// This example is used primarily for development purposes to benchmark the performance of the
// polyphase channelizer kernel(s). Typically, the parameters below (batch size, filter
// length, input signal length, and channel range) will be adjusted to a range of interest
// and the benchmark will be run with and without the proposed kernel changes.

constexpr int NUM_WARMUP_ITERATIONS = 2;

// Number of iterations per timed test. Iteration times are averaged in the report.
constexpr int NUM_ITERATIONS = 20;

// Benchmarks matx::channelize_poly over a sweep of channel counts for a fixed
// set of (batch, filter-length-per-channel, input-length) test cases.
//
// InType:  element type of the input signal and filter taps.
// OutType: element type of the channelized output (typically complex).
// channel_start/channel_stop: inclusive range of channel counts to sweep.
//
// All work is issued on a dedicated non-default stream; per-iteration times are
// measured with CUDA events recorded on that same stream and averaged.
template <typename InType, typename OutType>
void ChannelizePolyBench(matx::index_t channel_start, matx::index_t channel_stop)
{
  struct {
    matx::index_t num_batches;
    matx::index_t filter_len_per_channel;
    matx::index_t input_len;
  } test_cases[] = {
    { 1, 17, 256 },
    { 1, 17, 3000 },
    { 1, 17, 31000 },
    { 1, 17, 256000 },
    { 42, 17, 256000 },
    { 128, 17, 256000 },
  };

  cudaStream_t stream;
  cudaStreamCreate(&stream);
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);

  for (size_t i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); i++) {
    for (matx::index_t num_channels = channel_start; num_channels <= channel_stop; num_channels++) {
      const matx::index_t num_batches = test_cases[i].num_batches;
      const matx::index_t filter_len = test_cases[i].filter_len_per_channel * num_channels;
      const matx::index_t input_len = test_cases[i].input_len;
      // Ceil-divide: each channel receives every num_channels-th decimated sample.
      const matx::index_t output_len_per_channel = (input_len + num_channels - 1) / num_channels;

      auto input = matx::make_tensor<InType, 2>({num_batches, input_len});
      auto filter = matx::make_tensor<InType, 1>({filter_len});
      auto output = matx::make_tensor<OutType, 3>({num_batches, output_len_per_channel, num_channels});

      // Prefetch tensors to the device so first-touch page migration is not timed.
      input.PrefetchDevice(stream);
      filter.PrefetchDevice(stream);
      output.PrefetchDevice(stream);
      cudaStreamSynchronize(stream);

      const matx::index_t decimation_factor = num_channels;

      // Warm-up runs: exclude JIT/plan/cache effects from the timed loop.
      for (int k = 0; k < NUM_WARMUP_ITERATIONS; k++) {
        (output = channelize_poly(input, filter, num_channels, decimation_factor)).run(stream);
      }

      cudaStreamSynchronize(stream);

      double total_elapsed_ms = 0.0;
      float iter_elapsed_ms = 0.0f;
      for (int k = 0; k < NUM_ITERATIONS; k++) {
        // Record events on the same stream the work runs on; recording on the
        // default stream would not bracket the kernel being measured.
        cudaEventRecord(start, stream);
        (output = channelize_poly(input, filter, num_channels, decimation_factor)).run(stream);
        cudaEventRecord(stop, stream);
        // Ensure the stop event has completed before reading the elapsed time.
        cudaEventSynchronize(stop);
        CUDA_CHECK_LAST_ERROR();
        cudaEventElapsedTime(&iter_elapsed_ms, start, stop);
        total_elapsed_ms += iter_elapsed_ms;
      }

      const double avg_elapsed_us = (total_elapsed_ms/NUM_ITERATIONS)*1.0e3;
      printf("Batches: %5lld Channels: %5lld FilterLen: %5lld InputLen: %7lld Elapsed Usecs: %12.1f MPts/sec: %12.3f\n",
        num_batches, num_channels, filter_len, input_len, avg_elapsed_us, static_cast<double>(num_batches*num_channels*output_len_per_channel)/1.0e6/(avg_elapsed_us/1.0e6));
    }
    printf("\n");
  }

  CUDA_CHECK_LAST_ERROR();

  cudaEventDestroy(start);
  cudaEventDestroy(stop);
  cudaStreamDestroy(stream);
}

// Runs the channelizer benchmark for each supported input/output type pairing
// over a fixed range of channel counts.
int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
{
  MATX_ENTER_HANDLER();

  const matx::index_t channel_start = 3;
  const matx::index_t channel_stop = 10;
  printf("Benchmarking float -> complex<float>\n");
  ChannelizePolyBench<float,cuda::std::complex<float>>(channel_start, channel_stop);

  printf("Benchmarking double -> complex<double>\n");
  ChannelizePolyBench<double,cuda::std::complex<double>>(channel_start, channel_stop);

  printf("Benchmarking complex<double> -> complex<double>\n");
  // Input type must be complex<double> here to match the label above; the
  // original code repeated the real-double case.
  ChannelizePolyBench<cuda::std::complex<double>,cuda::std::complex<double>>(channel_start, channel_stop);

  MATX_EXIT_HANDLER();
}
8 changes: 4 additions & 4 deletions include/matx/core/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ namespace matx

#define MATX_EXIT_HANDLER() \
} \
catch (detail::matxException & e) \
catch (matx::detail::matxException & e) \
{ \
fprintf(stderr, "%s\n", e.what()); \
fprintf(stderr, "Stack Trace:\n%s", e.stack.str().c_str()); \
Expand All @@ -175,7 +175,7 @@ namespace matx

#define MATX_THROW(e, str) \
{ \
throw detail::matxException(e, str, __FILE__, __LINE__); \
throw matx::detail::matxException(e, str, __FILE__, __LINE__); \
}

#ifndef NDEBUG
Expand Down Expand Up @@ -225,7 +225,7 @@ namespace matx
if (e != cudaSuccess) \
{ \
fprintf(stderr, "CUDA Error: %s\n", cudaGetErrorString(e)); \
MATX_THROW(matxCudaError, cudaGetErrorString(e)); \
MATX_THROW(matx::matxCudaError, cudaGetErrorString(e)); \
}

// Macro for checking cuda errors following a cuda launch or api call
Expand All @@ -240,7 +240,7 @@ namespace matx
if constexpr (Rank() > 0) { \
_Pragma("unroll") \
for (int32_t i = 0; i < Rank(); i++) { \
[[maybe_unused]] index_t size = detail::get_expanded_size<Rank()>(op, i); \
[[maybe_unused]] index_t size = matx::detail::get_expanded_size<Rank()>(op, i); \
MATX_ASSERT_STR(size == 0 || size == Size(i), matxInvalidSize, "incompatible op sizes:" + str()); \
} \
}
Expand Down
24 changes: 12 additions & 12 deletions include/matx/core/pybind.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,29 +242,29 @@ class MatXPybind {
if constexpr (is_complex_v<T1> || is_complex_v<T2>) {
if (debug) {
printf("FileName=%s Vector=%f%+f File=%f%+f\n", name.c_str(),
static_cast<float>(ut_data.real()),
static_cast<float>(ut_data.imag()),
static_cast<float>(file_data.real()),
static_cast<float>(file_data.imag()));
static_cast<double>(ut_data.real()),
static_cast<double>(ut_data.imag()),
static_cast<double>(file_data.real()),
static_cast<double>(file_data.imag()));
}

if (fabs(static_cast<float>(ut_data.real()) -
static_cast<float>(file_data.real())) > thresh) {
if (fabs(static_cast<double>(ut_data.real()) -
static_cast<double>(file_data.real())) > thresh) {
return false;
}
if (fabs(static_cast<float>(ut_data.imag()) -
static_cast<float>(file_data.imag())) > thresh) {
if (fabs(static_cast<double>(ut_data.imag()) -
static_cast<double>(file_data.imag())) > thresh) {
return false;
}
}
else {
if (debug) {
std::cout << "FileName=" << name.c_str()
<< " Vector=" << static_cast<float>(ut_data)
<< " File=" << static_cast<float>(file_data) << "\n";
<< " Vector=" << static_cast<double>(ut_data)
<< " File=" << static_cast<double>(file_data) << "\n";
}
else if (fabs(static_cast<float>(ut_data) -
static_cast<float>(file_data)) > thresh) {
else if (fabs(static_cast<double>(ut_data) -
static_cast<double>(file_data)) > thresh) {
return false;
}
}
Expand Down
Loading