Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some bug fixes for the next release #1327

Merged
merged 17 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions common/cuda_hip/preconditioner/isai_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,33 @@ __forceinline__ __device__ void generic_generate(
auto m_row_begin = m_row_ptrs[col];
auto m_row_size = m_row_ptrs[col + 1] - m_row_begin;
// extract the dense submatrix consisting of the entries whose
// columns/rows match column indices from this row
// columns/rows match column indices from this row within the
// sparsity pattern of the original matrix (matches outside of that
// are zero)
group_match<subwarp_size>(
m_col_idxs + m_row_begin, m_row_size, i_col_idxs + i_row_begin,
i_row_size, subwarp,
[&](IndexType, IndexType m_idx, IndexType i_idx,
config::lane_mask_type, bool valid) {
rhs_one_idx += popcnt(subwarp.ballot(
valid && m_col_idxs[m_row_begin + m_idx] < row &&
col == row));
if (valid) {
dense_system(nz, i_idx) = m_values[m_row_begin + m_idx];
}
});
const auto i_transposed_row_begin = i_row_ptrs[col];
const auto i_transposed_row_size =
i_row_ptrs[col + 1] - i_transposed_row_begin;
// Loop over all matches that are within the sparsity pattern of
// the inverse to find the index of the one in the right-hand-side
group_match<subwarp_size>(
i_col_idxs + i_transposed_row_begin, i_transposed_row_size,
i_col_idxs + i_row_begin, i_row_size, subwarp,
[&](IndexType, IndexType m_idx, IndexType i_idx,
config::lane_mask_type, bool valid) {
rhs_one_idx += popcnt(subwarp.ballot(
valid &&
i_col_idxs[i_transposed_row_begin + m_idx] < row &&
col == row));
});
}

subwarp.sync();
Expand Down
22 changes: 22 additions & 0 deletions core/log/profiler_hook.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,28 @@ void ProfilerHook::on_iteration_complete(
}


void ProfilerHook::on_iteration_complete(const LinOp* solver,
const size_type& num_iterations,
const LinOp* residual,
const LinOp* solution,
const LinOp* residual_norm) const
{
on_iteration_complete(solver, nullptr, solution, num_iterations, residual,
residual_norm, nullptr, nullptr, false);
}


void ProfilerHook::on_iteration_complete(
const LinOp* solver, const size_type& num_iterations, const LinOp* residual,
const LinOp* solution, const LinOp* residual_norm,
const LinOp* implicit_sq_residual_norm) const
{
on_iteration_complete(solver, nullptr, solution, num_iterations, residual,
residual_norm, implicit_sq_residual_norm, nullptr,
false);
}


bool ProfilerHook::needs_propagation() const
{
    // This logger always requests propagation of logged events.
    return true;
}


Expand Down
24 changes: 24 additions & 0 deletions core/log/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,30 @@ void Stream<ValueType>::on_iteration_complete(
}


template <typename ValueType>
void Stream<ValueType>::on_iteration_complete(const LinOp* solver,
                                              const size_type& num_iterations,
                                              const LinOp* residual,
                                              const LinOp* solution,
                                              const LinOp* residual_norm) const
{
    // Legacy overload: forward to the extended event, with the arguments
    // unavailable here (right-hand side, implicit squared residual norm,
    // stopping status, all-stopped flag) passed as null/false.
    this->on_iteration_complete(solver, nullptr, solution, num_iterations,
                                residual, residual_norm, nullptr, nullptr,
                                false);
}


template <typename ValueType>
void Stream<ValueType>::on_iteration_complete(
    const LinOp* solver, const size_type& num_iterations, const LinOp* residual,
    const LinOp* solution, const LinOp* residual_norm,
    const LinOp* implicit_sq_residual_norm) const
{
    // Legacy overload: forward to the extended event, defaulting the
    // right-hand side, stopping status and all-stopped flag, which this
    // signature does not provide.
    this->on_iteration_complete(solver, nullptr, solution, num_iterations,
                                residual, residual_norm,
                                implicit_sq_residual_norm, nullptr, false);
}


#define GKO_DECLARE_STREAM(_type) class Stream<_type>
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_STREAM);

Expand Down
5 changes: 3 additions & 2 deletions core/preconditioner/isai.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ std::shared_ptr<Csr> extend_sparsity(std::shared_ptr<const Executor>& exec,
template <isai_type IsaiType, typename ValueType, typename IndexType>
void Isai<IsaiType, ValueType, IndexType>::generate_inverse(
std::shared_ptr<const LinOp> input, bool skip_sorting, int power,
IndexType excess_limit)
IndexType excess_limit, remove_complex<ValueType> excess_solver_reduction)
{
using Dense = matrix::Dense<ValueType>;
using LowerTrs = solver::LowerTrs<ValueType, IndexType>;
Expand Down Expand Up @@ -238,7 +238,8 @@ void Isai<IsaiType, ValueType, IndexType>::generate_inverse(
gko::stop::ResidualNorm<ValueType>::build()
.with_baseline(gko::stop::mode::rhs_norm)
.with_reduction_factor(
remove_complex<ValueType>{1e-6})
remove_complex<ValueType>{
excess_solver_reduction})
.on(exec))
.on(exec);
excess_solution->copy_from(excess_rhs);
Expand Down
39 changes: 32 additions & 7 deletions core/solver/multigrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,14 +563,39 @@ void Multigrid::generate()
auto exec = this->get_executor();
// default coarse grid solver, direct LU
// TODO: maybe remove fixed index type
auto gen_default_solver = [&] {
return experimental::solver::Direct<value_type, int32>::build()
.with_factorization(
experimental::factorization::Lu<value_type,
auto gen_default_solver = [&]() -> std::unique_ptr<LinOp> {
// TODO: unify when dpcpp supports direct solver
if (dynamic_cast<const DpcppExecutor*>(exec.get())) {
using absolute_value_type = remove_complex<value_type>;
return solver::Gmres<value_type>::build()
.with_criteria(
stop::Iteration::build()
.with_max_iters(matrix->get_size()[0])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you test the default behavior on a decently-sized matrix? If we have the choice between taking forever and producing a precise result, and returning an imprecise result after a shorter time, I would prefer going for the latter.

Copy link
Member Author

@MarcelKoch MarcelKoch May 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what I did here, but I wanted it to be min(100, matrix->get_size()[0]). I just wanted to make sure that for small matrices, we don't use more krylov vectors than the matrix size. Wrong place.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think as default, we should solve exactly on the coarsest level. If that can't be achieved in reasonable time, then the problem might be too difficult for the default settings and the user needs to specify the solver anyway.

.on(exec),
stop::ResidualNorm<value_type>::build()
.with_reduction_factor(
std::numeric_limits<
absolute_value_type>::epsilon() *
absolute_value_type{10})
.on(exec))
.with_krylov_dim(
std::min(size_type(100), matrix->get_size()[0]))
.with_preconditioner(
preconditioner::Jacobi<value_type>::build()
.with_max_block_size(1u)
.on(exec))
.on(exec)
->generate(matrix);
} else {
return experimental::solver::Direct<value_type,
int32>::build()
.on(exec))
.on(exec)
->generate(matrix);
.with_factorization(
experimental::factorization::Lu<value_type,
int32>::build()
.on(exec))
.on(exec)
->generate(matrix);
}
};
if (parameters_.coarsest_solver.size() == 0) {
coarsest_solver_ = gen_default_solver();
Expand Down
48 changes: 48 additions & 0 deletions core/stop/combined.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,28 @@ namespace gko {
namespace stop {


Combined::Combined(std::shared_ptr<const gko::Executor> exec)
: EnablePolymorphicObject<Combined, Criterion>(std::move(exec))
{}


// Constructs a Combined criterion from its factory, instantiating one
// sub-criterion per non-null factory entry in the parameters.
Combined::Combined(const Combined::Factory* factory, const CriterionArgs& args)
    : EnablePolymorphicObject<Combined, Criterion>(factory->get_executor()),
      parameters_{factory->get_parameters()}
{
    for (const auto& criterion_factory : parameters_.criteria) {
        // nullptr entries in the list are silently skipped
        if (criterion_factory != nullptr) {
            criteria_.push_back(criterion_factory->generate(args));
        }
    }
    // An empty (or all-nullptr) criteria list is rejected, since a combined
    // criterion without sub-criteria could never decide to stop.
    if (criteria_.empty()) {
        GKO_NOT_SUPPORTED(this);
    }
}


bool Combined::check_impl(uint8 stoppingId, bool setFinalized,
array<stopping_status>* stop_status,
bool* one_changed, const Updater& updater)
Expand All @@ -58,5 +80,31 @@ bool Combined::check_impl(uint8 stoppingId, bool setFinalized,
}


Combined::Factory::Factory(std::shared_ptr<const ::gko::Executor> exec)
: Base(std::move(exec))
{}


Combined::Factory::Factory(std::shared_ptr<const ::gko::Executor> exec,
const Combined::parameters_type& parameters)
: Base(std::move(exec), parameters)
{}


// Copy-assigns a Combined::Factory, cloning every sub-criterion factory of
// `other` onto this factory's executor (the two factories may live on
// different executors).
Combined::Factory& Combined::Factory::operator=(const Combined::Factory& other)
{
    if (this != &other) {
        parameters_type new_parameters;
        // Defensively drop any default entries before cloning `other`'s list.
        new_parameters.criteria.clear();
        const auto& other_criteria = other.get_parameters().criteria;
        // Reserve upfront to avoid repeated reallocations while cloning.
        new_parameters.criteria.reserve(other_criteria.size());
        // Iterate by const reference: copying each std::shared_ptr would
        // incur an unnecessary atomic refcount update per entry.
        // NOTE(review): unlike the Combined constructor, nullptr entries are
        // not filtered out here before gko::clone -- confirm callers never
        // store nullptr factories in the criteria list.
        for (const auto& criterion : other_criteria) {
            new_parameters.criteria.push_back(
                gko::clone(this->get_executor(), criterion));
        }
        Base::operator=(Factory(this->get_executor(), new_parameters));
    }
    return *this;
}


} // namespace stop
} // namespace gko
6 changes: 6 additions & 0 deletions core/test/base/exception_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,4 +321,10 @@ TEST(KernelNotFound, ThrowsKernelNotFoundException)
}


// The GKO_INVALID_STATE macro must throw gko::InvalidStateError, even for an
// empty message string.
TEST(InvalidState, ThrowsInvalidStateException)
{
    ASSERT_THROW(GKO_INVALID_STATE(""), gko::InvalidStateError);
}


} // namespace
Loading