Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some bug fixes for the next release #1327

Merged
merged 17 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions common/cuda_hip/preconditioner/isai_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,33 @@ __forceinline__ __device__ void generic_generate(
auto m_row_begin = m_row_ptrs[col];
auto m_row_size = m_row_ptrs[col + 1] - m_row_begin;
// extract the dense submatrix consisting of the entries whose
// columns/rows match column indices from this row
// columns/rows match column indices from this row within the
// sparsity pattern of the original matrix (matches outside of that
// are zero)
group_match<subwarp_size>(
m_col_idxs + m_row_begin, m_row_size, i_col_idxs + i_row_begin,
i_row_size, subwarp,
[&](IndexType, IndexType m_idx, IndexType i_idx,
config::lane_mask_type, bool valid) {
rhs_one_idx += popcnt(subwarp.ballot(
valid && m_col_idxs[m_row_begin + m_idx] < row &&
col == row));
if (valid) {
dense_system(nz, i_idx) = m_values[m_row_begin + m_idx];
}
});
const auto i_transposed_row_begin = i_row_ptrs[col];
const auto i_transposed_row_size =
i_row_ptrs[col + 1] - i_transposed_row_begin;
// Loop over all matches that are within the sparsity pattern of
// the inverse to find the index of the one in the right-hand-side
group_match<subwarp_size>(
i_col_idxs + i_transposed_row_begin, i_transposed_row_size,
i_col_idxs + i_row_begin, i_row_size, subwarp,
[&](IndexType, IndexType m_idx, IndexType i_idx,
config::lane_mask_type, bool valid) {
rhs_one_idx += popcnt(subwarp.ballot(
valid &&
i_col_idxs[i_transposed_row_begin + m_idx] < row &&
col == row));
});
}

subwarp.sync();
Expand Down
22 changes: 22 additions & 0 deletions core/log/profiler_hook.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,28 @@ void ProfilerHook::on_iteration_complete(
}


void ProfilerHook::on_iteration_complete(const LinOp* solver,
const size_type& num_iterations,
const LinOp* residual,
const LinOp* solution,
const LinOp* residual_norm) const
{
on_iteration_complete(solver, nullptr, solution, num_iterations, residual,
residual_norm, nullptr, nullptr, false);
}


void ProfilerHook::on_iteration_complete(
const LinOp* solver, const size_type& num_iterations, const LinOp* residual,
const LinOp* solution, const LinOp* residual_norm,
const LinOp* implicit_sq_residual_norm) const
{
on_iteration_complete(solver, nullptr, solution, num_iterations, residual,
residual_norm, implicit_sq_residual_norm, nullptr,
false);
}


bool ProfilerHook::needs_propagation() const
{
    // This logger always requests propagation of logged events.
    return true;
}


Expand Down
24 changes: 24 additions & 0 deletions core/log/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,30 @@ void Stream<ValueType>::on_iteration_complete(
}


template <typename ValueType>
void Stream<ValueType>::on_iteration_complete(const LinOp* solver,
                                              const size_type& num_iterations,
                                              const LinOp* residual,
                                              const LinOp* solution,
                                              const LinOp* residual_norm) const
{
    // Legacy overload: forward to the extended event, with the arguments
    // unavailable here (right-hand side, implicit squared residual norm,
    // stopping status, all-stopped flag) passed as null/false.
    this->on_iteration_complete(solver, nullptr, solution, num_iterations,
                                residual, residual_norm, nullptr, nullptr,
                                false);
}


template <typename ValueType>
void Stream<ValueType>::on_iteration_complete(
    const LinOp* solver, const size_type& num_iterations, const LinOp* residual,
    const LinOp* solution, const LinOp* residual_norm,
    const LinOp* implicit_sq_residual_norm) const
{
    // Legacy overload: forward to the extended event, defaulting the
    // right-hand side, stopping status and all-stopped flag, which this
    // signature does not provide.
    this->on_iteration_complete(solver, nullptr, solution, num_iterations,
                                residual, residual_norm,
                                implicit_sq_residual_norm, nullptr, false);
}


#define GKO_DECLARE_STREAM(_type) class Stream<_type>
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_STREAM);

Expand Down
5 changes: 3 additions & 2 deletions core/preconditioner/isai.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ std::shared_ptr<Csr> extend_sparsity(std::shared_ptr<const Executor>& exec,
template <isai_type IsaiType, typename ValueType, typename IndexType>
void Isai<IsaiType, ValueType, IndexType>::generate_inverse(
std::shared_ptr<const LinOp> input, bool skip_sorting, int power,
IndexType excess_limit)
IndexType excess_limit, remove_complex<ValueType> excess_solver_reduction)
{
using Dense = matrix::Dense<ValueType>;
using LowerTrs = solver::LowerTrs<ValueType, IndexType>;
Expand Down Expand Up @@ -238,7 +238,8 @@ void Isai<IsaiType, ValueType, IndexType>::generate_inverse(
gko::stop::ResidualNorm<ValueType>::build()
.with_baseline(gko::stop::mode::rhs_norm)
.with_reduction_factor(
remove_complex<ValueType>{1e-6})
remove_complex<ValueType>{
excess_solver_reduction})
.on(exec))
.on(exec);
excess_solution->copy_from(excess_rhs);
Expand Down
39 changes: 32 additions & 7 deletions core/solver/multigrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,14 +563,39 @@ void Multigrid::generate()
auto exec = this->get_executor();
// default coarse grid solver, direct LU
// TODO: maybe remove fixed index type
auto gen_default_solver = [&] {
return experimental::solver::Direct<value_type, int32>::build()
.with_factorization(
experimental::factorization::Lu<value_type,
auto gen_default_solver = [&]() -> std::unique_ptr<LinOp> {
// TODO: unify when dpcpp supports direct solver
if (dynamic_cast<const DpcppExecutor*>(exec.get())) {
using absolute_value_type = remove_complex<value_type>;
return solver::Gmres<value_type>::build()
.with_criteria(
stop::Iteration::build()
.with_max_iters(matrix->get_size()[0])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you test the default behavior on a decently-sized matrix? If we have the choice between taking forever and producing a precise result, and returning an imprecise result after a shorter time, I would prefer going for the latter.

Copy link
Member Author

@MarcelKoch MarcelKoch May 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what I did here, but I wanted it to be min(100, matrix->get_size()[0]). I just wanted to make sure that for small matrices, we don't use more krylov vectors than the matrix size. Wrong place.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think as default, we should solve exactly on the coarsest level. If that can't be achieved in reasonable time, then the problem might be too difficult for the default settings and the user needs to specify the solver anyway.

.on(exec),
stop::ResidualNorm<value_type>::build()
.with_reduction_factor(
std::numeric_limits<
absolute_value_type>::epsilon() *
absolute_value_type{10})
.on(exec))
.with_krylov_dim(
std::min(size_type(100), matrix->get_size()[0]))
.with_preconditioner(
preconditioner::Jacobi<value_type>::build()
.with_max_block_size(1u)
.on(exec))
.on(exec)
->generate(matrix);
} else {
return experimental::solver::Direct<value_type,
int32>::build()
.on(exec))
.on(exec)
->generate(matrix);
.with_factorization(
experimental::factorization::Lu<value_type,
int32>::build()
.on(exec))
.on(exec)
->generate(matrix);
}
};
if (parameters_.coarsest_solver.size() == 0) {
coarsest_solver_ = gen_default_solver();
Expand Down
48 changes: 48 additions & 0 deletions core/stop/combined.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,28 @@ namespace gko {
namespace stop {


Combined::Combined(std::shared_ptr<const gko::Executor> exec)
: EnablePolymorphicObject<Combined, Criterion>(std::move(exec))
{}


// Constructs a Combined criterion from its factory, instantiating one
// sub-criterion per non-null factory entry in the parameters.
Combined::Combined(const Combined::Factory* factory, const CriterionArgs& args)
    : EnablePolymorphicObject<Combined, Criterion>(factory->get_executor()),
      parameters_{factory->get_parameters()}
{
    for (const auto& criterion_factory : parameters_.criteria) {
        // nullptr entries in the list are silently skipped
        if (criterion_factory != nullptr) {
            criteria_.push_back(criterion_factory->generate(args));
        }
    }
    // An empty (or all-nullptr) criteria list is rejected, since a combined
    // criterion without sub-criteria could never decide to stop.
    if (criteria_.empty()) {
        GKO_NOT_SUPPORTED(this);
    }
}


bool Combined::check_impl(uint8 stoppingId, bool setFinalized,
array<stopping_status>* stop_status,
bool* one_changed, const Updater& updater)
Expand All @@ -58,5 +80,31 @@ bool Combined::check_impl(uint8 stoppingId, bool setFinalized,
}


Combined::Factory::Factory(std::shared_ptr<const ::gko::Executor> exec)
: Base(std::move(exec))
{}


Combined::Factory::Factory(std::shared_ptr<const ::gko::Executor> exec,
const Combined::parameters_type& parameters)
: Base(std::move(exec), parameters)
{}


// Copy-assigns a Combined::Factory, cloning every sub-criterion factory of
// `other` onto this factory's executor (the two factories may live on
// different executors).
Combined::Factory& Combined::Factory::operator=(const Combined::Factory& other)
{
    if (this != &other) {
        parameters_type new_parameters;
        // Defensively drop any default entries before cloning `other`'s list.
        new_parameters.criteria.clear();
        const auto& other_criteria = other.get_parameters().criteria;
        // Reserve upfront to avoid repeated reallocations while cloning.
        new_parameters.criteria.reserve(other_criteria.size());
        // Iterate by const reference: copying each std::shared_ptr would
        // incur an unnecessary atomic refcount update per entry.
        // NOTE(review): unlike the Combined constructor, nullptr entries are
        // not filtered out here before gko::clone -- confirm callers never
        // store nullptr factories in the criteria list.
        for (const auto& criterion : other_criteria) {
            new_parameters.criteria.push_back(
                gko::clone(this->get_executor(), criterion));
        }
        Base::operator=(Factory(this->get_executor(), new_parameters));
    }
    return *this;
}


} // namespace stop
} // namespace gko
6 changes: 6 additions & 0 deletions core/test/base/exception_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,4 +321,10 @@ TEST(KernelNotFound, ThrowsKernelNotFoundException)
}


// The GKO_INVALID_STATE macro must throw gko::InvalidStateError, even for an
// empty message string.
TEST(InvalidState, ThrowsInvalidStateException)
{
    ASSERT_THROW(GKO_INVALID_STATE(""), gko::InvalidStateError);
}


} // namespace
Loading