Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 20 additions & 24 deletions common/cuda_hip/stop/residual_norm_kernels.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -41,12 +41,12 @@ __global__ __launch_bounds__(default_block_size) void residual_norm_kernel(
if (tidx < num_cols) {
if (tau[tidx] <= rel_residual_goal * orig_tau[tidx]) {
stop_status[tidx].converge(stoppingId, setFinalized);
device_storage[1] = true;
device_storage[0] = true;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we put this into a struct or behind a wrapper to avoid mixing up the two values?

// because only false is written to all_converged, write conflicts
// should not cause any problem
else if (!stop_status[tidx].has_stopped()) {
device_storage[0] = false;
device_storage[1] = false;
}
}
}
Expand All @@ -55,8 +55,10 @@ __global__ __launch_bounds__(default_block_size) void residual_norm_kernel(
__global__ __launch_bounds__(1) void init_kernel(
bool* __restrict__ device_storage)
{
device_storage[0] = true;
device_storage[1] = false;
// one_changed
device_storage[0] = false;
// all_converged
device_storage[1] = true;
}


Expand All @@ -67,7 +69,7 @@ void residual_norm(std::shared_ptr<const DefaultExecutor> exec,
ValueType rel_residual_goal, uint8 stoppingId,
bool setFinalized, array<stopping_status>* stop_status,
array<bool>* device_storage, bool* all_converged,
bool* one_changed)
bool* indicators)
{
static_assert(is_complex_s<ValueType>::value == false,
"ValueType must not be complex in this function!");
Expand All @@ -86,9 +88,10 @@ void residual_norm(std::shared_ptr<const DefaultExecutor> exec,
as_device_type(device_storage->get_data()));
}

/* Represents all_converged, one_changed */
*all_converged = get_element(*device_storage, 0);
*one_changed = get_element(*device_storage, 1);
/* Represents all_converged(1), one_changed(0) */
exec->get_master()->copy_from(exec, 2, device_storage->get_const_data(),
indicators);
*all_converged = indicators[1];
}

GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE(
Expand Down Expand Up @@ -122,35 +125,27 @@ __launch_bounds__(default_block_size) void implicit_residual_norm_kernel(
if (tidx < num_cols) {
if (sqrt(abs(tau[tidx])) <= rel_residual_goal * orig_tau[tidx]) {
stop_status[tidx].converge(stoppingId, setFinalized);
device_storage[1] = true;
device_storage[0] = true;
}
// because only false is written to all_converged, write conflicts
// should not cause any problem
else if (!stop_status[tidx].has_stopped()) {
device_storage[0] = false;
device_storage[1] = false;
}
}
}


__global__ __launch_bounds__(1) void init_kernel(
bool* __restrict__ device_storage)
{
device_storage[0] = true;
device_storage[1] = false;
}


template <typename ValueType>
void implicit_residual_norm(
std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* tau,
const matrix::Dense<remove_complex<ValueType>>* orig_tau,
remove_complex<ValueType> rel_residual_goal, uint8 stoppingId,
bool setFinalized, array<stopping_status>* stop_status,
array<bool>* device_storage, bool* all_converged, bool* one_changed)
array<bool>* device_storage, bool* all_converged, bool* indicators)
{
init_kernel<<<1, 1, 0, exec->get_stream()>>>(
residual_norm::init_kernel<<<1, 1, 0, exec->get_stream()>>>(
as_device_type(device_storage->get_data()));

const auto block_size = default_block_size;
Expand All @@ -166,9 +161,10 @@ void implicit_residual_norm(
as_device_type(device_storage->get_data()));
}

/* Represents all_converged, one_changed */
*all_converged = get_element(*device_storage, 0);
*one_changed = get_element(*device_storage, 1);
/* Represents all_converged(1), one_changed(0) */
exec->get_master()->copy_from(exec, 2, device_storage->get_const_data(),
indicators);
*all_converged = indicators[1];
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL);
Expand Down
17 changes: 8 additions & 9 deletions core/solver/bicg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ void Bicg<ValueType>::apply_dense_impl(const matrix::Dense<ValueType>* dense_b,

GKO_SOLVER_ONE_MINUS_ONE();

bool one_changed{};
GKO_SOLVER_STOP_REDUCTION_ARRAYS();

// rho = 0.0
Expand Down Expand Up @@ -195,13 +194,13 @@ void Bicg<ValueType>::apply_dense_impl(const matrix::Dense<ValueType>* dense_b,
z->compute_conj_dot(r2, rho, reduction_tmp);

++iter;
bool all_stopped =
stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status, &one_changed);
bool all_stopped = stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status,
stop_indicators.get_data());
this->template log<log::Logger::iteration_complete>(
this, dense_b, dense_x, iter, r, nullptr, rho, &stop_status,
all_stopped);
Expand Down Expand Up @@ -251,7 +250,7 @@ void Bicg<ValueType>::apply_impl(const LinOp* alpha, const LinOp* b,
template <typename ValueType>
int workspace_traits<Bicg<ValueType>>::num_arrays(const Solver&)
{
return 2;
return 3;
}


Expand Down
33 changes: 16 additions & 17 deletions core/solver/bicgstab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ void Bicgstab<ValueType>::apply_dense_impl(const VectorType* dense_b,

GKO_SOLVER_ONE_MINUS_ONE();

bool one_changed{};
GKO_SOLVER_STOP_REDUCTION_ARRAYS();

// r = dense_b
Expand Down Expand Up @@ -160,13 +159,13 @@ void Bicgstab<ValueType>::apply_dense_impl(const VectorType* dense_b,
++iter;
rr->compute_conj_dot(r, rho, reduction_tmp);

bool all_stopped =
stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status, &one_changed);
bool all_stopped = stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status,
stop_indicators.get_data());
this->template log<log::Logger::iteration_complete>(
this, dense_b, dense_x, iter, r, nullptr, rho, &stop_status,
all_stopped);
Expand All @@ -193,14 +192,14 @@ void Bicgstab<ValueType>::apply_dense_impl(const VectorType* dense_b,
gko::detail::get_local(r), gko::detail::get_local(s),
gko::detail::get_local(v), rho, alpha, beta, &stop_status));

all_stopped =
stop_criterion->update()
.num_iterations(iter)
.residual(s)
.implicit_sq_residual_norm(rho)
// .solution(dense_x) // outdated at this point
.check(RelativeStoppingId, false, &stop_status, &one_changed);
if (one_changed) {
all_stopped = stop_criterion->update()
.num_iterations(iter)
.residual(s)
.implicit_sq_residual_norm(rho)
// .solution(dense_x) // outdated at this point
.check(RelativeStoppingId, false, &stop_status,
stop_indicators.get_data());
if (stop_indicators.get_const_data()[0]) {
exec->run(bicgstab::make_finalize(gko::detail::get_local(dense_x),
gko::detail::get_local(y), alpha,
&stop_status));
Expand Down Expand Up @@ -254,7 +253,7 @@ void Bicgstab<ValueType>::apply_impl(const LinOp* alpha, const LinOp* b,
template <typename ValueType>
int workspace_traits<Bicgstab<ValueType>>::num_arrays(const Solver&)
{
return 2;
return 3;
}


Expand Down
20 changes: 11 additions & 9 deletions core/solver/cb_gmres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ void CbGmres<ValueType>::apply_dense_impl(
array<size_type> final_iter_nums(this->get_executor(), num_rhs);
auto y = Vector::create(exec, dim<2>{krylov_dim, num_rhs});

bool one_changed{};
array<bool> stop_indicators(this->get_executor()->get_master(), 2);
stop_indicators.get_data()[0] = false;
array<char> reduction_tmp{this->get_executor()};
array<stopping_status> stop_status(this->get_executor(), num_rhs);
// reorth_status and num_reorth are both helper variables for GPU
Expand Down Expand Up @@ -341,17 +342,18 @@ void CbGmres<ValueType>::apply_dense_impl(
residual_norm.get(), nullptr, &stop_status, false);
++forced_iterations;
} else {
bool all_changed = stop_criterion->update()
.num_iterations(total_iter)
.residual(residual)
.residual_norm(residual_norm)
.solution(dense_x)
.check(RelativeStoppingId, true,
&stop_status, &one_changed);
bool all_changed =
stop_criterion->update()
.num_iterations(total_iter)
.residual(residual)
.residual_norm(residual_norm)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status,
stop_indicators.get_data());
this->template log<log::Logger::iteration_complete>(
this, dense_b, dense_x, total_iter, residual.get(),
residual_norm.get(), nullptr, &stop_status, all_changed);
if (one_changed || all_changed) {
if (stop_indicators.get_const_data()[0] || all_changed) {
host_stop_status = stop_status;
bool host_array_changed{false};
for (size_type i = 0; i < host_stop_status.get_size();
Expand Down
17 changes: 8 additions & 9 deletions core/solver/cg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ void Cg<ValueType>::apply_dense_impl(const VectorType* dense_b,

GKO_SOLVER_ONE_MINUS_ONE();

bool one_changed{};
GKO_SOLVER_STOP_REDUCTION_ARRAYS();

// r = dense_b
Expand Down Expand Up @@ -146,13 +145,13 @@ void Cg<ValueType>::apply_dense_impl(const VectorType* dense_b,
r->compute_conj_dot(z, rho, reduction_tmp);

++iter;
bool all_stopped =
stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status, &one_changed);
bool all_stopped = stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status,
stop_indicators.get_data());
this->template log<log::Logger::iteration_complete>(
this, dense_b, dense_x, iter, r, nullptr, rho, &stop_status,
all_stopped);
Expand Down Expand Up @@ -202,7 +201,7 @@ void Cg<ValueType>::apply_impl(const LinOp* alpha, const LinOp* b,
template <typename ValueType>
int workspace_traits<Cg<ValueType>>::num_arrays(const Solver&)
{
return 2;
return 3;
}


Expand Down
17 changes: 8 additions & 9 deletions core/solver/cgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ void Cgs<ValueType>::apply_dense_impl(const VectorType* dense_b,

GKO_SOLVER_ONE_MINUS_ONE();

bool one_changed{};
GKO_SOLVER_STOP_REDUCTION_ARRAYS();

// r = dense_b
Expand Down Expand Up @@ -155,13 +154,13 @@ void Cgs<ValueType>::apply_dense_impl(const VectorType* dense_b,
r->compute_conj_dot(r_tld, rho, reduction_tmp);

++iter;
bool all_stopped =
stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status, &one_changed);
bool all_stopped = stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status,
stop_indicators.get_data());
this->template log<log::Logger::iteration_complete>(
this, dense_b, dense_x, iter, r, nullptr, rho, &stop_status,
all_stopped);
Expand Down Expand Up @@ -222,7 +221,7 @@ void Cgs<ValueType>::apply_impl(const LinOp* alpha, const LinOp* b,
template <typename ValueType>
int workspace_traits<Cgs<ValueType>>::num_arrays(const Solver&)
{
return 2;
return 3;
}


Expand Down
9 changes: 7 additions & 2 deletions core/solver/chebyshev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ void Chebyshev<ValueType>::apply_dense_impl(const VectorType* dense_b,
auto& stop_status = this->template create_workspace_array<stopping_status>(
ws::stop, dense_b->get_size()[1]);
exec->run(ir::make_initialize(&stop_status));
exec->run(ir::make_initialize(&stop_status));
Comment on lines 227 to +228
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

duplicate?

auto& stop_indicators =
this->template create_workspace_array<bool>(ws::indicators, 2);
stop_indicators.set_executor(this->get_executor()->get_master());
stop_indicators.get_data()[0] = false;
if (guess != initial_guess_mode::zero) {
residual->copy_from(dense_b);
this->get_system_matrix()->apply(neg_one_op, dense_x, one_op, residual);
Expand All @@ -251,7 +256,7 @@ void Chebyshev<ValueType>::apply_dense_impl(const VectorType* dense_b,
};
bool all_stopped = update_residual(
this, iter, dense_b, dense_x, residual, residual_ptr,
stop_criterion, stop_status, log_func);
stop_criterion, stop_status, &stop_indicators, log_func);
if (all_stopped) {
break;
}
Expand Down Expand Up @@ -321,7 +326,7 @@ void Chebyshev<ValueType>::apply_with_initial_guess_impl(
template <typename ValueType>
int workspace_traits<Chebyshev<ValueType>>::num_arrays(const Solver&)
{
return 1;
return 2;
}


Expand Down
17 changes: 8 additions & 9 deletions core/solver/fcg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ void Fcg<ValueType>::apply_dense_impl(const VectorType* dense_b,

GKO_SOLVER_ONE_MINUS_ONE();

bool one_changed{};
GKO_SOLVER_STOP_REDUCTION_ARRAYS();

// r = dense_b
Expand Down Expand Up @@ -148,13 +147,13 @@ void Fcg<ValueType>::apply_dense_impl(const VectorType* dense_b,
t->compute_conj_dot(z, rho_t, reduction_tmp);

++iter;
bool all_stopped =
stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status, &one_changed);
bool all_stopped = stop_criterion->update()
.num_iterations(iter)
.residual(r)
.implicit_sq_residual_norm(rho)
.solution(dense_x)
.check(RelativeStoppingId, true, &stop_status,
stop_indicators.get_data());
this->template log<log::Logger::iteration_complete>(
this, dense_b, dense_x, iter, r, nullptr, rho, &stop_status,
all_stopped);
Expand Down Expand Up @@ -204,7 +203,7 @@ void Fcg<ValueType>::apply_impl(const LinOp* alpha, const LinOp* b,
template <typename ValueType>
int workspace_traits<Fcg<ValueType>>::num_arrays(const Solver&)
{
return 2;
return 3;
}


Expand Down
Loading
Loading