Line data Source code
1 : /**
2 : * @file linear_solver_bicgstab.c
3 : * @brief BiCGSTAB solver - scalar CPU implementation
4 : *
5 : * Biconjugate Gradient Stabilized (BiCGSTAB) method for solving Ax = b where A
6 : * may be non-symmetric. This is useful for advection-dominated problems where
7 : * the discretized operator is not symmetric.
8 : *
9 : * BiCGSTAB characteristics:
10 : * - Works for non-symmetric matrices (unlike CG which requires SPD)
11 : * - Smoother convergence than BiCG (avoids irregular convergence behavior)
12 : * - Requires 2 matrix-vector products per iteration
13 : * - Requires 4 inner products and 4 axpy operations per iteration
14 : * - Needs storage for 6 vectors: r, r_hat, p, v, s, t
15 : *
16 : * Algorithm (van der Vorst, 1992):
17 : * r_0 = b - A*x_0
18 : * r_hat = r_0 (arbitrary, typically r_0)
19 : * rho_0 = alpha = omega = 1
20 : * v_0 = p_0 = 0
21 : *
22 : * for k = 1, 2, 3, ...
23 : * rho_k = (r_hat, r_{k-1})
24 : * beta = (rho_k / rho_{k-1}) * (alpha / omega)
25 : * p_k = r_{k-1} + beta * (p_{k-1} - omega * v_{k-1})
26 : * v_k = A * p_k
27 : * alpha = rho_k / (r_hat, v_k)
28 : * s = r_{k-1} - alpha * v_k
29 : * t = A * s
30 : * omega = (t, s) / (t, t)
31 : * x_k = x_{k-1} + alpha * p_k + omega * s
32 : * r_k = s - omega * t
33 : *
34 : * Note: For the Poisson equation with symmetric BCs, CG is preferred.
35 : * BiCGSTAB is provided for future use with non-symmetric operators.
36 : */
37 :
38 : #include "../linear_solver_internal.h"
39 :
40 : #include "cfd/core/indexing.h"
41 : #include "cfd/core/memory.h"
42 :
43 : #include <math.h>
44 : #include <stdio.h>
45 : #include <string.h>
46 :
47 : /* ============================================================================
48 : * BICGSTAB CONTEXT
49 : * ============================================================================ */
50 :
51 : typedef struct {
52 : double dx2; /* dx^2 */
53 : double dy2; /* dy^2 */
54 : double inv_dz2; /* 1/dz^2 (0 for 2D) */
55 :
56 : size_t stride_z; /* nx*ny for 3D, 0 for 2D */
57 : size_t k_start; /* first interior k index */
58 : size_t k_end; /* one-past-last interior k index */
59 :
60 : /* BiCGSTAB working vectors (allocated during init) */
61 : double* r; /* Residual vector */
62 : double* r_hat; /* Shadow residual (typically r_0) */
63 : double* p; /* Search direction */
64 : double* v; /* A * p */
65 : double* s; /* Intermediate residual */
66 : double* t; /* A * s */
67 :
68 : int initialized;
69 : } bicgstab_context_t;
70 :
71 : /* ============================================================================
72 : * HELPER FUNCTIONS
73 : * ============================================================================ */
74 :
75 : /**
76 : * Compute dot product of two vectors (interior points only)
77 : */
78 360 : static double dot_product(const double* a, const double* b,
79 : size_t nx, size_t ny,
80 : size_t k_start, size_t k_end, size_t stride_z) {
81 360 : double sum = 0.0;
82 1925 : for (size_t k = k_start; k < k_end; k++) {
83 34704 : for (size_t j = 1; j < ny - 1; j++) {
84 1016176 : for (size_t i = 1; i < nx - 1; i++) {
85 982673 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
86 982673 : sum += a[idx] * b[idx];
87 : }
88 : }
89 : }
90 911 : return sum;
91 : }
92 :
93 : /**
94 : * Compute y = y + alpha * x (interior points only)
95 : */
96 : static void axpy(double alpha, const double* x, double* y,
97 : size_t nx, size_t ny,
98 : size_t k_start, size_t k_end, size_t stride_z) {
99 36 : for (size_t k = k_start; k < k_end; k++) {
100 528 : for (size_t j = 1; j < ny - 1; j++) {
101 8432 : for (size_t i = 1; i < nx - 1; i++) {
102 7936 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
103 7936 : y[idx] += alpha * x[idx];
104 : }
105 : }
106 : }
107 : }
108 :
109 : /**
110 : * Apply negative Laplacian operator: Ap = -nabla^2(p)
111 : * For Poisson equation: nabla^2(x) = rhs, we solve -nabla^2(x) = -rhs
112 : * which is SPD with positive eigenvalues.
113 : */
114 360 : static void apply_laplacian(const double* p, double* Ap,
115 : size_t nx, size_t ny,
116 : double dx2, double dy2, double inv_dz2,
117 : size_t k_start, size_t k_end, size_t stride_z) {
118 360 : double dx2_inv = 1.0 / dx2;
119 360 : double dy2_inv = 1.0 / dy2;
120 :
121 748 : for (size_t k = k_start; k < k_end; k++) {
122 11344 : for (size_t j = 1; j < ny - 1; j++) {
123 334512 : for (size_t i = 1; i < nx - 1; i++) {
124 323556 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
125 :
126 323556 : double laplacian =
127 323556 : (p[idx + 1] - 2.0 * p[idx] + p[idx - 1]) * dx2_inv
128 323556 : + (p[idx + nx] - 2.0 * p[idx] + p[idx - nx]) * dy2_inv
129 323556 : + (p[idx + stride_z] + p[idx - stride_z] - 2.0 * p[idx]) * inv_dz2;
130 323556 : Ap[idx] = -laplacian;
131 : }
132 : }
133 : }
134 360 : }
135 :
136 : /**
137 : * Compute initial residual: r = b - A*x
138 : * For our formulation: r = -rhs - (-nabla^2 x) = -rhs + nabla^2 x
139 : */
140 9 : static void compute_residual(const double* x, const double* rhs, double* r,
141 : size_t nx, size_t ny,
142 : double dx2, double dy2, double inv_dz2,
143 : size_t k_start, size_t k_end, size_t stride_z) {
144 9 : double dx2_inv = 1.0 / dx2;
145 9 : double dy2_inv = 1.0 / dy2;
146 :
147 46 : for (size_t k = k_start; k < k_end; k++) {
148 672 : for (size_t j = 1; j < ny - 1; j++) {
149 12640 : for (size_t i = 1; i < nx - 1; i++) {
150 12005 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
151 :
152 : /* Laplacian(x) = d^2x/dx^2 + d^2x/dy^2 */
153 12005 : double laplacian =
154 12005 : (x[idx + 1] - 2.0 * x[idx] + x[idx - 1]) * dx2_inv
155 12005 : + (x[idx + nx] - 2.0 * x[idx] + x[idx - nx]) * dy2_inv
156 12005 : + (x[idx + stride_z] + x[idx - stride_z] - 2.0 * x[idx]) * inv_dz2;
157 :
158 : /* r = b - Ax = -rhs - (-laplacian) = -rhs + laplacian */
159 12005 : r[idx] = -rhs[idx] + laplacian;
160 : }
161 : }
162 : }
163 9 : }
164 :
165 : /**
166 : * Copy vector: dst = src (interior points only)
167 : */
168 9 : static void copy_vector(const double* src, double* dst,
169 : size_t nx, size_t ny,
170 : size_t k_start, size_t k_end, size_t stride_z) {
171 46 : for (size_t k = k_start; k < k_end; k++) {
172 672 : for (size_t j = 1; j < ny - 1; j++) {
173 12640 : for (size_t i = 1; i < nx - 1; i++) {
174 12005 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
175 12005 : dst[idx] = src[idx];
176 : }
177 : }
178 : }
179 : }
180 :
181 : /**
182 : * Set vector to zero (interior points only)
183 : */
184 : static void zero_vector(double* v, size_t nx, size_t ny,
185 : size_t k_start, size_t k_end, size_t stride_z) {
186 74 : for (size_t k = k_start; k < k_end; k++) {
187 1344 : for (size_t j = 1; j < ny - 1; j++) {
188 25280 : for (size_t i = 1; i < nx - 1; i++) {
189 24010 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
190 24010 : v[idx] = 0.0;
191 : }
192 : }
193 : }
194 : }
195 :
196 : /* ============================================================================
197 : * BICGSTAB SCALAR IMPLEMENTATION
198 : * ============================================================================ */
199 :
200 10 : static cfd_status_t bicgstab_scalar_init(
201 : poisson_solver_t* solver,
202 : size_t nx, size_t ny, size_t nz,
203 : double dx, double dy, double dz,
204 : const poisson_solver_params_t* params)
205 : {
206 10 : (void)params;
207 :
208 10 : bicgstab_context_t* ctx = (bicgstab_context_t*)cfd_calloc(1, sizeof(bicgstab_context_t));
209 10 : if (!ctx) {
210 : return CFD_ERROR_NOMEM;
211 : }
212 :
213 10 : ctx->dx2 = dx * dx;
214 10 : ctx->dy2 = dy * dy;
215 10 : ctx->inv_dz2 = poisson_solver_compute_inv_dz2(dz);
216 10 : poisson_solver_compute_3d_bounds(nz, nx, ny, &ctx->stride_z, &ctx->k_start, &ctx->k_end);
217 :
218 : /* Allocate working vectors */
219 10 : size_t n = nx * ny * nz;
220 10 : ctx->r = (double*)cfd_calloc(n, sizeof(double));
221 10 : ctx->r_hat = (double*)cfd_calloc(n, sizeof(double));
222 10 : ctx->p = (double*)cfd_calloc(n, sizeof(double));
223 10 : ctx->v = (double*)cfd_calloc(n, sizeof(double));
224 10 : ctx->s = (double*)cfd_calloc(n, sizeof(double));
225 10 : ctx->t = (double*)cfd_calloc(n, sizeof(double));
226 :
227 10 : if (!ctx->r || !ctx->r_hat || !ctx->p || !ctx->v || !ctx->s || !ctx->t) {
228 0 : cfd_free(ctx->r);
229 0 : cfd_free(ctx->r_hat);
230 0 : cfd_free(ctx->p);
231 0 : cfd_free(ctx->v);
232 0 : cfd_free(ctx->s);
233 0 : cfd_free(ctx->t);
234 0 : cfd_free(ctx);
235 0 : return CFD_ERROR_NOMEM;
236 : }
237 :
238 10 : ctx->initialized = 1;
239 10 : solver->context = ctx;
240 10 : return CFD_SUCCESS;
241 : }
242 :
243 11 : static void bicgstab_scalar_destroy(poisson_solver_t* solver) {
244 11 : if (solver && solver->context) {
245 10 : bicgstab_context_t* ctx = (bicgstab_context_t*)solver->context;
246 10 : cfd_free(ctx->r);
247 10 : cfd_free(ctx->r_hat);
248 10 : cfd_free(ctx->p);
249 10 : cfd_free(ctx->v);
250 10 : cfd_free(ctx->s);
251 10 : cfd_free(ctx->t);
252 10 : cfd_free(ctx);
253 10 : solver->context = NULL;
254 : }
255 11 : }
256 :
257 : /**
258 : * BiCGSTAB solve function
259 : *
260 : * BiCGSTAB uses its own solve loop because:
261 : * 1. It maintains complex state across iterations (rho, omega, alpha)
262 : * 2. Has two convergence checks per iteration (after s and after r)
263 : * 3. Requires two matrix-vector products per iteration
264 : */
265 9 : static cfd_status_t bicgstab_scalar_solve(
266 : poisson_solver_t* solver,
267 : double* x,
268 : double* x_temp,
269 : const double* rhs,
270 : poisson_solver_stats_t* stats)
271 : {
272 9 : (void)x_temp; /* BiCGSTAB doesn't use the temp buffer */
273 :
274 9 : bicgstab_context_t* ctx = (bicgstab_context_t*)solver->context;
275 9 : size_t nx = solver->nx;
276 9 : size_t ny = solver->ny;
277 9 : double dx2 = ctx->dx2;
278 9 : double dy2 = ctx->dy2;
279 9 : size_t stride_z = ctx->stride_z;
280 9 : size_t k_start = ctx->k_start;
281 9 : size_t k_end = ctx->k_end;
282 9 : double inv_dz2 = ctx->inv_dz2;
283 :
284 9 : double* r = ctx->r;
285 9 : double* r_hat = ctx->r_hat;
286 9 : double* p = ctx->p;
287 9 : double* v = ctx->v;
288 9 : double* s = ctx->s;
289 9 : double* t = ctx->t;
290 :
291 9 : poisson_solver_params_t* params = &solver->params;
292 9 : double start_time = poisson_solver_get_time_ms();
293 :
294 : /* Apply initial boundary conditions */
295 9 : poisson_solver_apply_bc(solver, x);
296 :
297 : /* Compute initial residual: r_0 = b - A*x_0 */
298 9 : compute_residual(x, rhs, r, nx, ny, dx2, dy2, inv_dz2, k_start, k_end, stride_z);
299 :
300 : /* r_hat = r_0 (shadow residual) */
301 9 : copy_vector(r, r_hat, nx, ny, k_start, k_end, stride_z);
302 :
303 : /* Initialize: rho = alpha = omega = 1, v = p = 0 */
304 46 : double rho = 1.0;
305 46 : double alpha = 1.0;
306 46 : double omega = 1.0;
307 46 : zero_vector(v, nx, ny, k_start, k_end, stride_z);
308 46 : zero_vector(p, nx, ny, k_start, k_end, stride_z);
309 :
310 : /* Compute initial residual norm */
311 9 : double r_dot_r = dot_product(r, r, nx, ny, k_start, k_end, stride_z);
312 9 : double initial_res = sqrt(r_dot_r);
313 :
314 9 : if (stats) {
315 9 : stats->initial_residual = initial_res;
316 : }
317 :
318 : /* Check if already converged */
319 9 : double tolerance = params->tolerance * initial_res;
320 9 : if (tolerance < params->absolute_tolerance) {
321 1 : tolerance = params->absolute_tolerance;
322 : }
323 :
324 9 : if (initial_res < params->absolute_tolerance) {
325 1 : if (stats) {
326 1 : stats->status = POISSON_CONVERGED;
327 1 : stats->iterations = 0;
328 1 : stats->final_residual = initial_res;
329 1 : stats->elapsed_time_ms = poisson_solver_get_time_ms() - start_time;
330 : }
331 1 : return CFD_SUCCESS;
332 : }
333 :
334 182 : int converged = 0;
335 : int iter;
336 : double res_norm = initial_res;
337 :
338 182 : for (iter = 0; iter < params->max_iterations; iter++) {
339 : /* rho_new = (r_hat, r) */
340 182 : double rho_new = dot_product(r_hat, r, nx, ny, k_start, k_end, stride_z);
341 :
342 : /* Check for breakdown */
343 182 : if (fabs(rho_new) < BICGSTAB_BREAKDOWN_THRESHOLD) {
344 0 : if (stats) {
345 0 : stats->status = POISSON_STAGNATED;
346 0 : stats->iterations = iter + 1;
347 0 : stats->final_residual = res_norm;
348 0 : stats->elapsed_time_ms = poisson_solver_get_time_ms() - start_time;
349 : }
350 0 : return CFD_ERROR_MAX_ITER;
351 : }
352 :
353 : /* beta = (rho_new / rho) * (alpha / omega) */
354 182 : double beta = (rho_new / rho) * (alpha / omega);
355 :
356 : /* p = r + beta * (p - omega * v) */
357 392 : for (size_t k = k_start; k < k_end; k++) {
358 5936 : for (size_t j = 1; j < ny - 1; j++) {
359 171472 : for (size_t i = 1; i < nx - 1; i++) {
360 165746 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
361 165746 : p[idx] = r[idx] + beta * (p[idx] - omega * v[idx]);
362 : }
363 : }
364 : }
365 :
366 : /* v = A * p */
367 182 : apply_laplacian(p, v, nx, ny, dx2, dy2, inv_dz2, k_start, k_end, stride_z);
368 :
369 : /* alpha = rho_new / (r_hat, v) */
370 182 : double r_hat_dot_v = dot_product(r_hat, v, nx, ny, k_start, k_end, stride_z);
371 :
372 : /* Check for breakdown */
373 182 : if (fabs(r_hat_dot_v) < BICGSTAB_BREAKDOWN_THRESHOLD) {
374 0 : if (stats) {
375 0 : stats->status = POISSON_STAGNATED;
376 0 : stats->iterations = iter + 1;
377 0 : stats->final_residual = res_norm;
378 0 : stats->elapsed_time_ms = poisson_solver_get_time_ms() - start_time;
379 : }
380 0 : return CFD_ERROR_MAX_ITER;
381 : }
382 :
383 182 : alpha = rho_new / r_hat_dot_v;
384 :
385 : /* s = r - alpha * v */
386 392 : for (size_t k = k_start; k < k_end; k++) {
387 5936 : for (size_t j = 1; j < ny - 1; j++) {
388 171472 : for (size_t i = 1; i < nx - 1; i++) {
389 165746 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
390 165746 : s[idx] = r[idx] - alpha * v[idx];
391 : }
392 : }
393 : }
394 :
395 : /* Check for early convergence on s */
396 182 : double s_norm = sqrt(dot_product(s, s, nx, ny, k_start, k_end, stride_z));
397 182 : if (s_norm < tolerance || s_norm < params->absolute_tolerance) {
398 : /* Update x and return */
399 8 : axpy(alpha, p, x, nx, ny, k_start, k_end, stride_z);
400 : res_norm = s_norm;
401 : converged = 1;
402 : break;
403 : }
404 :
405 : /* t = A * s */
406 178 : apply_laplacian(s, t, nx, ny, dx2, dy2, inv_dz2, k_start, k_end, stride_z);
407 :
408 : /* omega = (t, s) / (t, t) */
409 534 : double t_dot_s = dot_product(t, s, nx, ny, k_start, k_end, stride_z);
410 178 : double t_dot_t = dot_product(t, t, nx, ny, k_start, k_end, stride_z);
411 :
412 : /* Check for breakdown */
413 178 : if (fabs(t_dot_t) < BICGSTAB_BREAKDOWN_THRESHOLD) {
414 : /* Update x with available progress */
415 0 : axpy(alpha, p, x, nx, ny, k_start, k_end, stride_z);
416 0 : if (stats) {
417 0 : stats->status = POISSON_STAGNATED;
418 0 : stats->iterations = iter + 1;
419 0 : stats->final_residual = s_norm;
420 0 : stats->elapsed_time_ms = poisson_solver_get_time_ms() - start_time;
421 : }
422 0 : return CFD_ERROR_MAX_ITER;
423 : }
424 :
425 178 : omega = t_dot_s / t_dot_t;
426 :
427 : /* x = x + alpha * p + omega * s */
428 356 : for (size_t k = k_start; k < k_end; k++) {
429 5408 : for (size_t j = 1; j < ny - 1; j++) {
430 163040 : for (size_t i = 1; i < nx - 1; i++) {
431 157810 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
432 157810 : x[idx] += alpha * p[idx] + omega * s[idx];
433 : }
434 : }
435 : }
436 :
437 : /* r = s - omega * t */
438 356 : for (size_t k = k_start; k < k_end; k++) {
439 5408 : for (size_t j = 1; j < ny - 1; j++) {
440 163040 : for (size_t i = 1; i < nx - 1; i++) {
441 157810 : size_t idx = (k * stride_z) + (IDX_2D(i, j, nx));
442 157810 : r[idx] = s[idx] - omega * t[idx];
443 : }
444 : }
445 : }
446 :
447 : /* Update rho for next iteration */
448 356 : rho = rho_new;
449 :
450 : /* Compute residual norm */
451 178 : res_norm = sqrt(dot_product(r, r, nx, ny, k_start, k_end, stride_z));
452 :
453 : /* Check convergence at intervals */
454 178 : if (iter % params->check_interval == 0) {
455 178 : if (params->verbose) {
456 0 : printf(" BiCGSTAB Iter %d: residual = %.6e\n", iter, res_norm);
457 : }
458 :
459 178 : if (res_norm < tolerance || res_norm < params->absolute_tolerance) {
460 : converged = 1;
461 : break;
462 : }
463 : }
464 :
465 : /* Check for omega breakdown (would cause division by zero next iteration) */
466 174 : if (fabs(omega) < BICGSTAB_BREAKDOWN_THRESHOLD) {
467 0 : if (stats) {
468 0 : stats->status = POISSON_STAGNATED;
469 0 : stats->iterations = iter + 1;
470 0 : stats->final_residual = res_norm;
471 0 : stats->elapsed_time_ms = poisson_solver_get_time_ms() - start_time;
472 : }
473 0 : return CFD_ERROR_MAX_ITER;
474 : }
475 : }
476 :
477 : /* Final convergence check */
478 8 : if (!converged && (res_norm < tolerance || res_norm < params->absolute_tolerance)) {
479 8 : converged = 1;
480 : }
481 :
482 : /* Apply final boundary conditions */
483 8 : poisson_solver_apply_bc(solver, x);
484 :
485 8 : double end_time = poisson_solver_get_time_ms();
486 :
487 8 : if (stats) {
488 8 : stats->iterations = (iter < params->max_iterations) ? (iter + 1) : iter;
489 8 : stats->final_residual = res_norm;
490 8 : stats->elapsed_time_ms = end_time - start_time;
491 8 : stats->status = converged ? POISSON_CONVERGED : POISSON_MAX_ITER;
492 : }
493 :
494 8 : return converged ? CFD_SUCCESS : CFD_ERROR_MAX_ITER;
495 : }
496 :
497 : /**
498 : * Single iteration is not well-defined for BiCGSTAB as it maintains internal state.
499 : * We provide a minimal implementation that returns the current residual.
500 : */
501 0 : static cfd_status_t bicgstab_scalar_iterate(
502 : poisson_solver_t* solver,
503 : double* x,
504 : double* x_temp,
505 : const double* rhs,
506 : double* residual)
507 : {
508 0 : (void)x_temp;
509 :
510 : /* BiCGSTAB doesn't support single iteration mode well.
511 : * Return the current residual from the Laplacian. */
512 0 : if (residual) {
513 0 : *residual = poisson_solver_compute_residual(solver, x, rhs);
514 : }
515 0 : return CFD_SUCCESS;
516 : }
517 :
518 : /* ============================================================================
519 : * FACTORY FUNCTION
520 : * ============================================================================ */
521 :
522 11 : poisson_solver_t* create_bicgstab_scalar_solver(void) {
523 11 : poisson_solver_t* solver = (poisson_solver_t*)cfd_calloc(1, sizeof(poisson_solver_t));
524 11 : if (!solver) {
525 : return NULL;
526 : }
527 :
528 11 : solver->name = POISSON_SOLVER_TYPE_BICGSTAB_SCALAR;
529 11 : solver->description = "BiCGSTAB (scalar CPU)";
530 11 : solver->method = POISSON_METHOD_BICGSTAB;
531 11 : solver->backend = POISSON_BACKEND_SCALAR;
532 11 : solver->params = poisson_solver_params_default();
533 :
534 11 : solver->init = bicgstab_scalar_init;
535 11 : solver->destroy = bicgstab_scalar_destroy;
536 11 : solver->solve = bicgstab_scalar_solve; /* BiCGSTAB uses custom solve loop */
537 11 : solver->iterate = bicgstab_scalar_iterate;
538 11 : solver->apply_bc = NULL; /* Use default Neumann */
539 :
540 11 : return solver;
541 : }
|