LCOV - code coverage report
Current view: top level - solvers/navier_stokes/avx2 - solver_projection_avx2.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 10.9 % 119 13
Test Date: 2026-03-04 10:22:18 Functions: 100.0 % 3 3

            Line data    Source code
       1              : /**
       2              :  * Optimized Projection Method NSSolver (Chorin's Method) with SIMD + OpenMP
       3              :  *
       4              :  * This implementation combines SIMD vectorization (AVX2) with OpenMP
       5              :  * parallelization for maximum performance on multi-core CPUs.
       6              :  *
       7              :  * - Predictor step: OpenMP parallelized (scalar inner loops)
       8              :  * - Corrector step: OpenMP parallelized with AVX2 SIMD inner loops
       9              :  * - Poisson solver: Uses SIMD Poisson solver for pressure computation
      10              :  */
      11              : 
      12              : #include "cfd/boundary/boundary_conditions.h"
      13              : #include "cfd/core/cfd_status.h"
      14              : #include "cfd/core/grid.h"
      15              : #include "cfd/core/indexing.h"
      16              : #include "cfd/core/memory.h"
      17              : #include "cfd/solvers/navier_stokes_solver.h"
      18              : #include "cfd/solvers/poisson_solver.h"
      19              : 
      20              : #include "../boundary_copy_utils.h"
      21              : 
      22              : #include <math.h>
      23              : #include <stdio.h>
      24              : #include <string.h>
      25              : 
      26              : #ifndef M_PI
      27              : #define M_PI 3.14159265358979323846
      28              : #endif
      29              : 
      30              : #ifdef _OPENMP
      31              : #include <omp.h>
      32              : #endif
      33              : 
      34              : /* AVX2 detection
      35              :  * CFD_HAS_AVX2 is set by CMake when -DCFD_ENABLE_AVX2=ON.
      36              :  * This works consistently across all compilers (GCC, Clang, MSVC).
      37              :  */
      38              : #if defined(CFD_HAS_AVX2)
      39              : #include <immintrin.h>
      40              : #define USE_AVX 1
      41              : #else
      42              : #define USE_AVX 0
      43              : #endif
      44              : 
      45              : // Physical limits
      46              : #define MAX_VELOCITY 100.0
      47              : 
      48              : typedef struct {
      49              :     double* u_star;
      50              :     double* v_star;
      51              :     double* w_star;
      52              :     double* p_new;
      53              :     double* rhs;
      54              :     double* u_new;  /* used as p_temp for Poisson solver */
      55              :     size_t nx;
      56              :     size_t ny;
      57              :     size_t nz;
      58              :     size_t stride_z;
      59              :     size_t k_start;
      60              :     size_t k_end;
      61              :     double inv_2dz;
      62              :     double inv_dz2;
      63              :     int initialized;
      64              :     int iter_count;
      65              : } projection_simd_context;
      66              : 
      67              : // Public API
      68              : cfd_status_t projection_simd_init(struct NSSolver* solver, const grid* grid,
      69              :                                   const ns_solver_params_t* params);
      70              : void projection_simd_destroy(struct NSSolver* solver);
      71              : cfd_status_t projection_simd_step(struct NSSolver* solver, flow_field* field, const grid* grid,
      72              :                                   const ns_solver_params_t* params, ns_solver_stats_t* stats);
      73              : 
      74           14 : cfd_status_t projection_simd_init(struct NSSolver* solver, const grid* grid,
      75              :                                   const ns_solver_params_t* params) {
      76           14 :     (void)params;
      77           14 :     if (!solver || !grid) {
      78              :         return CFD_ERROR_INVALID;
      79              :     }
      80           14 :     if (grid->nx < 3 || grid->ny < 3 || (grid->nz > 1 && grid->nz < 3)) {
      81              :         return CFD_ERROR_INVALID;
      82              :     }
      83              : 
      84              :     /* Verify SIMD CG Poisson solver is available before allocating resources */
      85           14 :     poisson_solver_t* test_solver = poisson_solver_create(
      86              :         POISSON_METHOD_CG, POISSON_BACKEND_SIMD);
      87           14 :     if (!test_solver) {
      88           14 :         fprintf(stderr, "projection_simd_init: SIMD CG Poisson solver not available\n");
      89           14 :         return CFD_ERROR_UNSUPPORTED;
      90              :     }
      91            0 :     poisson_solver_destroy(test_solver);
      92              : 
      93            0 :     projection_simd_context* ctx =
      94            0 :         (projection_simd_context*)cfd_calloc(1, sizeof(projection_simd_context));
      95            0 :     if (!ctx) {
      96              :         return CFD_ERROR_NOMEM;
      97              :     }
      98              : 
      99            0 :     ctx->nx = grid->nx;
     100            0 :     ctx->ny = grid->ny;
     101            0 :     ctx->nz = grid->nz;
     102            0 :     size_t size = ctx->nx * ctx->ny * grid->nz * sizeof(double);
     103              : 
     104              :     /* Reject non-uniform z-spacing (solver uses constant dz) */
     105            0 :     if (grid->nz > 1 && grid->dz) {
     106            0 :         for (size_t kk = 1; kk < grid->nz - 1; kk++) {
     107            0 :             if (fabs(grid->dz[kk] - grid->dz[0]) > 1e-14) {
     108            0 :                 cfd_free(ctx);
     109            0 :                 return CFD_ERROR_INVALID;
     110              :             }
     111              :         }
     112              :     }
     113              : 
     114            0 :     size_t plane = ctx->nx * ctx->ny;
     115            0 :     ctx->stride_z = (grid->nz > 1) ? plane : 0;
     116            0 :     ctx->k_start  = (grid->nz > 1) ? 1 : 0;
     117            0 :     ctx->k_end    = (grid->nz > 1) ? (grid->nz - 1) : 1;
     118            0 :     double dz = (grid->nz > 1 && grid->dz) ? grid->dz[0] : 0.0;
     119            0 :     ctx->inv_2dz  = (grid->nz > 1 && grid->dz) ? 1.0 / (2.0 * dz) : 0.0;
     120            0 :     ctx->inv_dz2  = (grid->nz > 1 && grid->dz) ? 1.0 / (dz * dz) : 0.0;
     121              : 
     122            0 :     ctx->u_star = (double*)cfd_aligned_malloc(size);
     123            0 :     ctx->v_star = (double*)cfd_aligned_malloc(size);
     124            0 :     ctx->w_star = (double*)cfd_aligned_malloc(size);
     125            0 :     ctx->p_new  = (double*)cfd_aligned_malloc(size);
     126            0 :     ctx->rhs    = (double*)cfd_aligned_malloc(size);
     127            0 :     ctx->u_new  = (double*)cfd_aligned_malloc(size);
     128              : 
     129            0 :     if (!ctx->u_star || !ctx->v_star || !ctx->w_star || !ctx->p_new ||
     130            0 :         !ctx->rhs || !ctx->u_new) {
     131            0 :         if (ctx->u_star) {
     132            0 :             cfd_aligned_free(ctx->u_star);
     133              :         }
     134            0 :         if (ctx->v_star) {
     135            0 :             cfd_aligned_free(ctx->v_star);
     136              :         }
     137            0 :         if (ctx->w_star) {
     138            0 :             cfd_aligned_free(ctx->w_star);
     139              :         }
     140            0 :         if (ctx->p_new) {
     141            0 :             cfd_aligned_free(ctx->p_new);
     142              :         }
     143            0 :         if (ctx->rhs) {
     144            0 :             cfd_aligned_free(ctx->rhs);
     145              :         }
     146            0 :         if (ctx->u_new) {
     147            0 :             cfd_aligned_free(ctx->u_new);
     148              :         }
     149            0 :         cfd_free(ctx);
     150            0 :         return CFD_ERROR_NOMEM;
     151              :     }
     152              : 
     153            0 :     ctx->initialized = 1;
     154            0 :     solver->context = ctx;
     155            0 :     return CFD_SUCCESS;
     156              : }
     157              : 
     158           18 : void projection_simd_destroy(struct NSSolver* solver) {
     159           18 :     if (solver && solver->context) {
     160            0 :         projection_simd_context* ctx = (projection_simd_context*)solver->context;
     161            0 :         if (ctx->initialized) {
     162            0 :             cfd_aligned_free(ctx->u_star);
     163            0 :             cfd_aligned_free(ctx->v_star);
     164            0 :             cfd_aligned_free(ctx->w_star);
     165            0 :             cfd_aligned_free(ctx->p_new);
     166            0 :             cfd_aligned_free(ctx->rhs);
     167            0 :             cfd_aligned_free(ctx->u_new);
     168              :         }
     169            0 :         cfd_free(ctx);
     170            0 :         solver->context = NULL;
     171              :     }
     172           18 : }
     173              : 
     174            1 : cfd_status_t projection_simd_step(struct NSSolver* solver, flow_field* field, const grid* grid,
     175              :                                   const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     176            1 :     if (!solver || !solver->context || !field || !grid || !params) {
     177              :         return CFD_ERROR_INVALID;
     178              :     }
     179            0 :     if (field->nx < 3 || field->ny < 3 || (field->nz > 1 && field->nz < 3)) {
     180              :         return CFD_ERROR_INVALID;
     181              :     }
     182              : 
     183            0 :     projection_simd_context* ctx = (projection_simd_context*)solver->context;
     184              : 
     185              :     // Verify context matches current grid
     186            0 :     if (ctx->nx != field->nx || ctx->ny != field->ny || ctx->nz != field->nz) {
     187              :         return CFD_ERROR_INVALID;
     188              :     }
     189              : 
     190            0 :     size_t nx = field->nx;
     191            0 :     size_t ny = field->ny;
     192            0 :     size_t size = nx * ny * ctx->nz;
     193              : 
     194            0 :     double dx = grid->dx[0];
     195            0 :     double dy = grid->dy[0];
     196            0 :     double dz = (ctx->nz > 1 && grid->dz) ? grid->dz[0] : 0.0;
     197            0 :     double dt = params->dt;
     198            0 :     double nu = params->mu;  // Viscosity (treated as kinematic for ρ=1)
     199              : 
     200            0 :     double* u_star = ctx->u_star;
     201            0 :     double* v_star = ctx->v_star;
     202            0 :     double* w_star = ctx->w_star;
     203            0 :     double* p_new = ctx->p_new;
     204            0 :     double* rhs = ctx->rhs;
     205              : 
     206              :     // Copy current field values to work buffers (includes boundaries)
     207            0 :     memcpy(u_star, field->u, size * sizeof(double));
     208            0 :     memcpy(v_star, field->v, size * sizeof(double));
     209            0 :     memcpy(w_star, field->w, size * sizeof(double));
     210            0 :     memcpy(p_new, field->p, size * sizeof(double));
     211            0 :     memset(rhs, 0, size * sizeof(double));
     212              : 
     213              :     // ============================================================
     214              :     // STEP 1: Predictor - Compute intermediate velocity u*
     215              :     // (OpenMP parallelized outer loop, scalar inner loop)
     216              :     // ============================================================
     217            0 :     int ny_int = (int)ny;
     218            0 :     int nx_int = (int)nx;
     219            0 :     int jj;
     220            0 :     (void)nx_int;  /* suppress unused variable warning */
     221              : 
     222            0 :     for (size_t k = ctx->k_start; k < ctx->k_end; k++) {
     223            0 :         size_t k_off = k * ctx->stride_z;
     224              : #ifdef _OPENMP
     225            0 :         #pragma omp parallel for schedule(static)
     226              : #endif
     227              :         for (jj = 1; jj < ny_int - 1; jj++) {
     228              :             size_t j = (size_t)jj;
     229              :             for (size_t i = 1; i < nx - 1; i++) {
     230              :                 size_t idx = k_off + IDX_2D(i, j, nx);
     231              : 
     232              :                 double u = field->u[idx];
     233              :                 double v = field->v[idx];
     234              :                 double w = field->w[idx];
     235              : 
     236              :                 // Convective terms: -u·∇u (central differences)
     237              :                 double du_dx = (field->u[idx + 1] - field->u[idx - 1]) / (2.0 * dx);
     238              :                 double du_dy = (field->u[idx + nx] - field->u[idx - nx]) / (2.0 * dy);
     239              :                 double du_dz = (field->u[idx + ctx->stride_z] - field->u[idx - ctx->stride_z]) *
     240              :                                ctx->inv_2dz;
     241              : 
     242              :                 double dv_dx = (field->v[idx + 1] - field->v[idx - 1]) / (2.0 * dx);
     243              :                 double dv_dy = (field->v[idx + nx] - field->v[idx - nx]) / (2.0 * dy);
     244              :                 double dv_dz = (field->v[idx + ctx->stride_z] - field->v[idx - ctx->stride_z]) *
     245              :                                ctx->inv_2dz;
     246              : 
     247              :                 double dw_dx = (field->w[idx + 1] - field->w[idx - 1]) / (2.0 * dx);
     248              :                 double dw_dy = (field->w[idx + nx] - field->w[idx - nx]) / (2.0 * dy);
     249              :                 double dw_dz = (field->w[idx + ctx->stride_z] - field->w[idx - ctx->stride_z]) *
     250              :                                ctx->inv_2dz;
     251              : 
     252              :                 double conv_u = (u * du_dx) + (v * du_dy) + (w * du_dz);
     253              :                 double conv_v = (u * dv_dx) + (v * dv_dy) + (w * dv_dz);
     254              :                 double conv_w = (u * dw_dx) + (v * dw_dy) + (w * dw_dz);
     255              : 
     256              :                 // Viscous terms: ν∇²u
     257              :                 double d2u_dx2 = (field->u[idx + 1] - 2.0 * u + field->u[idx - 1]) / (dx * dx);
     258              :                 double d2u_dy2 = (field->u[idx + nx] - 2.0 * u + field->u[idx - nx]) / (dy * dy);
     259              :                 double d2u_dz2 = (field->u[idx + ctx->stride_z] - 2.0 * u +
     260              :                                   field->u[idx - ctx->stride_z]) * ctx->inv_dz2;
     261              : 
     262              :                 double d2v_dx2 = (field->v[idx + 1] - 2.0 * v + field->v[idx - 1]) / (dx * dx);
     263              :                 double d2v_dy2 = (field->v[idx + nx] - 2.0 * v + field->v[idx - nx]) / (dy * dy);
     264              :                 double d2v_dz2 = (field->v[idx + ctx->stride_z] - 2.0 * v +
     265              :                                   field->v[idx - ctx->stride_z]) * ctx->inv_dz2;
     266              : 
     267              :                 double d2w_dx2 = (field->w[idx + 1] - 2.0 * w + field->w[idx - 1]) / (dx * dx);
     268              :                 double d2w_dy2 = (field->w[idx + nx] - 2.0 * w + field->w[idx - nx]) / (dy * dy);
     269              :                 double d2w_dz2 = (field->w[idx + ctx->stride_z] - 2.0 * w +
     270              :                                   field->w[idx - ctx->stride_z]) * ctx->inv_dz2;
     271              : 
     272              :                 double visc_u = nu * (d2u_dx2 + d2u_dy2 + d2u_dz2);
     273              :                 double visc_v = nu * (d2v_dx2 + d2v_dy2 + d2v_dz2);
     274              :                 double visc_w = nu * (d2w_dx2 + d2w_dy2 + d2w_dz2);
     275              : 
     276              :                 // Source terms
     277              :                 double source_u = 0.0;
     278              :                 double source_v = 0.0;
     279              :                 double source_w = 0.0;
     280              :                 double x_coord = grid->x[i];
     281              :                 double y_coord = grid->y[j];
     282              :                 double z_coord = (ctx->nz > 1 && grid->z) ? grid->z[k] : 0.0;
     283              :                 compute_source_terms(x_coord, y_coord, z_coord, ctx->iter_count, dt, params,
     284              :                                      &source_u, &source_v, &source_w);
     285              : 
     286              :                 // Intermediate velocity (without pressure gradient)
     287              :                 u_star[idx] = u + (dt * (-conv_u + visc_u + source_u));
     288              :                 v_star[idx] = v + (dt * (-conv_v + visc_v + source_v));
     289              :                 w_star[idx] = w + (dt * (-conv_w + visc_w + source_w));
     290              : 
     291              :                 // Limit velocities
     292              :                 u_star[idx] = fmax(-MAX_VELOCITY, fmin(MAX_VELOCITY, u_star[idx]));
     293              :                 v_star[idx] = fmax(-MAX_VELOCITY, fmin(MAX_VELOCITY, v_star[idx]));
     294              :                 w_star[idx] = fmax(-MAX_VELOCITY, fmin(MAX_VELOCITY, w_star[idx]));
     295              :             }
     296              :         }
     297              :     }
     298              : 
     299              :     // Copy boundary values from field to star arrays
     300            0 :     copy_boundary_velocities_3d(u_star, v_star, w_star, field->u, field->v, field->w,
     301              :                                 nx, ny, ctx->nz);
     302              : 
     303              :     // ============================================================
     304              :     // STEP 2: Solve Poisson equation for pressure
     305              :     // ∇²p = (ρ/dt) * ∇·u*
     306              :     // ============================================================
     307              : 
     308            0 :     double rho = field->rho[0];
     309            0 :     if (rho < 1e-10) {
     310            0 :         rho = 1.0;
     311              :     }
     312              : 
     313              :     // Compute RHS: divergence of intermediate velocity
     314            0 :     for (size_t k = ctx->k_start; k < ctx->k_end; k++) {
     315            0 :         size_t k_off = k * ctx->stride_z;
     316              : #ifdef _OPENMP
     317            0 :         #pragma omp parallel for schedule(static)
     318              : #endif
     319              :         for (jj = 1; jj < ny_int - 1; jj++) {
     320              :             size_t j = (size_t)jj;
     321              :             for (size_t i = 1; i < nx - 1; i++) {
     322              :                 size_t idx = k_off + IDX_2D(i, j, nx);
     323              : 
     324              :                 double du_star_dx = (u_star[idx + 1] - u_star[idx - 1]) / (2.0 * dx);
     325              :                 double dv_star_dy = (v_star[idx + nx] - v_star[idx - nx]) / (2.0 * dy);
     326              :                 double dw_star_dz = (w_star[idx + ctx->stride_z] -
     327              :                                      w_star[idx - ctx->stride_z]) * ctx->inv_2dz;
     328              : 
     329              :                 double divergence = du_star_dx + dv_star_dy + dw_star_dz;
     330              :                 rhs[idx] = (rho / dt) * divergence;
     331              :             }
     332              :         }
     333              :     }
     334              : 
     335              :     // Use SIMD Poisson solver (Conjugate Gradient with SIMD)
     336              :     // ctx->u_new is used as temp buffer for the Poisson solver
     337            0 :     int poisson_iters = poisson_solve_3d(p_new, ctx->u_new, rhs, nx, ny, ctx->nz,
     338              :                                          dx, dy, dz, POISSON_SOLVER_CG_SIMD);
     339              : 
     340            0 :     if (poisson_iters < 0) {
     341              :         return CFD_ERROR_MAX_ITER;
     342              :     }
     343              : 
     344              :     // ============================================================
     345              :     // STEP 3: Corrector - Project velocity to be divergence-free
     346              :     // u^(n+1) = u* - (dt/ρ) * ∇p
     347              :     // (OpenMP parallelized with SIMD inner loop)
     348              :     // ============================================================
     349              : 
     350            0 :     double dt_over_rho = dt / rho;
     351            0 :     double inv_2dx = 1.0 / (2.0 * dx);
     352            0 :     double inv_2dy = 1.0 / (2.0 * dy);
     353              : 
     354              : #if USE_AVX
     355              :     __m256d dt_rho_vec      = _mm256_set1_pd(dt_over_rho);
     356              :     __m256d inv_2dx_vec     = _mm256_set1_pd(inv_2dx);
     357              :     __m256d inv_2dy_vec     = _mm256_set1_pd(inv_2dy);
     358              :     __m256d inv_2dz_vec     = _mm256_set1_pd(ctx->inv_2dz);
     359              :     __m256d max_vel_vec     = _mm256_set1_pd(MAX_VELOCITY);
     360              :     __m256d neg_max_vel_vec = _mm256_set1_pd(-MAX_VELOCITY);
     361              : #endif
     362              : 
     363            0 :     for (size_t k = ctx->k_start; k < ctx->k_end; k++) {
     364            0 :         size_t k_off = k * ctx->stride_z;
     365              : #ifdef _OPENMP
     366            0 :         #pragma omp parallel for schedule(static)
     367              : #endif
     368              :         for (jj = 1; jj < ny_int - 1; jj++) {
     369              :             size_t j = (size_t)jj;
     370              :             size_t i = 1;
     371              : 
     372              : #if USE_AVX
     373              :             // SIMD loop - process 4 cells at once
     374              :             for (; i + 4 <= nx - 1; i += 4) {
     375              :                 size_t idx = k_off + IDX_2D(i, j, nx);
     376              : 
     377              :                 // Load pressure neighbors for gradient computation
     378              :                 __m256d p_xp = _mm256_loadu_pd(&p_new[idx + 1]);
     379              :                 __m256d p_xm = _mm256_loadu_pd(&p_new[idx - 1]);
     380              :                 __m256d p_yp = _mm256_loadu_pd(&p_new[idx + nx]);
     381              :                 __m256d p_ym = _mm256_loadu_pd(&p_new[idx - nx]);
     382              :                 __m256d p_zp = _mm256_loadu_pd(&p_new[idx + ctx->stride_z]);
     383              :                 __m256d p_zm = _mm256_loadu_pd(&p_new[idx - ctx->stride_z]);
     384              : 
     385              :                 // Compute pressure gradients
     386              :                 __m256d dp_dx = _mm256_mul_pd(_mm256_sub_pd(p_xp, p_xm), inv_2dx_vec);
     387              :                 __m256d dp_dy = _mm256_mul_pd(_mm256_sub_pd(p_yp, p_ym), inv_2dy_vec);
     388              :                 __m256d dp_dz = _mm256_mul_pd(_mm256_sub_pd(p_zp, p_zm), inv_2dz_vec);
     389              : 
     390              :                 // Load intermediate velocities
     391              :                 __m256d u_s = _mm256_loadu_pd(&u_star[idx]);
     392              :                 __m256d v_s = _mm256_loadu_pd(&v_star[idx]);
     393              :                 __m256d w_s = _mm256_loadu_pd(&w_star[idx]);
     394              : 
     395              :                 // Corrector: u = u* - (dt/rho) * dp/dx
     396              :                 __m256d u_new = _mm256_sub_pd(u_s, _mm256_mul_pd(dt_rho_vec, dp_dx));
     397              :                 __m256d v_new = _mm256_sub_pd(v_s, _mm256_mul_pd(dt_rho_vec, dp_dy));
     398              :                 __m256d w_new = _mm256_sub_pd(w_s, _mm256_mul_pd(dt_rho_vec, dp_dz));
     399              : 
     400              :                 // Clamp velocities to [-MAX_VELOCITY, MAX_VELOCITY]
     401              :                 u_new = _mm256_max_pd(neg_max_vel_vec, _mm256_min_pd(max_vel_vec, u_new));
     402              :                 v_new = _mm256_max_pd(neg_max_vel_vec, _mm256_min_pd(max_vel_vec, v_new));
     403              :                 w_new = _mm256_max_pd(neg_max_vel_vec, _mm256_min_pd(max_vel_vec, w_new));
     404              : 
     405              :                 // Store results
     406              :                 _mm256_storeu_pd(&field->u[idx], u_new);
     407              :                 _mm256_storeu_pd(&field->v[idx], v_new);
     408              :                 _mm256_storeu_pd(&field->w[idx], w_new);
     409              :             }
     410              : #endif
     411              : 
     412              :             // Scalar remainder
     413              :             for (; i < nx - 1; i++) {
     414              :                 size_t idx = k_off + IDX_2D(i, j, nx);
     415              : 
     416              :                 double dp_dx = (p_new[idx + 1] - p_new[idx - 1]) * inv_2dx;
     417              :                 double dp_dy = (p_new[idx + nx] - p_new[idx - nx]) * inv_2dy;
     418              :                 double dp_dz = (p_new[idx + ctx->stride_z] - p_new[idx - ctx->stride_z]) *
     419              :                                ctx->inv_2dz;
     420              : 
     421              :                 field->u[idx] = u_star[idx] - (dt_over_rho * dp_dx);
     422              :                 field->v[idx] = v_star[idx] - (dt_over_rho * dp_dy);
     423              :                 field->w[idx] = w_star[idx] - (dt_over_rho * dp_dz);
     424              : 
     425              :                 // Limit velocities
     426              :                 field->u[idx] = fmax(-MAX_VELOCITY, fmin(MAX_VELOCITY, field->u[idx]));
     427              :                 field->v[idx] = fmax(-MAX_VELOCITY, fmin(MAX_VELOCITY, field->v[idx]));
     428              :                 field->w[idx] = fmax(-MAX_VELOCITY, fmin(MAX_VELOCITY, field->w[idx]));
     429              :             }
     430              :         }
     431              :     }
     432              : 
     433              :     // Update pressure field
     434            0 :     memcpy(field->p, p_new, size * sizeof(double));
     435              : 
     436              :     // Copy boundary velocity values from star arrays (which have caller's BCs)
     437            0 :     copy_boundary_velocities_3d(field->u, field->v, field->w, u_star, v_star, w_star,
     438              :                                 nx, ny, ctx->nz);
     439              : 
     440              :     // Check for NaN
     441            0 :     for (size_t n = 0; n < size; n++) {
     442            0 :         if (!isfinite(field->u[n]) || !isfinite(field->v[n]) ||
     443            0 :             !isfinite(field->w[n]) || !isfinite(field->p[n])) {
     444              :             return CFD_ERROR_DIVERGED;
     445              :         }
     446              :     }
     447              : 
     448            0 :     ctx->iter_count++;
     449              : 
     450            0 :     if (stats) {
     451            0 :         stats->iterations = 1;
     452              :     }
     453              : 
     454              :     return CFD_SUCCESS;
     455              : }
        

Generated by: LCOV version 2.0-1