LCOV - code coverage report
Current view: top level - solvers/navier_stokes/omp - solver_explicit_euler_omp.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 81.0 % 58 47
Test Date: 2026-03-04 10:22:18 Functions: 100.0 % 1 1

            Line data    Source code
       1              : #include "cfd/core/cfd_status.h"
       2              : #include "cfd/core/grid.h"
       3              : #include "cfd/core/indexing.h"
       4              : #include "cfd/core/memory.h"
       5              : 
       6              : #include "cfd/solvers/navier_stokes_solver.h"
       7              : #include "../boundary_copy_utils.h"
       8              : #include <math.h>
       9              : #include <omp.h>
      10              : #include <stdio.h>
      11              : #include <string.h>
      12              : 
      13              : 
      14              : 
      15              : #ifndef M_PI
      16              : #define M_PI 3.14159265358979323846
      17              : #endif
      18              : 
      19              : // Physical stability limits (same as cpu solver)
      20              : #define MAX_DERIVATIVE_LIMIT        100.0
      21              : #define MAX_SECOND_DERIVATIVE_LIMIT 1000.0
      22              : #define MAX_VELOCITY_LIMIT          100.0
      23              : #define MAX_DIVERGENCE_LIMIT        10.0
      24              : #define DT_CONSERVATIVE_LIMIT       0.0001
      25              : #define UPDATE_LIMIT                1.0
      26              : #define PRESSURE_UPDATE_FACTOR      0.1
      27              : 
      28              : // Internal OpenMP explicit Euler implementation
      29          212 : cfd_status_t explicit_euler_omp_impl(flow_field* field, const grid* grid,
      30              :                                      const ns_solver_params_t* params) {
      31          212 :     if (field->nx < 3 || field->ny < 3 || (field->nz > 1 && field->nz < 3)) {
      32              :         return CFD_ERROR_INVALID;
      33              :     }
      34              : 
      35          212 :     size_t nz = field->nz;
      36          212 :     if (nz > 1 && grid->dz) {
      37           14 :         for (size_t kk = 1; kk < nz - 1; kk++) {
      38           12 :             if (fabs(grid->dz[kk] - grid->dz[0]) > 1e-14) {
      39              :                 return CFD_ERROR_INVALID;
      40              :             }
      41              :         }
      42              :     }
      43              : 
      44          212 :     size_t nx = field->nx;
      45          212 :     size_t ny = field->ny;
      46          212 :     size_t plane = nx * ny;
      47          212 :     size_t total = plane * nz;
      48              : 
      49          212 :     size_t stride_z = (nz > 1) ? plane : 0;
      50          212 :     size_t k_start  = (nz > 1) ? 1 : 0;
      51          212 :     size_t k_end    = (nz > 1) ? (nz - 1) : 1;
      52          212 :     double inv_2dz  = (nz > 1 && grid->dz) ? 1.0 / (2.0 * grid->dz[0]) : 0.0;
      53            2 :     double inv_dz2  = (nz > 1 && grid->dz) ? 1.0 / (grid->dz[0] * grid->dz[0]) : 0.0;
      54              : 
      55              :     // Allocate temporary arrays
      56          212 :     double* u_new = (double*)cfd_calloc(total, sizeof(double));
      57          212 :     double* v_new = (double*)cfd_calloc(total, sizeof(double));
      58          212 :     double* w_new = (double*)cfd_calloc(total, sizeof(double));
      59          212 :     double* p_new = (double*)cfd_calloc(total, sizeof(double));
      60              : 
      61          212 :     if (!u_new || !v_new || !w_new || !p_new) {
      62            0 :         cfd_free(u_new);
      63            0 :         cfd_free(v_new);
      64            0 :         cfd_free(w_new);
      65            0 :         cfd_free(p_new);
      66            0 :         return CFD_ERROR_NOMEM;
      67              :     }
      68              : 
      69              :     // Initialize with current values
      70          212 :     memcpy(u_new, field->u, total * sizeof(double));
      71          212 :     memcpy(v_new, field->v, total * sizeof(double));
      72          212 :     memcpy(w_new, field->w, total * sizeof(double));
      73          212 :     memcpy(p_new, field->p, total * sizeof(double));
      74              : 
      75          212 :     double conservative_dt = fmin(params->dt, DT_CONSERVATIVE_LIMIT);
      76              : 
      77          424 :     for (int iter = 0; iter < params->max_iter; iter++) {
      78          434 :         for (size_t kk = k_start; kk < k_end; kk++) {
      79          222 :             int j;
      80          222 : #pragma omp parallel for schedule(static)
      81              :             for (j = 1; j < (int)ny - 1; j++) {
      82              :                 for (int i = 1; i < (int)nx - 1; i++) {
      83              :                     size_t idx = (kk * stride_z) + IDX_2D(i, j, nx);
      84              : 
      85              :                     // Derivatives
      86              :                     double du_dx = (field->u[idx + 1] - field->u[idx - 1]) / (2.0 * grid->dx[i]);
      87              :                     double du_dy =
      88              :                         (field->u[idx + nx] - field->u[idx - nx]) / (2.0 * grid->dy[j]);
      89              :                     double dv_dx = (field->v[idx + 1] - field->v[idx - 1]) / (2.0 * grid->dx[i]);
      90              :                     double dv_dy =
      91              :                         (field->v[idx + nx] - field->v[idx - nx]) / (2.0 * grid->dy[j]);
      92              : 
      93              :                     double dp_dx = (field->p[idx + 1] - field->p[idx - 1]) / (2.0 * grid->dx[i]);
      94              :                     double dp_dy =
      95              :                         (field->p[idx + nx] - field->p[idx - nx]) / (2.0 * grid->dy[j]);
      96              : 
      97              :                     double d2u_dx2 = (field->u[idx + 1] - 2.0 * field->u[idx] + field->u[idx - 1]) /
      98              :                                      (grid->dx[i] * grid->dx[i]);
      99              :                     double d2u_dy2 =
     100              :                         (field->u[idx + nx] - 2.0 * field->u[idx] + field->u[idx - nx]) /
     101              :                         (grid->dy[j] * grid->dy[j]);
     102              :                     double d2v_dx2 = (field->v[idx + 1] - 2.0 * field->v[idx] + field->v[idx - 1]) /
     103              :                                      (grid->dx[i] * grid->dx[i]);
     104              :                     double d2v_dy2 =
     105              :                         (field->v[idx + nx] - 2.0 * field->v[idx] + field->v[idx - nx]) /
     106              :                         (grid->dy[j] * grid->dy[j]);
     107              : 
     108              :                     double du_dz = (field->u[idx + stride_z] - field->u[idx - stride_z]) * inv_2dz;
     109              :                     double dv_dz = (field->v[idx + stride_z] - field->v[idx - stride_z]) * inv_2dz;
     110              :                     double dw_dx = (field->w[idx + 1] - field->w[idx - 1]) / (2.0 * grid->dx[i]);
     111              :                     double dw_dy = (field->w[idx + nx] - field->w[idx - nx]) / (2.0 * grid->dy[j]);
     112              :                     double dw_dz = (field->w[idx + stride_z] - field->w[idx - stride_z]) * inv_2dz;
     113              :                     double dp_dz = (field->p[idx + stride_z] - field->p[idx - stride_z]) * inv_2dz;
     114              : 
     115              :                     double d2u_dz2 = (field->u[idx + stride_z] - 2.0 * field->u[idx] + field->u[idx - stride_z]) * inv_dz2;
     116              :                     double d2v_dz2 = (field->v[idx + stride_z] - 2.0 * field->v[idx] + field->v[idx - stride_z]) * inv_dz2;
     117              :                     double d2w_dx2 = (field->w[idx + 1] - 2.0 * field->w[idx] + field->w[idx - 1]) / (grid->dx[i] * grid->dx[i]);
     118              :                     double d2w_dy2 = (field->w[idx + nx] - 2.0 * field->w[idx] + field->w[idx - nx]) / (grid->dy[j] * grid->dy[j]);
     119              :                     double d2w_dz2 = (field->w[idx + stride_z] - 2.0 * field->w[idx] + field->w[idx - stride_z]) * inv_dz2;
     120              : 
     121              :                     if (field->rho[idx] <= 1e-10) {
     122              :                         continue;
     123              :                     }
     124              : 
     125              :                     double nu = params->mu / fmax(field->rho[idx], 1e-10);
     126              :                     nu = fmin(nu, 1.0);
     127              : 
     128              :                     // Source terms
     129              :                     double source_u = 0.0;
     130              :                     double source_v = 0.0;
     131              :                     double source_w = 0.0;
     132              :                     double z_coord = (nz > 1 && grid->z) ? grid->z[kk] : 0.0;
     133              :                     if (params->source_func) {
     134              :                         params->source_func(grid->x[i], grid->y[j], z_coord,
     135              :                                             iter * conservative_dt,
     136              :                                             params->source_context,
     137              :                                             &source_u, &source_v, &source_w);
     138              :                     } else {
     139              :                         source_u = params->source_amplitude_u * sin(M_PI * grid->y[j]) *
     140              :                                    exp(-params->source_decay_rate * iter * conservative_dt);
     141              :                         source_v = params->source_amplitude_v * sin(2.0 * M_PI * grid->x[i]) *
     142              :                                    exp(-params->source_decay_rate * iter * conservative_dt);
     143              :                     }
     144              : 
     145              :                     // Update u
     146              :                     double du = conservative_dt * (-field->u[idx] * du_dx - field->v[idx] * du_dy
     147              :                                                    - field->w[idx] * du_dz
     148              :                                                    - dp_dx / fmax(field->rho[idx], 1e-10)
     149              :                                                    + nu * (d2u_dx2 + d2u_dy2 + d2u_dz2) + source_u);
     150              : 
     151              :                     // Update v
     152              :                     double dv = conservative_dt * (-field->u[idx] * dv_dx - field->v[idx] * dv_dy
     153              :                                                    - field->w[idx] * dv_dz
     154              :                                                    - dp_dy / fmax(field->rho[idx], 1e-10)
     155              :                                                    + nu * (d2v_dx2 + d2v_dy2 + d2v_dz2) + source_v);
     156              : 
     157              :                     // Update w
     158              :                     double dw = conservative_dt * (-field->u[idx] * dw_dx - field->v[idx] * dw_dy
     159              :                                                    - field->w[idx] * dw_dz
     160              :                                                    - dp_dz / fmax(field->rho[idx], 1e-10)
     161              :                                                    + nu * (d2w_dx2 + d2w_dy2 + d2w_dz2) + source_w);
     162              : 
     163              :                     du = fmax(-UPDATE_LIMIT, fmin(UPDATE_LIMIT, du));
     164              :                     dv = fmax(-UPDATE_LIMIT, fmin(UPDATE_LIMIT, dv));
     165              :                     dw = fmax(-UPDATE_LIMIT, fmin(UPDATE_LIMIT, dw));
     166              : 
     167              :                     u_new[idx] = field->u[idx] + du;
     168              :                     v_new[idx] = field->v[idx] + dv;
     169              : 
     170              :                     // Limit velocity
     171              :                     u_new[idx] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, u_new[idx]));
     172              :                     v_new[idx] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, v_new[idx]));
     173              :                     w_new[idx] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->w[idx] + dw));
     174              : 
     175              :                     // Pressure update
     176              :                     double divergence = du_dx + dv_dy + dw_dz;
     177              :                     divergence = fmax(-MAX_DIVERGENCE_LIMIT, fmin(MAX_DIVERGENCE_LIMIT, divergence));
     178              : 
     179              :                     double dp =
     180              :                         -PRESSURE_UPDATE_FACTOR * conservative_dt * field->rho[idx] * divergence;
     181              :                     dp = fmax(-UPDATE_LIMIT, fmin(UPDATE_LIMIT, dp));
     182              :                     p_new[idx] = field->p[idx] + dp;
     183              :                 }
     184              :             }
     185              :         }
     186              : 
     187              :         // Copy back (implicit barrier at end of parallel for)
     188          212 :         memcpy(field->u, u_new, total * sizeof(double));
     189          212 :         memcpy(field->v, v_new, total * sizeof(double));
     190          212 :         memcpy(field->w, w_new, total * sizeof(double));
     191          212 :         memcpy(field->p, p_new, total * sizeof(double));
     192              : 
     193              :         // Store caller-set boundary values before apply_boundary_conditions overwrites them,
     194              :         // then restore them afterward.
     195          212 :         copy_boundary_velocities_3d(u_new, v_new, w_new,
     196          212 :                                     field->u, field->v, field->w, nx, ny, nz);
     197          212 :         apply_boundary_conditions(field, grid);
     198          212 :         copy_boundary_velocities_3d(field->u, field->v, field->w,
     199              :                                     u_new, v_new, w_new, nx, ny, nz);
     200              : 
     201              :         // Check for NaN/Inf values
     202          212 :         int has_nan = 0;
     203          212 :         ptrdiff_t total_int = (ptrdiff_t)total;
     204          212 :         ptrdiff_t ii;
     205          212 : #pragma omp parallel for reduction(| : has_nan) schedule(static)
     206              :         for (ii = 0; ii < total_int; ii++) {
     207              :             if (!isfinite(field->u[ii]) || !isfinite(field->v[ii]) ||
     208              :                 !isfinite(field->w[ii]) || !isfinite(field->p[ii])) {
     209              :                 has_nan = 1;
     210              :             }
     211              :         }
     212              : 
     213          212 :         if (has_nan) {
     214            0 :             cfd_free(u_new);
     215            0 :             cfd_free(v_new);
     216            0 :             cfd_free(w_new);
     217            0 :             cfd_free(p_new);
     218            0 :             cfd_set_error(CFD_ERROR_DIVERGED,
     219              :                           "NaN/Inf detected in explicit_euler_omp step");
     220            0 :             return CFD_ERROR_DIVERGED;
     221              :         }
     222              :     }
     223              : 
     224          212 :     cfd_free(u_new);
     225          212 :     cfd_free(v_new);
     226          212 :     cfd_free(w_new);
     227          212 :     cfd_free(p_new);
     228              : 
     229          212 :     return CFD_SUCCESS;
     230              : }
        

Generated by: LCOV version 2.0-1