Line data Source code
1 : #include "cfd/core/cfd_status.h"
2 : #include "cfd/core/grid.h"
3 : #include "cfd/core/indexing.h"
4 : #include "cfd/core/memory.h"
5 :
6 : #include "cfd/solvers/navier_stokes_solver.h"
7 : #include "../boundary_copy_utils.h"
8 : #include <math.h>
9 : #include <omp.h>
10 : #include <stdio.h>
11 : #include <string.h>
12 :
13 :
14 :
15 : #ifndef M_PI
16 : #define M_PI 3.14159265358979323846
17 : #endif
18 :
19 : // Physical stability limits (same as cpu solver)
20 : #define MAX_DERIVATIVE_LIMIT 100.0
21 : #define MAX_SECOND_DERIVATIVE_LIMIT 1000.0
22 : #define MAX_VELOCITY_LIMIT 100.0
23 : #define MAX_DIVERGENCE_LIMIT 10.0
24 : #define DT_CONSERVATIVE_LIMIT 0.0001
25 : #define UPDATE_LIMIT 1.0
26 : #define PRESSURE_UPDATE_FACTOR 0.1
27 :
28 : // Internal OpenMP explicit Euler implementation
29 212 : cfd_status_t explicit_euler_omp_impl(flow_field* field, const grid* grid,
30 : const ns_solver_params_t* params) {
31 212 : if (field->nx < 3 || field->ny < 3 || (field->nz > 1 && field->nz < 3)) {
32 : return CFD_ERROR_INVALID;
33 : }
34 :
35 212 : size_t nz = field->nz;
36 212 : if (nz > 1 && grid->dz) {
37 14 : for (size_t kk = 1; kk < nz - 1; kk++) {
38 12 : if (fabs(grid->dz[kk] - grid->dz[0]) > 1e-14) {
39 : return CFD_ERROR_INVALID;
40 : }
41 : }
42 : }
43 :
44 212 : size_t nx = field->nx;
45 212 : size_t ny = field->ny;
46 212 : size_t plane = nx * ny;
47 212 : size_t total = plane * nz;
48 :
49 212 : size_t stride_z = (nz > 1) ? plane : 0;
50 212 : size_t k_start = (nz > 1) ? 1 : 0;
51 212 : size_t k_end = (nz > 1) ? (nz - 1) : 1;
52 212 : double inv_2dz = (nz > 1 && grid->dz) ? 1.0 / (2.0 * grid->dz[0]) : 0.0;
53 2 : double inv_dz2 = (nz > 1 && grid->dz) ? 1.0 / (grid->dz[0] * grid->dz[0]) : 0.0;
54 :
55 : // Allocate temporary arrays
56 212 : double* u_new = (double*)cfd_calloc(total, sizeof(double));
57 212 : double* v_new = (double*)cfd_calloc(total, sizeof(double));
58 212 : double* w_new = (double*)cfd_calloc(total, sizeof(double));
59 212 : double* p_new = (double*)cfd_calloc(total, sizeof(double));
60 :
61 212 : if (!u_new || !v_new || !w_new || !p_new) {
62 0 : cfd_free(u_new);
63 0 : cfd_free(v_new);
64 0 : cfd_free(w_new);
65 0 : cfd_free(p_new);
66 0 : return CFD_ERROR_NOMEM;
67 : }
68 :
69 : // Initialize with current values
70 212 : memcpy(u_new, field->u, total * sizeof(double));
71 212 : memcpy(v_new, field->v, total * sizeof(double));
72 212 : memcpy(w_new, field->w, total * sizeof(double));
73 212 : memcpy(p_new, field->p, total * sizeof(double));
74 :
75 212 : double conservative_dt = fmin(params->dt, DT_CONSERVATIVE_LIMIT);
76 :
77 424 : for (int iter = 0; iter < params->max_iter; iter++) {
78 434 : for (size_t kk = k_start; kk < k_end; kk++) {
79 222 : int j;
80 222 : #pragma omp parallel for schedule(static)
81 : for (j = 1; j < (int)ny - 1; j++) {
82 : for (int i = 1; i < (int)nx - 1; i++) {
83 : size_t idx = (kk * stride_z) + IDX_2D(i, j, nx);
84 :
85 : // Derivatives
86 : double du_dx = (field->u[idx + 1] - field->u[idx - 1]) / (2.0 * grid->dx[i]);
87 : double du_dy =
88 : (field->u[idx + nx] - field->u[idx - nx]) / (2.0 * grid->dy[j]);
89 : double dv_dx = (field->v[idx + 1] - field->v[idx - 1]) / (2.0 * grid->dx[i]);
90 : double dv_dy =
91 : (field->v[idx + nx] - field->v[idx - nx]) / (2.0 * grid->dy[j]);
92 :
93 : double dp_dx = (field->p[idx + 1] - field->p[idx - 1]) / (2.0 * grid->dx[i]);
94 : double dp_dy =
95 : (field->p[idx + nx] - field->p[idx - nx]) / (2.0 * grid->dy[j]);
96 :
97 : double d2u_dx2 = (field->u[idx + 1] - 2.0 * field->u[idx] + field->u[idx - 1]) /
98 : (grid->dx[i] * grid->dx[i]);
99 : double d2u_dy2 =
100 : (field->u[idx + nx] - 2.0 * field->u[idx] + field->u[idx - nx]) /
101 : (grid->dy[j] * grid->dy[j]);
102 : double d2v_dx2 = (field->v[idx + 1] - 2.0 * field->v[idx] + field->v[idx - 1]) /
103 : (grid->dx[i] * grid->dx[i]);
104 : double d2v_dy2 =
105 : (field->v[idx + nx] - 2.0 * field->v[idx] + field->v[idx - nx]) /
106 : (grid->dy[j] * grid->dy[j]);
107 :
108 : double du_dz = (field->u[idx + stride_z] - field->u[idx - stride_z]) * inv_2dz;
109 : double dv_dz = (field->v[idx + stride_z] - field->v[idx - stride_z]) * inv_2dz;
110 : double dw_dx = (field->w[idx + 1] - field->w[idx - 1]) / (2.0 * grid->dx[i]);
111 : double dw_dy = (field->w[idx + nx] - field->w[idx - nx]) / (2.0 * grid->dy[j]);
112 : double dw_dz = (field->w[idx + stride_z] - field->w[idx - stride_z]) * inv_2dz;
113 : double dp_dz = (field->p[idx + stride_z] - field->p[idx - stride_z]) * inv_2dz;
114 :
115 : double d2u_dz2 = (field->u[idx + stride_z] - 2.0 * field->u[idx] + field->u[idx - stride_z]) * inv_dz2;
116 : double d2v_dz2 = (field->v[idx + stride_z] - 2.0 * field->v[idx] + field->v[idx - stride_z]) * inv_dz2;
117 : double d2w_dx2 = (field->w[idx + 1] - 2.0 * field->w[idx] + field->w[idx - 1]) / (grid->dx[i] * grid->dx[i]);
118 : double d2w_dy2 = (field->w[idx + nx] - 2.0 * field->w[idx] + field->w[idx - nx]) / (grid->dy[j] * grid->dy[j]);
119 : double d2w_dz2 = (field->w[idx + stride_z] - 2.0 * field->w[idx] + field->w[idx - stride_z]) * inv_dz2;
120 :
121 : if (field->rho[idx] <= 1e-10) {
122 : continue;
123 : }
124 :
125 : double nu = params->mu / fmax(field->rho[idx], 1e-10);
126 : nu = fmin(nu, 1.0);
127 :
128 : // Source terms
129 : double source_u = 0.0;
130 : double source_v = 0.0;
131 : double source_w = 0.0;
132 : double z_coord = (nz > 1 && grid->z) ? grid->z[kk] : 0.0;
133 : if (params->source_func) {
134 : params->source_func(grid->x[i], grid->y[j], z_coord,
135 : iter * conservative_dt,
136 : params->source_context,
137 : &source_u, &source_v, &source_w);
138 : } else {
139 : source_u = params->source_amplitude_u * sin(M_PI * grid->y[j]) *
140 : exp(-params->source_decay_rate * iter * conservative_dt);
141 : source_v = params->source_amplitude_v * sin(2.0 * M_PI * grid->x[i]) *
142 : exp(-params->source_decay_rate * iter * conservative_dt);
143 : }
144 :
145 : // Update u
146 : double du = conservative_dt * (-field->u[idx] * du_dx - field->v[idx] * du_dy
147 : - field->w[idx] * du_dz
148 : - dp_dx / fmax(field->rho[idx], 1e-10)
149 : + nu * (d2u_dx2 + d2u_dy2 + d2u_dz2) + source_u);
150 :
151 : // Update v
152 : double dv = conservative_dt * (-field->u[idx] * dv_dx - field->v[idx] * dv_dy
153 : - field->w[idx] * dv_dz
154 : - dp_dy / fmax(field->rho[idx], 1e-10)
155 : + nu * (d2v_dx2 + d2v_dy2 + d2v_dz2) + source_v);
156 :
157 : // Update w
158 : double dw = conservative_dt * (-field->u[idx] * dw_dx - field->v[idx] * dw_dy
159 : - field->w[idx] * dw_dz
160 : - dp_dz / fmax(field->rho[idx], 1e-10)
161 : + nu * (d2w_dx2 + d2w_dy2 + d2w_dz2) + source_w);
162 :
163 : du = fmax(-UPDATE_LIMIT, fmin(UPDATE_LIMIT, du));
164 : dv = fmax(-UPDATE_LIMIT, fmin(UPDATE_LIMIT, dv));
165 : dw = fmax(-UPDATE_LIMIT, fmin(UPDATE_LIMIT, dw));
166 :
167 : u_new[idx] = field->u[idx] + du;
168 : v_new[idx] = field->v[idx] + dv;
169 :
170 : // Limit velocity
171 : u_new[idx] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, u_new[idx]));
172 : v_new[idx] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, v_new[idx]));
173 : w_new[idx] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->w[idx] + dw));
174 :
175 : // Pressure update
176 : double divergence = du_dx + dv_dy + dw_dz;
177 : divergence = fmax(-MAX_DIVERGENCE_LIMIT, fmin(MAX_DIVERGENCE_LIMIT, divergence));
178 :
179 : double dp =
180 : -PRESSURE_UPDATE_FACTOR * conservative_dt * field->rho[idx] * divergence;
181 : dp = fmax(-UPDATE_LIMIT, fmin(UPDATE_LIMIT, dp));
182 : p_new[idx] = field->p[idx] + dp;
183 : }
184 : }
185 : }
186 :
187 : // Copy back (implicit barrier at end of parallel for)
188 212 : memcpy(field->u, u_new, total * sizeof(double));
189 212 : memcpy(field->v, v_new, total * sizeof(double));
190 212 : memcpy(field->w, w_new, total * sizeof(double));
191 212 : memcpy(field->p, p_new, total * sizeof(double));
192 :
193 : // Store caller-set boundary values before apply_boundary_conditions overwrites them,
194 : // then restore them afterward.
195 212 : copy_boundary_velocities_3d(u_new, v_new, w_new,
196 212 : field->u, field->v, field->w, nx, ny, nz);
197 212 : apply_boundary_conditions(field, grid);
198 212 : copy_boundary_velocities_3d(field->u, field->v, field->w,
199 : u_new, v_new, w_new, nx, ny, nz);
200 :
201 : // Check for NaN/Inf values
202 212 : int has_nan = 0;
203 212 : ptrdiff_t total_int = (ptrdiff_t)total;
204 212 : ptrdiff_t ii;
205 212 : #pragma omp parallel for reduction(| : has_nan) schedule(static)
206 : for (ii = 0; ii < total_int; ii++) {
207 : if (!isfinite(field->u[ii]) || !isfinite(field->v[ii]) ||
208 : !isfinite(field->w[ii]) || !isfinite(field->p[ii])) {
209 : has_nan = 1;
210 : }
211 : }
212 :
213 212 : if (has_nan) {
214 0 : cfd_free(u_new);
215 0 : cfd_free(v_new);
216 0 : cfd_free(w_new);
217 0 : cfd_free(p_new);
218 0 : cfd_set_error(CFD_ERROR_DIVERGED,
219 : "NaN/Inf detected in explicit_euler_omp step");
220 0 : return CFD_ERROR_DIVERGED;
221 : }
222 : }
223 :
224 212 : cfd_free(u_new);
225 212 : cfd_free(v_new);
226 212 : cfd_free(w_new);
227 212 : cfd_free(p_new);
228 :
229 212 : return CFD_SUCCESS;
230 : }
|