Line data Source code
1 : /**
2 : * @file solver_rk2.c
3 : * @brief RK2 (Heun's method) time integration for Navier-Stokes
4 : *
5 : * Second-order Runge-Kutta time stepping:
6 : * k1 = RHS(Q^n)
7 : * Q_pred = Q^n + dt * k1
8 : * k2 = RHS(Q_pred)
9 : * Q^{n+1} = Q^n + (dt/2) * (k1 + k2)
10 : *
11 : * Uses the same spatial discretisation (central differences) and physics
12 : * as the explicit Euler solver, but achieves O(dt^2) temporal accuracy
13 : * instead of O(dt).
14 : *
15 : * Branch-free 3D: when nz==1, stride_z=0 and inv_2dz/inv_dz2=0.0 cause all
16 : * z-terms to vanish, producing bit-identical results to the 2D code path.
17 : */
18 :
19 : #include "cfd/core/cfd_status.h"
20 : #include "cfd/core/grid.h"
21 : #include "cfd/core/indexing.h"
22 : #include "cfd/core/memory.h"
23 : #include "cfd/solvers/navier_stokes_solver.h"
24 :
25 : #include <math.h>
26 : #include <string.h>
27 :
28 : #ifndef M_PI
29 : #define M_PI 3.14159265358979323846
30 : #endif
31 :
32 : /* Physical stability limits */
33 : #define MAX_DERIVATIVE_LIMIT 100.0
34 : #define MAX_SECOND_DERIVATIVE_LIMIT 1000.0
35 : #define MAX_VELOCITY_LIMIT 100.0
36 : #define MAX_DIVERGENCE_LIMIT 10.0
37 :
38 : #define PRESSURE_UPDATE_FACTOR 0.1
39 :
40 : /* ============================================================================
41 : * RHS EVALUATION
42 : * ============================================================================ */
43 :
44 : /**
45 : * Compute the right-hand side of the semi-discrete Navier-Stokes equations.
46 : *
47 : * For each interior point, computes:
48 : * du/dt = -u·∇u - (1/ρ)∂p/∂x + ν∇²u + source_u
49 : * dv/dt = -u·∇v - (1/ρ)∂p/∂y + ν∇²v + source_v
50 : * dw/dt = -u·∇w - (1/ρ)∂p/∂z + ν∇²w + source_w
51 : * dp/dt = -0.1 * ρ * (∂u/∂x + ∂v/∂y + ∂w/∂z)
52 : *
53 : * Uses periodic stencil indices to avoid relying on ghost cells, which is
54 : * critical for preserving RK2 temporal order (ghost cell values may be stale
55 : * during intermediate RK stages).
56 : */
57 8084 : static void compute_rhs(const double* u, const double* v, const double* w,
58 : const double* p, const double* rho,
59 : double* rhs_u, double* rhs_v, double* rhs_w, double* rhs_p,
60 : const grid* grid, const ns_solver_params_t* params,
61 : size_t nx, size_t ny, size_t nz,
62 : size_t stride_z, size_t k_start, size_t k_end,
63 : double inv_2dz, double inv_dz2,
64 : int iter, double dt) {
65 16698 : for (size_t k = k_start; k < k_end; k++) {
66 762906 : for (size_t j = 1; j < ny - 1; j++) {
67 86011404 : for (size_t i = 1; i < nx - 1; i++) {
68 85257112 : size_t idx = k * stride_z + IDX_2D(i, j, nx);
69 :
70 : /* Safety checks */
71 85257112 : if (rho[idx] <= 1e-10) {
72 0 : rhs_u[idx] = 0.0;
73 0 : rhs_v[idx] = 0.0;
74 0 : rhs_w[idx] = 0.0;
75 0 : rhs_p[idx] = 0.0;
76 0 : continue;
77 : }
78 85257112 : if (fabs(grid->dx[i]) < 1e-10 || fabs(grid->dy[j]) < 1e-10) {
79 0 : rhs_u[idx] = 0.0;
80 0 : rhs_v[idx] = 0.0;
81 0 : rhs_w[idx] = 0.0;
82 0 : rhs_p[idx] = 0.0;
83 0 : continue;
84 : }
85 :
86 : /* Periodic stencil indices in x and y — avoids relying on ghost cells,
87 : * which is critical for preserving RK2 temporal order. */
88 85257112 : size_t il = (i > 1) ? idx - 1 : k * stride_z + IDX_2D(nx - 2, j, nx);
89 85257112 : size_t ir = (i < nx - 2) ? idx + 1 : k * stride_z + IDX_2D(1, j, nx);
90 85257112 : size_t jd = (j > 1) ? idx - nx : k * stride_z + IDX_2D(i, ny - 2, nx);
91 85257112 : size_t ju = (j < ny - 2) ? idx + nx : k * stride_z + IDX_2D(i, 1, nx);
92 :
93 : /* Periodic stencil indices in z.
94 : * When nz==1: k=0, stride_z=0, so kd=ku=idx → z-terms vanish. */
95 170514224 : size_t kd = (k > 1) ? idx - stride_z
96 85257112 : : (nz - 2) * stride_z + IDX_2D(i, j, nx);
97 170514224 : size_t ku = (k < nz - 2) ? idx + stride_z
98 85257112 : : 1 * stride_z + IDX_2D(i, j, nx);
99 :
100 : /* First derivatives (central differences) */
101 85257112 : double du_dx = (u[ir] - u[il]) / (2.0 * grid->dx[i]);
102 85257112 : double du_dy = (u[ju] - u[jd]) / (2.0 * grid->dy[j]);
103 85257112 : double du_dz = (u[ku] - u[kd]) * inv_2dz;
104 :
105 85257112 : double dv_dx = (v[ir] - v[il]) / (2.0 * grid->dx[i]);
106 85257112 : double dv_dy = (v[ju] - v[jd]) / (2.0 * grid->dy[j]);
107 85257112 : double dv_dz = (v[ku] - v[kd]) * inv_2dz;
108 :
109 85257112 : double dw_dx = (w[ir] - w[il]) / (2.0 * grid->dx[i]);
110 85257112 : double dw_dy = (w[ju] - w[jd]) / (2.0 * grid->dy[j]);
111 85257112 : double dw_dz = (w[ku] - w[kd]) * inv_2dz;
112 :
113 : /* Pressure gradients */
114 85257112 : double dp_dx = (p[ir] - p[il]) / (2.0 * grid->dx[i]);
115 85257112 : double dp_dy = (p[ju] - p[jd]) / (2.0 * grid->dy[j]);
116 85257112 : double dp_dz = (p[ku] - p[kd]) * inv_2dz;
117 :
118 : /* Second derivatives (viscous terms) */
119 85257112 : double d2u_dx2 = (u[ir] - 2.0 * u[idx] + u[il]) / (grid->dx[i] * grid->dx[i]);
120 85257112 : double d2u_dy2 = (u[ju] - 2.0 * u[idx] + u[jd]) / (grid->dy[j] * grid->dy[j]);
121 85257112 : double d2u_dz2 = (u[ku] - 2.0 * u[idx] + u[kd]) * inv_dz2;
122 :
123 85257112 : double d2v_dx2 = (v[ir] - 2.0 * v[idx] + v[il]) / (grid->dx[i] * grid->dx[i]);
124 85257112 : double d2v_dy2 = (v[ju] - 2.0 * v[idx] + v[jd]) / (grid->dy[j] * grid->dy[j]);
125 85257112 : double d2v_dz2 = (v[ku] - 2.0 * v[idx] + v[kd]) * inv_dz2;
126 :
127 85257112 : double d2w_dx2 = (w[ir] - 2.0 * w[idx] + w[il]) / (grid->dx[i] * grid->dx[i]);
128 85257112 : double d2w_dy2 = (w[ju] - 2.0 * w[idx] + w[jd]) / (grid->dy[j] * grid->dy[j]);
129 85257112 : double d2w_dz2 = (w[ku] - 2.0 * w[idx] + w[kd]) * inv_dz2;
130 :
131 : /* Kinematic viscosity */
132 85257112 : double nu = params->mu / fmax(rho[idx], 1e-10);
133 85257112 : nu = fmin(nu, 1.0);
134 :
135 : /* Clamp first derivatives */
136 85257112 : du_dx = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, du_dx));
137 85257112 : du_dy = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, du_dy));
138 85257112 : du_dz = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, du_dz));
139 85257112 : dv_dx = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dv_dx));
140 85257112 : dv_dy = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dv_dy));
141 85257112 : dv_dz = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dv_dz));
142 85257112 : dw_dx = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dw_dx));
143 85257112 : dw_dy = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dw_dy));
144 85257112 : dw_dz = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dw_dz));
145 85257112 : dp_dx = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dp_dx));
146 85257112 : dp_dy = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dp_dy));
147 85257112 : dp_dz = fmax(-MAX_DERIVATIVE_LIMIT, fmin(MAX_DERIVATIVE_LIMIT, dp_dz));
148 :
149 : /* Clamp second derivatives */
150 85257112 : d2u_dx2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2u_dx2));
151 85257112 : d2u_dy2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2u_dy2));
152 85257112 : d2u_dz2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2u_dz2));
153 85257112 : d2v_dx2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2v_dx2));
154 85257112 : d2v_dy2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2v_dy2));
155 85257112 : d2v_dz2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2v_dz2));
156 85257112 : d2w_dx2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2w_dx2));
157 85257112 : d2w_dy2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2w_dy2));
158 85257112 : d2w_dz2 = fmax(-MAX_SECOND_DERIVATIVE_LIMIT, fmin(MAX_SECOND_DERIVATIVE_LIMIT, d2w_dz2));
159 :
160 : /* Source terms */
161 85257112 : double source_u = 0.0, source_v = 0.0, source_w = 0.0;
162 85257112 : double z_coord = (nz > 1 && grid->z) ? grid->z[k] : 0.0;
163 85257112 : compute_source_terms(grid->x[i], grid->y[j], z_coord, iter, dt,
164 : params, &source_u, &source_v, &source_w);
165 :
166 : /* RHS for u-momentum */
167 85257112 : rhs_u[idx] = -u[idx] * du_dx - v[idx] * du_dy - w[idx] * du_dz
168 85257112 : - dp_dx / rho[idx]
169 85257112 : + nu * (d2u_dx2 + d2u_dy2 + d2u_dz2)
170 85257112 : + source_u;
171 :
172 : /* RHS for v-momentum */
173 85257112 : rhs_v[idx] = -u[idx] * dv_dx - v[idx] * dv_dy - w[idx] * dv_dz
174 85257112 : - dp_dy / rho[idx]
175 85257112 : + nu * (d2v_dx2 + d2v_dy2 + d2v_dz2)
176 85257112 : + source_v;
177 :
178 : /* RHS for w-momentum */
179 85257112 : rhs_w[idx] = -u[idx] * dw_dx - v[idx] * dw_dy - w[idx] * dw_dz
180 85257112 : - dp_dz / rho[idx]
181 85257112 : + nu * (d2w_dx2 + d2w_dy2 + d2w_dz2)
182 85257112 : + source_w;
183 :
184 : /* Simplified pressure RHS (divergence-based) */
185 85257112 : double divergence = du_dx + dv_dy + dw_dz;
186 85257112 : divergence = fmax(-MAX_DIVERGENCE_LIMIT,
187 : fmin(MAX_DIVERGENCE_LIMIT, divergence));
188 85257112 : rhs_p[idx] = -PRESSURE_UPDATE_FACTOR * rho[idx] * divergence;
189 : }
190 : }
191 : }
192 8084 : }
193 :
194 : /* ============================================================================
195 : * RK2 SOLVER
196 : * ============================================================================ */
197 :
198 4042 : cfd_status_t rk2_impl(flow_field* field, const grid* grid,
199 : const ns_solver_params_t* params) {
200 4042 : if (field->nx < 3 || field->ny < 3 || (field->nz > 1 && field->nz < 3)) {
201 : return CFD_ERROR_INVALID;
202 : }
203 :
204 4042 : size_t nx = field->nx;
205 4042 : size_t ny = field->ny;
206 4042 : size_t nz = field->nz;
207 :
208 : /* Reject non-uniform z-spacing (solver uses constant inv_2dz/inv_dz2) */
209 4042 : if (nz > 1 && grid->dz) {
210 371 : for (size_t k = 1; k < nz - 1; k++) {
211 318 : if (fabs(grid->dz[k] - grid->dz[0]) > 1e-14) {
212 : return CFD_ERROR_INVALID;
213 : }
214 : }
215 : }
216 :
217 4042 : size_t plane = nx * ny;
218 4042 : size_t total = plane * nz;
219 4042 : size_t bytes = total * sizeof(double);
220 :
221 : /* Branch-free 3D constants */
222 4042 : size_t stride_z = (nz > 1) ? plane : 0;
223 4042 : size_t k_start = (nz > 1) ? 1 : 0;
224 4042 : size_t k_end = (nz > 1) ? (nz - 1) : 1;
225 4042 : double inv_2dz = (nz > 1 && grid->dz) ? 1.0 / (2.0 * grid->dz[0]) : 0.0;
226 53 : double inv_dz2 = (nz > 1 && grid->dz) ? 1.0 / (grid->dz[0] * grid->dz[0]) : 0.0;
227 :
228 : /* Allocate working arrays:
229 : * k1_u/v/w/p : Stage 1 derivatives
230 : * k2_u/v/w/p : Stage 2 derivatives
231 : * u0/v0/w0/p0 : Saved state Q^n
232 : */
233 4042 : double* k1_u = (double*)cfd_calloc(total, sizeof(double));
234 4042 : double* k1_v = (double*)cfd_calloc(total, sizeof(double));
235 4042 : double* k1_w = (double*)cfd_calloc(total, sizeof(double));
236 4042 : double* k1_p = (double*)cfd_calloc(total, sizeof(double));
237 4042 : double* k2_u = (double*)cfd_calloc(total, sizeof(double));
238 4042 : double* k2_v = (double*)cfd_calloc(total, sizeof(double));
239 4042 : double* k2_w = (double*)cfd_calloc(total, sizeof(double));
240 4042 : double* k2_p = (double*)cfd_calloc(total, sizeof(double));
241 4042 : double* u0 = (double*)cfd_calloc(total, sizeof(double));
242 4042 : double* v0 = (double*)cfd_calloc(total, sizeof(double));
243 4042 : double* w0 = (double*)cfd_calloc(total, sizeof(double));
244 4042 : double* p0 = (double*)cfd_calloc(total, sizeof(double));
245 :
246 4042 : if (!k1_u || !k1_v || !k1_w || !k1_p ||
247 4042 : !k2_u || !k2_v || !k2_w || !k2_p ||
248 4042 : !u0 || !v0 || !w0 || !p0) {
249 0 : cfd_free(k1_u); cfd_free(k1_v); cfd_free(k1_w); cfd_free(k1_p);
250 0 : cfd_free(k2_u); cfd_free(k2_v); cfd_free(k2_w); cfd_free(k2_p);
251 0 : cfd_free(u0); cfd_free(v0); cfd_free(w0); cfd_free(p0);
252 0 : return CFD_ERROR_NOMEM;
253 : }
254 :
255 4042 : double dt = params->dt;
256 4042 : cfd_status_t status = CFD_SUCCESS;
257 :
258 8084 : for (int iter = 0; iter < params->max_iter; iter++) {
259 : /* Save Q^n */
260 4042 : memcpy(u0, field->u, bytes);
261 4042 : memcpy(v0, field->v, bytes);
262 4042 : memcpy(w0, field->w, bytes);
263 4042 : memcpy(p0, field->p, bytes);
264 :
265 : /* ---- Stage 1: k1 = RHS(Q^n) ---- */
266 4042 : memset(k1_u, 0, bytes);
267 4042 : memset(k1_v, 0, bytes);
268 4042 : memset(k1_w, 0, bytes);
269 4042 : memset(k1_p, 0, bytes);
270 :
271 4042 : compute_rhs(field->u, field->v, field->w, field->p, field->rho,
272 : k1_u, k1_v, k1_w, k1_p,
273 : grid, params, nx, ny, nz,
274 : stride_z, k_start, k_end, inv_2dz, inv_dz2,
275 : iter, dt);
276 :
277 : /* ---- Intermediate: field = Q^n + dt * k1 ---- */
278 44168394 : for (size_t n = 0; n < total; n++) {
279 44164352 : field->u[n] = u0[n] + dt * k1_u[n];
280 44164352 : field->v[n] = v0[n] + dt * k1_v[n];
281 44164352 : field->w[n] = w0[n] + dt * k1_w[n];
282 44164352 : field->p[n] = p0[n] + dt * k1_p[n];
283 :
284 44164352 : field->u[n] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->u[n]));
285 44164352 : field->v[n] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->v[n]));
286 44164352 : field->w[n] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->w[n]));
287 : }
288 :
289 : /* NOTE: Do NOT apply BCs between RK stages. The ghost cells carry
290 : * zero-derivative evolution (k1[ghost]=0), which is consistent with
291 : * the semi-discrete ODE system. Applying BCs here would modify the
292 : * intermediate state outside the ODE trajectory and reduce RK2 to
293 : * first-order temporal accuracy. */
294 :
295 : /* ---- Stage 2: k2 = RHS(Q_pred) ---- */
296 4042 : memset(k2_u, 0, bytes);
297 4042 : memset(k2_v, 0, bytes);
298 4042 : memset(k2_w, 0, bytes);
299 4042 : memset(k2_p, 0, bytes);
300 :
301 4042 : compute_rhs(field->u, field->v, field->w, field->p, field->rho,
302 : k2_u, k2_v, k2_w, k2_p,
303 : grid, params, nx, ny, nz,
304 : stride_z, k_start, k_end, inv_2dz, inv_dz2,
305 : iter, dt);
306 :
307 : /* ---- Final update: Q^{n+1} = Q^n + (dt/2)*(k1 + k2) ---- */
308 4042 : double half_dt = 0.5 * dt;
309 44168394 : for (size_t n = 0; n < total; n++) {
310 44164352 : field->u[n] = u0[n] + half_dt * (k1_u[n] + k2_u[n]);
311 44164352 : field->v[n] = v0[n] + half_dt * (k1_v[n] + k2_v[n]);
312 44164352 : field->w[n] = w0[n] + half_dt * (k1_w[n] + k2_w[n]);
313 44164352 : field->p[n] = p0[n] + half_dt * (k1_p[n] + k2_p[n]);
314 :
315 44164352 : field->u[n] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->u[n]));
316 44164352 : field->v[n] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->v[n]));
317 44164352 : field->w[n] = fmax(-MAX_VELOCITY_LIMIT, fmin(MAX_VELOCITY_LIMIT, field->w[n]));
318 : }
319 :
320 : /* Apply BCs to final state only (after the full RK2 step).
321 : * This updates ghost cells for the next step's k1 evaluation. */
322 4042 : apply_boundary_conditions(field, grid);
323 :
324 : /* NaN / Inf check */
325 44168394 : for (size_t n = 0; n < total; n++) {
326 44164352 : if (!isfinite(field->u[n]) || !isfinite(field->v[n]) ||
327 44164352 : !isfinite(field->w[n]) || !isfinite(field->p[n])) {
328 0 : status = CFD_ERROR_DIVERGED;
329 0 : goto cleanup;
330 : }
331 : }
332 : }
333 :
334 4042 : cleanup:
335 4042 : cfd_free(k1_u); cfd_free(k1_v); cfd_free(k1_w); cfd_free(k1_p);
336 4042 : cfd_free(k2_u); cfd_free(k2_v); cfd_free(k2_w); cfd_free(k2_p);
337 4042 : cfd_free(u0); cfd_free(v0); cfd_free(w0); cfd_free(p0);
338 :
339 4042 : return status;
340 : }
|