Line data Source code
1 : #include "cfd/core/cfd_status.h"
2 : #include "cfd/core/cpu_features.h"
3 : #include "cfd/core/filesystem.h"
4 : #include "cfd/core/gpu_device.h"
5 : #include "cfd/core/grid.h"
6 : #include "cfd/core/memory.h"
7 : #include "cfd/solvers/navier_stokes_solver.h"
8 : #include "cfd/solvers/poisson_solver.h"
9 :
10 :
11 : #ifdef _WIN32
12 : #define WIN32_LEAN_AND_MEAN
13 : #include <windows.h>
14 : #endif
15 :
16 : #include <math.h>
17 : #include <stddef.h>
18 : #include <stdio.h>
19 : #include <stdlib.h>
20 : #include <string.h>
21 :
22 : #ifdef CFD_ENABLE_OPENMP
23 : #include <omp.h>
24 : #endif
25 :
26 : // Forward declarations for internal solver implementations
27 : // These are not part of the public API
28 : cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_solver_params_t* params);
29 : cfd_status_t rk2_impl(flow_field* field, const grid* grid, const ns_solver_params_t* params);
30 : void explicit_euler_optimized_impl(flow_field* field, const grid* grid,
31 : const ns_solver_params_t* params);
32 : #ifdef CFD_ENABLE_OPENMP
33 : cfd_status_t explicit_euler_omp_impl(flow_field* field, const grid* grid,
34 : const ns_solver_params_t* params);
35 : cfd_status_t rk2_omp_impl(flow_field* field, const grid* grid,
36 : const ns_solver_params_t* params);
37 : cfd_status_t solve_projection_method_omp(flow_field* field, const grid* grid,
38 : const ns_solver_params_t* params);
39 : #endif
40 :
41 : // GPU solver functions
42 : cfd_status_t solve_projection_method_gpu(flow_field* field, const grid* grid,
43 : const ns_solver_params_t* params,
44 : const gpu_config_t* config);
45 : int gpu_is_available(void);
46 :
47 : // SIMD solver functions
48 :
49 : cfd_status_t explicit_euler_simd_init(struct NSSolver* solver, const grid* grid,
50 : const ns_solver_params_t* params);
51 : void explicit_euler_simd_destroy(struct NSSolver* solver);
52 :
53 : cfd_status_t explicit_euler_simd_step(struct NSSolver* solver, flow_field* field, const grid* grid,
54 : const ns_solver_params_t* params, ns_solver_stats_t* stats);
55 :
56 :
57 : cfd_status_t projection_simd_init(ns_solver_t* solver, const grid* grid, const ns_solver_params_t* params);
58 : void projection_simd_destroy(ns_solver_t* solver);
59 :
60 : cfd_status_t projection_simd_step(ns_solver_t* solver, flow_field* field, const grid* grid,
61 : const ns_solver_params_t* params, ns_solver_stats_t* stats);
62 :
63 : cfd_status_t rk2_avx2_init(ns_solver_t* solver, const grid* grid,
64 : const ns_solver_params_t* params);
65 : void rk2_avx2_destroy(ns_solver_t* solver);
66 : cfd_status_t rk2_avx2_step(ns_solver_t* solver, flow_field* field, const grid* grid,
67 : const ns_solver_params_t* params, ns_solver_stats_t* stats);
68 : cfd_status_t rk2_avx2_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
69 : const ns_solver_params_t* params, ns_solver_stats_t* stats);
70 :
71 : #ifdef _WIN32
72 : #else
73 : #include <sys/time.h>
74 : #endif
75 :
76 : // Maximum number of registered solver types
77 : #define MAX_REGISTERED_SOLVERS 32
78 :
79 : // Registry entry
80 : typedef struct {
81 : char name[64];
82 : ns_solver_factory_func factory;
83 : const char* description;
84 : ns_solver_backend_t backend; // Backend type for efficient filtering
85 : } solver_registry_entry;
86 :
87 : // ns_solver_registry_t structure
88 : struct NSSolverRegistry {
89 : solver_registry_entry entries[MAX_REGISTERED_SOLVERS];
90 : int count;
91 : };
92 :
93 : // Forward declarations for built-in solver factories
94 : static ns_solver_t* create_explicit_euler_solver(void);
95 : static ns_solver_t* create_rk2_solver(void);
96 : static ns_solver_t* create_rk2_optimized_solver(void);
97 : static ns_solver_t* create_explicit_euler_optimized_solver(void);
98 : static ns_solver_t* create_projection_solver(void);
99 : static ns_solver_t* create_projection_optimized_solver(void);
100 : #ifdef CFD_HAS_CUDA
101 : static ns_solver_t* create_explicit_euler_gpu_solver(void);
102 : static ns_solver_t* create_projection_gpu_solver(void);
103 : #endif
104 : #ifdef CFD_ENABLE_OPENMP
105 : static ns_solver_t* create_explicit_euler_omp_solver(void);
106 : static ns_solver_t* create_projection_omp_solver(void);
107 : static ns_solver_t* create_rk2_omp_solver(void);
108 : #endif
109 :
110 : // External projection method solver functions
111 : extern cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
112 : const ns_solver_params_t* params);
113 : extern void solve_projection_method_optimized(flow_field* field, const grid* grid,
114 : const ns_solver_params_t* params);
115 :
116 : // External GPU solver functions (from solver_gpu.cu or solver_gpu_stub.c)
117 : #include "cfd/core/cfd_status.h"
118 : #include "cfd/core/gpu_device.h"
119 :
120 :
121 : // Helper to get current time in milliseconds
122 14374 : static double get_time_ms(void) {
123 : #ifdef _WIN32
124 : LARGE_INTEGER freq, counter;
125 : QueryPerformanceFrequency(&freq);
126 : QueryPerformanceCounter(&counter);
127 : return (double)counter.QuadPart * 1000.0 / (double)freq.QuadPart;
128 : #else
129 14374 : struct timeval tv;
130 14374 : gettimeofday(&tv, NULL);
131 14374 : return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0;
132 : #endif
133 : }
134 :
135 : /**
136 : * solver Registry Implementation
137 : */
138 :
139 277 : ns_solver_registry_t* cfd_registry_create(void) {
140 277 : ns_solver_registry_t* registry = (ns_solver_registry_t*)cfd_calloc(1, sizeof(ns_solver_registry_t));
141 277 : return registry;
142 : }
143 :
144 277 : void cfd_registry_destroy(ns_solver_registry_t* registry) {
145 277 : if (registry) {
146 277 : cfd_free(registry);
147 : }
148 277 : }
149 :
150 274 : void cfd_registry_register_defaults(ns_solver_registry_t* registry) {
151 274 : if (!registry) {
152 : return;
153 : }
154 :
155 : // Register built-in solvers
156 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_EXPLICIT_EULER, create_explicit_euler_solver);
157 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_RK2, create_rk2_solver);
158 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_RK2_OPTIMIZED, create_rk2_optimized_solver);
159 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_EXPLICIT_EULER_OPTIMIZED,
160 : create_explicit_euler_optimized_solver);
161 :
162 : // Register projection method solvers
163 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_PROJECTION, create_projection_solver);
164 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_PROJECTION_OPTIMIZED,
165 : create_projection_optimized_solver);
166 :
167 : // Register GPU solvers (requires CUDA)
168 : #ifdef CFD_HAS_CUDA
169 : cfd_registry_register(registry, NS_SOLVER_TYPE_EXPLICIT_EULER_GPU,
170 : create_explicit_euler_gpu_solver);
171 : cfd_registry_register(registry, NS_SOLVER_TYPE_PROJECTION_JACOBI_GPU,
172 : create_projection_gpu_solver);
173 : #endif
174 :
175 : // Register OpenMP solvers
176 : #ifdef CFD_ENABLE_OPENMP
177 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_EXPLICIT_EULER_OMP,
178 : create_explicit_euler_omp_solver);
179 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_PROJECTION_OMP, create_projection_omp_solver);
180 274 : cfd_registry_register(registry, NS_SOLVER_TYPE_RK2_OMP, create_rk2_omp_solver);
181 : #endif
182 : }
183 :
184 : /**
185 : * Infer backend from solver type name based on naming convention.
186 : * Returns the likely backend, or NS_SOLVER_BACKEND_SCALAR as default.
187 : * Note: Defined here (before first use) and also used by cfd_solver_create_checked.
188 : */
189 2508 : static ns_solver_backend_t infer_backend_from_type(const char* type_name) {
190 2508 : if (!type_name) {
191 : return NS_SOLVER_BACKEND_SCALAR;
192 : }
193 :
194 : /* Check for GPU suffix first (most specific) */
195 2508 : if (strstr(type_name, "_gpu") != NULL) {
196 : return NS_SOLVER_BACKEND_CUDA;
197 : }
198 :
199 : /* Check for OMP suffix */
200 2506 : if (strstr(type_name, "_omp") != NULL) {
201 : return NS_SOLVER_BACKEND_OMP;
202 : }
203 :
204 : /* Check for optimized suffix (SIMD) */
205 1682 : if (strstr(type_name, "_optimized") != NULL) {
206 825 : return NS_SOLVER_BACKEND_SIMD;
207 : }
208 :
209 : /* Default to scalar */
210 : return NS_SOLVER_BACKEND_SCALAR;
211 : }
212 :
213 2502 : int cfd_registry_register(ns_solver_registry_t* registry, const char* type_name,
214 : ns_solver_factory_func factory) {
215 2502 : if (!registry || !type_name || !factory) {
216 2 : cfd_set_error(CFD_ERROR_INVALID, "Invalid arguments for solver registration");
217 2 : return -1;
218 : }
219 2500 : if (strlen(type_name) == 0) {
220 1 : cfd_set_error(CFD_ERROR_INVALID, "solver type name cannot be empty");
221 1 : return -1;
222 : }
223 2499 : if (registry->count >= MAX_REGISTERED_SOLVERS) {
224 1 : cfd_set_error(CFD_ERROR_LIMIT_EXCEEDED, "Max registered solvers limit reached");
225 1 : return -1;
226 : }
227 :
228 : /* Infer backend from type name for efficient filtering later */
229 2498 : ns_solver_backend_t backend = infer_backend_from_type(type_name);
230 :
231 : // Check if already registered
232 12858 : for (int i = 0; i < registry->count; i++) {
233 10360 : if (strcmp(registry->entries[i].name, type_name) == 0) {
234 : // Update existing entry
235 0 : registry->entries[i].factory = factory;
236 0 : registry->entries[i].backend = backend;
237 0 : return 0;
238 : }
239 : }
240 :
241 : // Add new entry
242 2498 : snprintf(registry->entries[registry->count].name,
243 : sizeof(registry->entries[registry->count].name), "%s", type_name);
244 2498 : registry->entries[registry->count].factory = factory;
245 2498 : registry->entries[registry->count].backend = backend;
246 2498 : registry->count++;
247 :
248 2498 : return 0;
249 : }
250 :
251 0 : int cfd_registry_unregister(ns_solver_registry_t* registry, const char* type_name) {
252 0 : if (!registry || !type_name) {
253 : return -1;
254 : }
255 :
256 0 : for (int i = 0; i < registry->count; i++) {
257 0 : if (strcmp(registry->entries[i].name, type_name) == 0) {
258 : // Shift remaining entries
259 0 : for (int j = i; j < registry->count - 1; j++) {
260 0 : registry->entries[j] = registry->entries[j + 1];
261 : }
262 0 : registry->count--;
263 0 : return 0;
264 : }
265 : }
266 : return -1;
267 : }
268 :
269 1 : int cfd_registry_list(ns_solver_registry_t* registry, const char** names, int max_count) {
270 1 : if (!registry) {
271 : return 0;
272 : }
273 :
274 1 : int count = (registry->count < max_count) ? registry->count : max_count;
275 1 : if (names) {
276 10 : for (int i = 0; i < count; i++) {
277 9 : names[i] = registry->entries[i].name;
278 : }
279 : }
280 : return registry->count;
281 : }
282 :
283 0 : int cfd_registry_has(ns_solver_registry_t* registry, const char* type_name) {
284 0 : if (!registry || !type_name) {
285 : return 0;
286 : }
287 :
288 0 : for (int i = 0; i < registry->count; i++) {
289 0 : if (strcmp(registry->entries[i].name, type_name) == 0) {
290 : return 1;
291 : }
292 : }
293 : return 0;
294 : }
295 :
296 0 : const char* cfd_registry_get_description(ns_solver_registry_t* registry, const char* type_name) {
297 0 : if (!registry || !type_name) {
298 : return NULL;
299 : }
300 :
301 : // Create a temporary solver to get its description
302 0 : ns_solver_t* solver = cfd_solver_create(registry, type_name);
303 0 : if (solver) {
304 0 : const char* desc = solver->description;
305 0 : solver_destroy(solver);
306 0 : return desc;
307 : }
308 : return NULL;
309 : }
310 :
311 : /**
312 : * solver Creation and Management
313 : */
314 :
315 283 : ns_solver_t* cfd_solver_create(ns_solver_registry_t* registry, const char* type_name) {
316 283 : if (!registry || !type_name) {
317 1 : cfd_set_error(CFD_ERROR_INVALID, "Invalid arguments for solver creation");
318 1 : return NULL;
319 : }
320 :
321 1023 : for (int i = 0; i < registry->count; i++) {
322 1014 : if (strcmp(registry->entries[i].name, type_name) == 0) {
323 273 : ns_solver_t* solver = registry->entries[i].factory();
324 273 : if (!solver) {
325 : /* Factory returned NULL - check if error was already set by the factory.
326 : * Factories should set specific errors:
327 : * - CFD_ERROR_UNSUPPORTED: backend not available (GPU, SIMD, etc.)
328 : * - CFD_ERROR_NOMEM: allocation failed
329 : * If no error was set, assume memory allocation failure. */
330 0 : if (cfd_get_last_status() == CFD_SUCCESS) {
331 0 : cfd_set_error(CFD_ERROR_NOMEM, "Failed to allocate solver");
332 : }
333 : }
334 273 : return solver;
335 : }
336 : }
337 :
338 : /* Solver type not found in registry - use CFD_ERROR_NOT_FOUND
339 : * to distinguish from CFD_ERROR_UNSUPPORTED (backend unavailable) */
340 9 : char error_msg[128];
341 9 : snprintf(error_msg, sizeof(error_msg), "Solver type '%s' not registered", type_name);
342 9 : cfd_set_error(CFD_ERROR_NOT_FOUND, error_msg);
343 9 : return NULL;
344 : }
345 :
346 274 : void solver_destroy(ns_solver_t* solver) {
347 274 : if (!solver) {
348 : return;
349 : }
350 :
351 273 : if (solver->destroy) {
352 273 : solver->destroy(solver);
353 : }
354 273 : cfd_free(solver);
355 : }
356 :
357 :
358 219 : cfd_status_t solver_init(ns_solver_t* solver, const grid* grid, const ns_solver_params_t* params) {
359 219 : if (!solver) {
360 : return CFD_ERROR_INVALID;
361 : }
362 218 : if (!solver->init) {
363 : return CFD_SUCCESS; // Optional
364 : }
365 :
366 218 : return solver->init(solver, grid, params);
367 : }
368 :
369 :
370 7191 : cfd_status_t solver_step(ns_solver_t* solver, flow_field* field, const grid* grid,
371 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
372 7191 : if (!solver || !field || !grid || !params) {
373 : return CFD_ERROR_INVALID;
374 : }
375 7187 : if (!solver->step) {
376 : return CFD_ERROR;
377 : }
378 :
379 7187 : double start_time = get_time_ms();
380 :
381 7187 : cfd_status_t status = solver->step(solver, field, grid, params, stats);
382 7187 : double end_time = get_time_ms();
383 :
384 7187 : if (stats) {
385 7187 : stats->elapsed_time_ms = end_time - start_time;
386 7187 : stats->status = status;
387 : }
388 :
389 : return status;
390 : }
391 :
392 :
393 1 : cfd_status_t solver_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
394 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
395 1 : if (!solver || !field || !grid || !params) {
396 : return CFD_ERROR_INVALID;
397 : }
398 0 : if (!solver->solve) {
399 : return CFD_ERROR;
400 : }
401 :
402 0 : double start_time = get_time_ms();
403 :
404 0 : cfd_status_t status = solver->solve(solver, field, grid, params, stats);
405 0 : double end_time = get_time_ms();
406 :
407 0 : if (stats) {
408 0 : stats->elapsed_time_ms = end_time - start_time;
409 0 : stats->status = status;
410 : }
411 :
412 : return status;
413 : }
414 :
415 1 : void solver_apply_boundary(ns_solver_t* solver, flow_field* field, const grid* grid) {
416 1 : if (!solver || !field || !grid) {
417 : return;
418 : }
419 :
420 0 : if (solver->apply_boundary) {
421 0 : solver->apply_boundary(solver, field, grid);
422 : } else {
423 : // Fall back to default boundary conditions
424 0 : apply_boundary_conditions(field, grid);
425 : }
426 : }
427 :
428 1 : double solver_compute_dt(ns_solver_t* solver, const flow_field* field, const grid* grid,
429 : const ns_solver_params_t* params) {
430 1 : if (!solver || !field || !grid || !params) {
431 : return 0.0;
432 : }
433 :
434 0 : if (solver->compute_dt) {
435 0 : return solver->compute_dt(solver, field, grid, params);
436 : }
437 :
438 : // Default implementation
439 0 : double max_vel = 0.0;
440 0 : double min_dx = grid->dx[0];
441 0 : double min_dy = grid->dy[0];
442 :
443 0 : for (size_t i = 0; i < grid->nx - 1; i++) {
444 0 : if (grid->dx[i] < min_dx) {
445 0 : min_dx = grid->dx[i];
446 : }
447 : }
448 0 : for (size_t j = 0; j < grid->ny - 1; j++) {
449 0 : if (grid->dy[j] < min_dy) {
450 0 : min_dy = grid->dy[j];
451 : }
452 : }
453 :
454 0 : for (size_t i = 0; i < field->nx * field->ny; i++) {
455 0 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
456 0 : if (vel > max_vel) {
457 0 : max_vel = vel;
458 : }
459 : }
460 :
461 0 : if (max_vel < 1e-10) {
462 0 : max_vel = 1.0;
463 : }
464 :
465 0 : double dt = params->cfl * fmin(min_dx, min_dy) / max_vel;
466 0 : return fmin(fmax(dt, 1e-6), 0.01);
467 : }
468 :
469 : /**
470 : * Built-in solver: Explicit Euler
471 : * Wraps the existing solve_navier_stokes function
472 : */
473 :
474 : typedef struct {
475 : int initialized;
476 : } explicit_euler_context;
477 :
478 135 : static cfd_status_t explicit_euler_init(ns_solver_t* solver, const grid* grid,
479 : const ns_solver_params_t* params) {
480 135 : (void)grid;
481 135 : (void)params;
482 :
483 135 : explicit_euler_context* ctx =
484 135 : (explicit_euler_context*)cfd_malloc(sizeof(explicit_euler_context));
485 135 : if (!ctx) {
486 : return CFD_ERROR;
487 : }
488 :
489 135 : ctx->initialized = 1;
490 135 : solver->context = ctx;
491 135 : return CFD_SUCCESS;
492 : }
493 :
494 171 : static void explicit_euler_destroy(ns_solver_t* solver) {
495 171 : if (solver->context) {
496 135 : cfd_free(solver->context);
497 135 : solver->context = NULL;
498 : }
499 171 : }
500 :
501 6256 : static cfd_status_t explicit_euler_step(ns_solver_t* solver, flow_field* field, const grid* grid,
502 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
503 6256 : (void)solver;
504 :
505 6256 : if (field->nx < 3 || field->ny < 3) {
506 : return CFD_ERROR_INVALID;
507 : }
508 :
509 : // Create params with single iteration
510 6256 : ns_solver_params_t step_params = *params;
511 6256 : step_params.max_iter = 1;
512 :
513 6256 : explicit_euler_impl(field, grid, &step_params);
514 :
515 6256 : if (stats) {
516 898 : stats->iterations = 1;
517 :
518 : // Compute max velocity
519 898 : double max_vel = 0.0;
520 898 : double max_p = 0.0;
521 1050329 : for (size_t i = 0; i < field->nx * field->ny; i++) {
522 1049431 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
523 1049431 : if (vel > max_vel) {
524 8160 : max_vel = vel;
525 : }
526 1049431 : if (fabs(field->p[i]) > max_p) {
527 15552 : max_p = fabs(field->p[i]);
528 : }
529 : }
530 898 : stats->max_velocity = max_vel;
531 898 : stats->max_pressure = max_p;
532 : }
533 :
534 : return CFD_SUCCESS;
535 : }
536 :
537 0 : static cfd_status_t explicit_euler_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
538 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
539 0 : (void)solver;
540 :
541 0 : if (field->nx < 3 || field->ny < 3) {
542 : return CFD_ERROR_INVALID;
543 : }
544 :
545 0 : explicit_euler_impl(field, grid, params);
546 :
547 0 : if (stats) {
548 0 : stats->iterations = params->max_iter;
549 :
550 0 : double max_vel = 0.0;
551 0 : double max_p = 0.0;
552 0 : for (size_t i = 0; i < field->nx * field->ny; i++) {
553 0 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
554 0 : if (vel > max_vel) {
555 0 : max_vel = vel;
556 : }
557 0 : if (fabs(field->p[i]) > max_p) {
558 0 : max_p = fabs(field->p[i]);
559 : }
560 : }
561 0 : stats->max_velocity = max_vel;
562 0 : stats->max_pressure = max_p;
563 : }
564 :
565 : return CFD_SUCCESS;
566 : }
567 :
568 115 : static ns_solver_t* create_explicit_euler_solver(void) {
569 115 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
570 115 : if (!s) {
571 : return NULL;
572 : }
573 :
574 115 : s->name = NS_SOLVER_TYPE_EXPLICIT_EULER;
575 115 : s->description = "Basic explicit Euler finite difference solver for 2D Navier-Stokes";
576 115 : s->version = "1.0.0";
577 115 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT;
578 115 : s->backend = NS_SOLVER_BACKEND_SCALAR;
579 :
580 115 : s->init = explicit_euler_init;
581 115 : s->destroy = explicit_euler_destroy;
582 115 : s->step = explicit_euler_step;
583 115 : s->solve = explicit_euler_solve;
584 115 : s->apply_boundary = NULL; // Use default
585 115 : s->compute_dt = NULL; // Use default
586 :
587 115 : return s;
588 : }
589 :
590 : /**
591 : * Built-in solver: RK2 (Heun's Method)
592 : * Second-order Runge-Kutta time integration
593 : */
594 :
595 4042 : static cfd_status_t rk2_step(ns_solver_t* solver, flow_field* field, const grid* grid,
596 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
597 4042 : (void)solver;
598 :
599 4042 : if (field->nx < 3 || field->ny < 3) {
600 : return CFD_ERROR_INVALID;
601 : }
602 :
603 4042 : ns_solver_params_t step_params = *params;
604 4042 : step_params.max_iter = 1;
605 :
606 4042 : cfd_status_t status = rk2_impl(field, grid, &step_params);
607 :
608 4042 : if (stats) {
609 684 : stats->iterations = 1;
610 684 : double max_vel = 0.0;
611 684 : double max_p = 0.0;
612 869612 : for (size_t i = 0; i < field->nx * field->ny; i++) {
613 868928 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
614 868928 : if (vel > max_vel) max_vel = vel;
615 868928 : if (fabs(field->p[i]) > max_p) max_p = fabs(field->p[i]);
616 : }
617 684 : stats->max_velocity = max_vel;
618 684 : stats->max_pressure = max_p;
619 : }
620 :
621 : return status;
622 : }
623 :
624 0 : static cfd_status_t rk2_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
625 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
626 0 : (void)solver;
627 :
628 0 : if (field->nx < 3 || field->ny < 3) {
629 : return CFD_ERROR_INVALID;
630 : }
631 :
632 0 : cfd_status_t status = rk2_impl(field, grid, params);
633 :
634 0 : if (stats) {
635 0 : stats->iterations = params->max_iter;
636 0 : double max_vel = 0.0;
637 0 : double max_p = 0.0;
638 0 : for (size_t i = 0; i < field->nx * field->ny; i++) {
639 0 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
640 0 : if (vel > max_vel) max_vel = vel;
641 0 : if (fabs(field->p[i]) > max_p) max_p = fabs(field->p[i]);
642 : }
643 0 : stats->max_velocity = max_vel;
644 0 : stats->max_pressure = max_p;
645 : }
646 :
647 : return status;
648 : }
649 :
650 29 : static ns_solver_t* create_rk2_solver(void) {
651 29 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
652 29 : if (!s) {
653 : return NULL;
654 : }
655 :
656 29 : s->name = NS_SOLVER_TYPE_RK2;
657 29 : s->description = "RK2 (Heun's method) - 2nd order explicit time integration";
658 29 : s->version = "1.0.0";
659 29 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT;
660 29 : s->backend = NS_SOLVER_BACKEND_SCALAR;
661 :
662 29 : s->init = explicit_euler_init;
663 29 : s->destroy = explicit_euler_destroy;
664 29 : s->step = rk2_step;
665 29 : s->solve = rk2_solve;
666 29 : s->apply_boundary = NULL;
667 29 : s->compute_dt = NULL;
668 :
669 29 : return s;
670 : }
671 :
672 6 : static ns_solver_t* create_rk2_optimized_solver(void) {
673 6 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
674 6 : if (!s) {
675 : return NULL;
676 : }
677 :
678 6 : s->name = NS_SOLVER_TYPE_RK2_OPTIMIZED;
679 6 : s->description = "AVX2/SIMD + OpenMP RK2 (Heun's method)";
680 6 : s->version = "1.0.0";
681 6 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT |
682 : NS_SOLVER_CAP_SIMD;
683 6 : s->backend = NS_SOLVER_BACKEND_SIMD;
684 :
685 6 : s->init = rk2_avx2_init;
686 6 : s->destroy = rk2_avx2_destroy;
687 6 : s->step = rk2_avx2_step;
688 6 : s->solve = rk2_avx2_solve;
689 6 : s->apply_boundary = NULL;
690 6 : s->compute_dt = NULL;
691 :
692 6 : return s;
693 : }
694 :
695 0 : static cfd_status_t explicit_euler_simd_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
696 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
697 0 : if (!solver || !field || !grid || !params) {
698 : return CFD_ERROR_INVALID;
699 : }
700 :
701 0 : for (int i = 0; i < params->max_iter; i++) {
702 0 : cfd_status_t status = explicit_euler_simd_step(solver, field, grid, params, NULL);
703 0 : if (status != CFD_SUCCESS) {
704 0 : return status;
705 : }
706 : }
707 :
708 0 : if (stats) {
709 0 : stats->iterations = params->max_iter;
710 0 : double max_vel = 0.0;
711 0 : double max_p = 0.0;
712 0 : for (size_t i = 0; i < field->nx * field->ny; i++) {
713 0 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
714 0 : if (vel > max_vel) {
715 0 : max_vel = vel;
716 : }
717 0 : if (fabs(field->p[i]) > max_p) {
718 0 : max_p = fabs(field->p[i]);
719 : }
720 : }
721 0 : stats->max_velocity = max_vel;
722 0 : stats->max_pressure = max_p;
723 : }
724 : return CFD_SUCCESS;
725 : }
726 :
727 14 : static ns_solver_t* create_explicit_euler_optimized_solver(void) {
728 14 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
729 14 : if (!s) {
730 : return NULL;
731 : }
732 :
733 14 : s->name = NS_SOLVER_TYPE_EXPLICIT_EULER_OPTIMIZED;
734 14 : s->description = "SIMD-optimized explicit Euler solver (AVX2)";
735 14 : s->version = "1.0.0";
736 14 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_SIMD;
737 14 : s->backend = NS_SOLVER_BACKEND_SIMD;
738 :
739 14 : s->init = explicit_euler_simd_init;
740 14 : s->destroy = explicit_euler_simd_destroy;
741 14 : s->step = explicit_euler_simd_step;
742 14 : s->solve = explicit_euler_simd_solve;
743 14 : s->apply_boundary = NULL;
744 14 : s->compute_dt = NULL;
745 :
746 14 : return s;
747 : }
748 :
749 : /**
750 : * Built-in solver: Projection Method (Chorin's Method)
751 : */
752 :
753 : typedef struct {
754 : int initialized;
755 : } projection_context;
756 :
757 45 : static cfd_status_t projection_init(ns_solver_t* solver, const grid* grid, const ns_solver_params_t* params) {
758 45 : (void)grid;
759 45 : (void)params;
760 45 : projection_context* ctx = (projection_context*)cfd_malloc(sizeof(projection_context));
761 45 : if (!ctx) {
762 : return CFD_ERROR;
763 : }
764 45 : ctx->initialized = 1;
765 45 : solver->context = ctx;
766 45 : return CFD_SUCCESS;
767 : }
768 :
769 64 : static void projection_destroy(ns_solver_t* solver) {
770 64 : if (solver->context) {
771 53 : cfd_free(solver->context);
772 53 : solver->context = NULL;
773 : }
774 64 : }
775 :
776 4753 : static cfd_status_t projection_step(ns_solver_t* solver, flow_field* field, const grid* grid,
777 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
778 4753 : (void)solver;
779 4753 : if (field->nx < 3 || field->ny < 3) {
780 : return CFD_ERROR_INVALID;
781 : }
782 :
783 4753 : ns_solver_params_t step_params = *params;
784 4753 : step_params.max_iter = 1;
785 :
786 4753 : cfd_status_t status = solve_projection_method(field, grid, &step_params);
787 4753 : if (status != CFD_SUCCESS) {
788 : return status;
789 : }
790 :
791 4753 : if (stats) {
792 4753 : stats->iterations = 1;
793 : // Compute max velocity/pressure
794 4753 : double max_vel = 0.0;
795 4753 : double max_p = 0.0;
796 2094059 : for (size_t i = 0; i < field->nx * field->ny; i++) {
797 2089306 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
798 2089306 : if (vel > max_vel) {
799 276937 : max_vel = vel;
800 : }
801 2089306 : if (fabs(field->p[i]) > max_p) {
802 93586 : max_p = fabs(field->p[i]);
803 : }
804 : }
805 4753 : stats->max_velocity = max_vel;
806 4753 : stats->max_pressure = max_p;
807 : }
808 : return CFD_SUCCESS;
809 : }
810 :
811 0 : static cfd_status_t projection_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
812 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
813 0 : (void)solver;
814 0 : if (field->nx < 3 || field->ny < 3) {
815 : return CFD_ERROR_INVALID;
816 : }
817 :
818 0 : cfd_status_t status = solve_projection_method(field, grid, params);
819 0 : if (status != CFD_SUCCESS) {
820 : return status;
821 : }
822 :
823 0 : if (stats) {
824 0 : stats->iterations = params->max_iter;
825 : // Compute max velocity/pressure
826 0 : double max_vel = 0.0;
827 0 : double max_p = 0.0;
828 0 : for (size_t i = 0; i < field->nx * field->ny; i++) {
829 0 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
830 0 : if (vel > max_vel) {
831 0 : max_vel = vel;
832 : }
833 0 : if (fabs(field->p[i]) > max_p) {
834 0 : max_p = fabs(field->p[i]);
835 : }
836 : }
837 0 : stats->max_velocity = max_vel;
838 0 : stats->max_pressure = max_p;
839 : }
840 : return CFD_SUCCESS;
841 : }
842 :
843 49 : static ns_solver_t* create_projection_solver(void) {
844 49 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
845 49 : if (!s) {
846 : return NULL;
847 : }
848 :
849 49 : s->name = NS_SOLVER_TYPE_PROJECTION;
850 49 : s->description = "Projection method (Chorin's method)";
851 49 : s->version = "1.0.0";
852 49 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT;
853 49 : s->backend = NS_SOLVER_BACKEND_SCALAR;
854 :
855 49 : s->init = projection_init;
856 49 : s->destroy = projection_destroy;
857 49 : s->step = projection_step;
858 49 : s->solve = projection_solve;
859 49 : s->apply_boundary = NULL;
860 49 : s->compute_dt = NULL;
861 :
862 49 : return s;
863 : }
864 :
865 0 : static cfd_status_t projection_simd_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
866 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
867 0 : if (!solver || !field || !grid || !params) {
868 : return CFD_ERROR_INVALID;
869 : }
870 :
871 : // Use the step function which utilizes the persistent context
872 0 : for (int i = 0; i < params->max_iter; i++) {
873 0 : cfd_status_t status = projection_simd_step(solver, field, grid, params,
874 : NULL); // Pass NULL stats for individual steps
875 0 : if (status != CFD_SUCCESS) {
876 0 : return status;
877 : }
878 : }
879 :
880 0 : if (stats) {
881 0 : stats->iterations = params->max_iter;
882 : // Compute max velocity/pressure
883 0 : double max_vel = 0.0;
884 0 : double max_p = 0.0;
885 0 : for (size_t i = 0; i < field->nx * field->ny; i++) {
886 0 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
887 0 : if (vel > max_vel) {
888 0 : max_vel = vel;
889 : }
890 0 : if (fabs(field->p[i]) > max_p) {
891 0 : max_p = fabs(field->p[i]);
892 : }
893 : }
894 0 : stats->max_velocity = max_vel;
895 0 : stats->max_pressure = max_p;
896 : }
897 : return CFD_SUCCESS;
898 : }
899 :
900 18 : static ns_solver_t* create_projection_optimized_solver(void) {
901 18 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
902 18 : if (!s) {
903 : return NULL;
904 : }
905 :
906 18 : s->name = NS_SOLVER_TYPE_PROJECTION_OPTIMIZED;
907 18 : s->description = "SIMD-optimized Projection solver (AVX2)";
908 18 : s->version = "1.0.0";
909 18 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_SIMD;
910 18 : s->backend = NS_SOLVER_BACKEND_SIMD;
911 :
912 18 : s->init = projection_simd_init;
913 18 : s->destroy = projection_simd_destroy;
914 18 : s->step = projection_simd_step;
915 18 : s->solve = projection_simd_solve;
916 18 : s->apply_boundary = NULL;
917 18 : s->compute_dt = NULL;
918 :
919 18 : return s;
920 : }
921 :
922 : #ifdef CFD_HAS_CUDA
923 : /**
924 : * Built-in solver: GPU-Accelerated Explicit Euler
925 : * Uses CUDA for GPU acceleration (requires CUDA support)
926 : */
927 :
928 : typedef struct {
929 : gpu_solver_context_t* gpu_ctx;
930 : gpu_config_t gpu_config_t;
931 : int use_gpu;
932 : } gpu_solver_wrapper_context;
933 :
934 : static cfd_status_t gpu_euler_init(ns_solver_t* solver, const grid* grid, const ns_solver_params_t* params) {
935 : gpu_solver_wrapper_context* ctx =
936 : (gpu_solver_wrapper_context*)cfd_malloc(sizeof(gpu_solver_wrapper_context));
937 : if (!ctx) {
938 : return CFD_ERROR;
939 : }
940 :
941 : ctx->gpu_config_t = gpu_config_default();
942 : ctx->use_gpu = gpu_should_use(&ctx->gpu_config_t, grid->nx, grid->ny, grid->nz, params->max_iter);
943 : ctx->gpu_ctx = NULL;
944 :
945 : if (ctx->use_gpu) {
946 : ctx->gpu_ctx = gpu_solver_create(grid->nx, grid->ny, grid->nz, &ctx->gpu_config_t);
947 : if (!ctx->gpu_ctx) {
948 : cfd_free(ctx);
949 : fprintf(stderr, "GPU Euler init: Failed to create GPU context\n");
950 : return CFD_ERROR_UNSUPPORTED;
951 : }
952 : }
953 :
954 : solver->context = ctx;
955 : return CFD_SUCCESS;
956 : }
957 :
958 : static void gpu_euler_destroy(ns_solver_t* solver) {
959 : if (solver->context) {
960 : gpu_solver_wrapper_context* ctx = (gpu_solver_wrapper_context*)solver->context;
961 : if (ctx->gpu_ctx) {
962 : gpu_solver_destroy(ctx->gpu_ctx);
963 : }
964 : cfd_free(ctx);
965 : solver->context = NULL;
966 : }
967 : }
968 :
969 : static cfd_status_t gpu_euler_step(ns_solver_t* solver, flow_field* field, const grid* grid,
970 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
971 : gpu_solver_wrapper_context* ctx = (gpu_solver_wrapper_context*)solver->context;
972 :
973 : if (field->nx < 3 || field->ny < 3) {
974 : return CFD_ERROR_INVALID;
975 : }
976 :
977 : ns_solver_params_t step_params = *params;
978 : step_params.max_iter = 1;
979 :
980 : if (ctx && ctx->use_gpu && ctx->gpu_ctx) {
981 : // Upload, step, download
982 : if (gpu_solver_upload(ctx->gpu_ctx, field) == 0) {
983 : gpu_solver_stats_t gpu_stats;
984 : if (gpu_solver_step(ctx->gpu_ctx, grid, &step_params, &gpu_stats) == 0) {
985 : gpu_solver_download(ctx->gpu_ctx, field);
986 :
987 : if (stats) {
988 : stats->iterations = 1;
989 : stats->elapsed_time_ms = gpu_stats.kernel_time_ms;
990 : // Compute max velocity/pressure
991 : double max_vel = 0.0, max_p = 0.0;
992 : for (size_t i = 0; i < field->nx * field->ny; i++) {
993 : double vel =
994 : sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
995 : if (vel > max_vel) {
996 : max_vel = vel;
997 : }
998 : if (fabs(field->p[i]) > max_p) {
999 : max_p = fabs(field->p[i]);
1000 : }
1001 : }
1002 : stats->max_velocity = max_vel;
1003 : stats->max_pressure = max_p;
1004 : }
1005 : return CFD_SUCCESS;
1006 : }
1007 : }
1008 : // GPU operation failed
1009 : fprintf(stderr, "GPU Euler step: GPU operation failed\n");
1010 : return CFD_ERROR_INVALID;
1011 : }
1012 :
1013 : // GPU not available - could be: CUDA not compiled in, gpu_should_use() returned false,
1014 : // or GPU initialization failed
1015 : fprintf(stderr, "GPU Euler step: GPU solver not initialized\n");
1016 : return CFD_ERROR_UNSUPPORTED;
1017 : }
1018 :
1019 : static cfd_status_t gpu_euler_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
1020 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1021 : gpu_solver_wrapper_context* ctx = (gpu_solver_wrapper_context*)solver->context;
1022 :
1023 : if (field->nx < 3 || field->ny < 3) {
1024 : return CFD_ERROR_INVALID;
1025 : }
1026 :
1027 : if (ctx && ctx->use_gpu && ctx->gpu_ctx) {
1028 : // Use full GPU solver
1029 : solve_navier_stokes_gpu(field, grid, params, &ctx->gpu_config_t);
1030 :
1031 : if (stats) {
1032 : stats->iterations = params->max_iter;
1033 : gpu_solver_stats_t gpu_stats = gpu_solver_get_stats(ctx->gpu_ctx);
1034 : stats->elapsed_time_ms = gpu_stats.kernel_time_ms;
1035 :
1036 : double max_vel = 0.0, max_p = 0.0;
1037 : for (size_t i = 0; i < field->nx * field->ny; i++) {
1038 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
1039 : if (vel > max_vel) {
1040 : max_vel = vel;
1041 : }
1042 : if (fabs(field->p[i]) > max_p) {
1043 : max_p = fabs(field->p[i]);
1044 : }
1045 : }
1046 : stats->max_velocity = max_vel;
1047 : stats->max_pressure = max_p;
1048 : }
1049 : return CFD_SUCCESS;
1050 : }
1051 :
1052 : // GPU not available - could be: CUDA not compiled in, gpu_should_use() returned false,
1053 : // or GPU initialization failed
1054 : fprintf(stderr, "GPU Euler solve: GPU solver not initialized\n");
1055 : return CFD_ERROR_UNSUPPORTED;
1056 : }
1057 :
1058 : static ns_solver_t* create_explicit_euler_gpu_solver(void) {
1059 : /* Check if GPU is available at runtime before creating the solver */
1060 : if (!gpu_is_available()) {
1061 : cfd_set_error(CFD_ERROR_UNSUPPORTED, "CUDA GPU not available at runtime");
1062 : return NULL;
1063 : }
1064 :
1065 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
1066 : if (!s) {
1067 : return NULL;
1068 : }
1069 :
1070 : s->name = NS_SOLVER_TYPE_EXPLICIT_EULER_GPU;
1071 : s->description = "GPU-accelerated explicit Euler solver (CUDA)";
1072 : s->version = "1.0.0";
1073 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_GPU;
1074 : s->backend = NS_SOLVER_BACKEND_CUDA;
1075 :
1076 : s->init = gpu_euler_init;
1077 : s->destroy = gpu_euler_destroy;
1078 : s->step = gpu_euler_step;
1079 : s->solve = gpu_euler_solve;
1080 : s->apply_boundary = NULL;
1081 : s->compute_dt = NULL;
1082 :
1083 : return s;
1084 : }
1085 :
1086 : /**
1087 : * Built-in solver: GPU-Accelerated Projection Method
1088 : */
1089 :
1090 : static cfd_status_t gpu_projection_step(ns_solver_t* solver, flow_field* field, const grid* grid,
1091 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1092 : (void)solver;
1093 : (void)stats;
1094 : ns_solver_params_t step_params = *params;
1095 : step_params.max_iter = 1;
1096 : /* Override thresholds to allow single-step GPU execution on small grids */
1097 : gpu_config_t cfg = gpu_config_default();
1098 : cfg.min_grid_size = 1;
1099 : cfg.min_steps = 1;
1100 : return solve_projection_method_gpu(field, grid, &step_params, &cfg);
1101 : }
1102 :
1103 : static cfd_status_t gpu_projection_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
1104 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1105 : (void)solver;
1106 : (void)stats;
1107 : /* Override thresholds to allow GPU execution on small grids */
1108 : gpu_config_t cfg = gpu_config_default();
1109 : cfg.min_grid_size = 1;
1110 : cfg.min_steps = 1;
1111 : return solve_projection_method_gpu(field, grid, params, &cfg);
1112 : }
1113 :
1114 : static ns_solver_t* create_projection_gpu_solver(void) {
1115 : /* Check if GPU is available at runtime before creating the solver */
1116 : if (!gpu_is_available()) {
1117 : cfd_set_error(CFD_ERROR_UNSUPPORTED, "CUDA GPU not available at runtime");
1118 : return NULL;
1119 : }
1120 :
1121 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
1122 : if (!s) {
1123 : return NULL;
1124 : }
1125 :
1126 : s->name = NS_SOLVER_TYPE_PROJECTION_JACOBI_GPU;
1127 : s->description = "GPU-accelerated projection method with Jacobi iteration (CUDA)";
1128 : s->version = "1.0.0";
1129 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_GPU;
1130 : s->backend = NS_SOLVER_BACKEND_CUDA;
1131 :
1132 : s->init = NULL;
1133 : s->destroy = NULL;
1134 : s->step = gpu_projection_step;
1135 : s->solve = gpu_projection_solve;
1136 : s->apply_boundary = NULL;
1137 : s->compute_dt = NULL;
1138 :
1139 : return s;
1140 : }
1141 : #endif /* CFD_HAS_CUDA */
1142 :
1143 : #ifdef CFD_ENABLE_OPENMP
1144 : /**
1145 : * Built-in solver: Explicit Euler OpenMP
1146 : */
1147 :
1148 212 : static cfd_status_t explicit_euler_omp_step(ns_solver_t* solver, flow_field* field, const grid* grid,
1149 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1150 212 : (void)solver;
1151 212 : if (field->nx < 3 || field->ny < 3) {
1152 : return CFD_ERROR_INVALID;
1153 : }
1154 :
1155 212 : ns_solver_params_t step_params = *params;
1156 212 : step_params.max_iter = 1;
1157 :
1158 212 : explicit_euler_omp_impl(field, grid, &step_params);
1159 :
1160 212 : if (stats) {
1161 212 : stats->iterations = 1;
1162 212 : double max_vel = 0.0, max_p = 0.0;
1163 1751380 : for (size_t i = 0; i < field->nx * field->ny; i++) {
1164 1751168 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
1165 1751168 : if (vel > max_vel) {
1166 4080 : max_vel = vel;
1167 : }
1168 1751168 : if (fabs(field->p[i]) > max_p) {
1169 6785 : max_p = fabs(field->p[i]);
1170 : }
1171 : }
1172 212 : stats->max_velocity = max_vel;
1173 212 : stats->max_pressure = max_p;
1174 : }
1175 : return CFD_SUCCESS;
1176 : }
1177 :
1178 0 : static cfd_status_t explicit_euler_omp_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
1179 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1180 0 : (void)solver;
1181 0 : if (field->nx < 3 || field->ny < 3) {
1182 : return CFD_ERROR_INVALID;
1183 : }
1184 :
1185 0 : explicit_euler_omp_impl(field, grid, params);
1186 :
1187 0 : if (stats) {
1188 0 : stats->iterations = params->max_iter;
1189 0 : double max_vel = 0.0, max_p = 0.0;
1190 0 : for (size_t i = 0; i < field->nx * field->ny; i++) {
1191 0 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
1192 0 : if (vel > max_vel) {
1193 0 : max_vel = vel;
1194 : }
1195 0 : if (fabs(field->p[i]) > max_p) {
1196 0 : max_p = fabs(field->p[i]);
1197 : }
1198 : }
1199 0 : stats->max_velocity = max_vel;
1200 0 : stats->max_pressure = max_p;
1201 : }
1202 : return CFD_SUCCESS;
1203 : }
1204 :
1205 15 : static ns_solver_t* create_explicit_euler_omp_solver(void) {
1206 15 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
1207 15 : if (!s) {
1208 : return NULL;
1209 : }
1210 :
1211 15 : s->name = NS_SOLVER_TYPE_EXPLICIT_EULER_OMP;
1212 15 : s->description = "OpenMP-parallelized explicit Euler solver";
1213 15 : s->version = "1.0.0";
1214 15 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_PARALLEL;
1215 15 : s->backend = NS_SOLVER_BACKEND_OMP;
1216 :
1217 15 : s->init = explicit_euler_init; // Can reuse existing init
1218 15 : s->destroy = explicit_euler_destroy; // Can reuse existing destroy
1219 15 : s->step = explicit_euler_omp_step;
1220 15 : s->solve = explicit_euler_omp_solve;
1221 15 : s->apply_boundary = NULL;
1222 15 : s->compute_dt = NULL;
1223 :
1224 15 : return s;
1225 : }
1226 :
1227 : /**
1228 : * Built-in NSSolver: RK2 OpenMP
1229 : */
1230 :
1231 212 : static cfd_status_t rk2_omp_step(ns_solver_t* solver, flow_field* field, const grid* grid,
1232 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1233 212 : (void)solver;
1234 212 : if (field->nx < 3 || field->ny < 3) {
1235 : return CFD_ERROR_INVALID;
1236 : }
1237 :
1238 212 : ns_solver_params_t step_params = *params;
1239 212 : step_params.max_iter = 1;
1240 :
1241 212 : cfd_status_t status = rk2_omp_impl(field, grid, &step_params);
1242 :
1243 212 : if (stats) {
1244 212 : stats->iterations = 1;
1245 212 : double max_vel = 0.0, max_p = 0.0;
1246 212 : ptrdiff_t i;
1247 212 : ptrdiff_t n = (ptrdiff_t)(field->nx * field->ny);
1248 : #if _OPENMP >= 201107
1249 212 : #pragma omp parallel for reduction(max: max_vel, max_p) schedule(static)
1250 : #endif
1251 : for (i = 0; i < n; i++) {
1252 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
1253 : if (vel > max_vel) {
1254 : max_vel = vel;
1255 : }
1256 : double ap = fabs(field->p[i]);
1257 : if (ap > max_p) {
1258 : max_p = ap;
1259 : }
1260 : }
1261 212 : stats->max_velocity = max_vel;
1262 212 : stats->max_pressure = max_p;
1263 : }
1264 : return status;
1265 : }
1266 :
1267 0 : static cfd_status_t rk2_omp_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
1268 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1269 0 : (void)solver;
1270 0 : if (field->nx < 3 || field->ny < 3) {
1271 : return CFD_ERROR_INVALID;
1272 : }
1273 :
1274 0 : cfd_status_t status = rk2_omp_impl(field, grid, params);
1275 :
1276 0 : if (stats) {
1277 0 : stats->iterations = params->max_iter;
1278 0 : double max_vel = 0.0, max_p = 0.0;
1279 0 : ptrdiff_t i;
1280 0 : ptrdiff_t n = (ptrdiff_t)(field->nx * field->ny);
1281 : #if _OPENMP >= 201107
1282 0 : #pragma omp parallel for reduction(max: max_vel, max_p) schedule(static)
1283 : #endif
1284 : for (i = 0; i < n; i++) {
1285 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
1286 : if (vel > max_vel) {
1287 : max_vel = vel;
1288 : }
1289 : double ap = fabs(field->p[i]);
1290 : if (ap > max_p) {
1291 : max_p = ap;
1292 : }
1293 : }
1294 0 : stats->max_velocity = max_vel;
1295 0 : stats->max_pressure = max_p;
1296 : }
1297 : return status;
1298 : }
1299 :
1300 12 : static ns_solver_t* create_rk2_omp_solver(void) {
1301 12 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
1302 12 : if (!s) {
1303 : return NULL;
1304 : }
1305 :
1306 12 : s->name = NS_SOLVER_TYPE_RK2_OMP;
1307 12 : s->description = "OpenMP-parallelized RK2 (Heun's method)";
1308 12 : s->version = "1.0.0";
1309 12 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_PARALLEL;
1310 12 : s->backend = NS_SOLVER_BACKEND_OMP;
1311 :
1312 12 : s->init = explicit_euler_init;
1313 12 : s->destroy = explicit_euler_destroy;
1314 12 : s->step = rk2_omp_step;
1315 12 : s->solve = rk2_omp_solve;
1316 12 : s->apply_boundary = NULL;
1317 12 : s->compute_dt = NULL;
1318 :
1319 12 : return s;
1320 : }
1321 :
1322 : /**
1323 : * Built-in NSSolver: Projection OpenMP
1324 : */
1325 :
1326 8 : static cfd_status_t projection_omp_init(ns_solver_t* solver, const grid* grid,
1327 : const ns_solver_params_t* params) {
1328 8 : (void)params;
1329 8 : if (!solver || !grid) {
1330 : return CFD_ERROR_INVALID;
1331 : }
1332 :
1333 : /* OMP projection requires OMP CG Poisson solver.
1334 : * Never fall back to scalar CG — it would serialize the Poisson solve. */
1335 8 : poisson_solver_t* test_solver = poisson_solver_create(
1336 : POISSON_METHOD_CG, POISSON_BACKEND_OMP);
1337 8 : if (!test_solver) {
1338 0 : fprintf(stderr, "projection_omp_init: OMP CG Poisson solver not available\n");
1339 0 : return CFD_ERROR_UNSUPPORTED;
1340 : }
1341 8 : poisson_solver_destroy(test_solver);
1342 :
1343 8 : projection_context* ctx = (projection_context*)cfd_malloc(sizeof(projection_context));
1344 8 : if (!ctx) {
1345 : return CFD_ERROR_NOMEM;
1346 : }
1347 8 : ctx->initialized = 1;
1348 8 : solver->context = ctx;
1349 8 : return CFD_SUCCESS;
1350 : }
1351 :
1352 122 : static cfd_status_t projection_omp_step(ns_solver_t* solver, flow_field* field, const grid* grid,
1353 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1354 122 : (void)solver;
1355 122 : if (field->nx < 3 || field->ny < 3) {
1356 : return CFD_ERROR_INVALID;
1357 : }
1358 :
1359 122 : ns_solver_params_t step_params = *params;
1360 122 : step_params.max_iter = 1;
1361 :
1362 122 : cfd_status_t status = solve_projection_method_omp(field, grid, &step_params);
1363 122 : if (status != CFD_SUCCESS) {
1364 : return status;
1365 : }
1366 :
1367 122 : if (stats) {
1368 122 : stats->iterations = 1;
1369 122 : double max_vel = 0.0, max_p = 0.0;
1370 122 : ptrdiff_t i;
1371 122 : ptrdiff_t n = (ptrdiff_t)(field->nx * field->ny);
1372 : #if _OPENMP >= 201107
1373 122 : #pragma omp parallel for reduction(max: max_vel, max_p) schedule(static)
1374 : #endif
1375 : for (i = 0; i < n; i++) {
1376 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
1377 : if (vel > max_vel) {
1378 : max_vel = vel;
1379 : }
1380 : double ap = fabs(field->p[i]);
1381 : if (ap > max_p) {
1382 : max_p = ap;
1383 : }
1384 : }
1385 122 : stats->max_velocity = max_vel;
1386 122 : stats->max_pressure = max_p;
1387 : }
1388 : return CFD_SUCCESS;
1389 : }
1390 :
1391 0 : static cfd_status_t projection_omp_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
1392 : const ns_solver_params_t* params, ns_solver_stats_t* stats) {
1393 0 : (void)solver;
1394 0 : if (field->nx < 3 || field->ny < 3) {
1395 : return CFD_ERROR_INVALID;
1396 : }
1397 :
1398 0 : cfd_status_t status = solve_projection_method_omp(field, grid, params);
1399 0 : if (status != CFD_SUCCESS) {
1400 : return status;
1401 : }
1402 :
1403 0 : if (stats) {
1404 0 : stats->iterations = params->max_iter;
1405 0 : double max_vel = 0.0, max_p = 0.0;
1406 0 : for (size_t i = 0; i < field->nx * field->ny; i++) {
1407 0 : double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
1408 0 : if (vel > max_vel) {
1409 0 : max_vel = vel;
1410 : }
1411 0 : if (fabs(field->p[i]) > max_p) {
1412 0 : max_p = fabs(field->p[i]);
1413 : }
1414 : }
1415 0 : stats->max_velocity = max_vel;
1416 0 : stats->max_pressure = max_p;
1417 : }
1418 : return CFD_SUCCESS;
1419 : }
1420 :
1421 15 : static ns_solver_t* create_projection_omp_solver(void) {
1422 15 : ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
1423 15 : if (!s) {
1424 : return NULL;
1425 : }
1426 :
1427 15 : s->name = NS_SOLVER_TYPE_PROJECTION_OMP;
1428 15 : s->description = "OpenMP-parallelized Projection solver";
1429 15 : s->version = "1.0.0";
1430 15 : s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_PARALLEL;
1431 :
1432 15 : s->init = projection_omp_init; // Checks OMP CG availability
1433 15 : s->destroy = projection_destroy;
1434 15 : s->step = projection_omp_step;
1435 15 : s->solve = projection_omp_solve;
1436 15 : s->apply_boundary = NULL;
1437 15 : s->compute_dt = NULL;
1438 15 : s->backend = NS_SOLVER_BACKEND_OMP;
1439 :
1440 15 : return s;
1441 : }
1442 : #endif
1443 :
1444 : //=============================================================================
1445 : // Backend Availability API
1446 : //=============================================================================
1447 :
1448 30 : int cfd_backend_is_available(ns_solver_backend_t backend) {
1449 30 : switch (backend) {
1450 : case NS_SOLVER_BACKEND_SCALAR:
1451 : return 1; // Always available
1452 :
1453 12 : case NS_SOLVER_BACKEND_SIMD:
1454 12 : return cfd_has_simd();
1455 :
1456 : case NS_SOLVER_BACKEND_OMP:
1457 : #ifdef CFD_ENABLE_OPENMP
1458 : return 1;
1459 : #else
1460 : return 0;
1461 : #endif
1462 :
1463 4 : case NS_SOLVER_BACKEND_CUDA:
1464 4 : return gpu_is_available();
1465 :
1466 1 : default:
1467 1 : return 0;
1468 : }
1469 : }
1470 :
1471 8 : const char* cfd_backend_get_name(ns_solver_backend_t backend) {
1472 8 : switch (backend) {
1473 : case NS_SOLVER_BACKEND_SCALAR:
1474 : return "scalar";
1475 2 : case NS_SOLVER_BACKEND_SIMD:
1476 2 : return "simd";
1477 1 : case NS_SOLVER_BACKEND_OMP:
1478 1 : return "openmp";
1479 3 : case NS_SOLVER_BACKEND_CUDA:
1480 3 : return "cuda";
1481 1 : default:
1482 1 : return "unknown";
1483 : }
1484 : }
1485 :
1486 13 : int cfd_registry_list_by_backend(ns_solver_registry_t* registry, ns_solver_backend_t backend,
1487 : const char** names, int max_count) {
1488 13 : if (!registry) {
1489 : return 0;
1490 : }
1491 :
1492 : int count = 0;
1493 :
1494 : /* If names is NULL, just count total matches (discovery mode).
1495 : * Otherwise, fill the array up to max_count and return number filled. */
1496 111 : for (int i = 0; i < registry->count; i++) {
1497 : /* Use the stored backend instead of creating temporary solvers.
1498 : * This is much more efficient and avoids side effects from factory calls. */
1499 100 : if (registry->entries[i].backend == backend) {
1500 28 : if (names == NULL) {
1501 : /* Discovery mode: count all matches */
1502 3 : count++;
1503 25 : } else if (count < max_count) {
1504 : /* Fill mode: add to array if space available */
1505 24 : names[count] = registry->entries[i].name;
1506 24 : count++;
1507 : } else {
1508 : /* Array full, stop filling but don't count more */
1509 : break;
1510 : }
1511 : }
1512 : }
1513 :
1514 : return count;
1515 : }
1516 :
1517 12 : ns_solver_t* cfd_solver_create_checked(ns_solver_registry_t* registry, const char* type_name) {
1518 12 : if (!registry || !type_name) {
1519 2 : cfd_set_error(CFD_ERROR_INVALID, "Invalid arguments for solver creation");
1520 2 : return NULL;
1521 : }
1522 :
1523 : /* Check backend availability BEFORE creating the solver */
1524 10 : ns_solver_backend_t expected_backend = infer_backend_from_type(type_name);
1525 10 : if (!cfd_backend_is_available(expected_backend)) {
1526 2 : const char* backend_name = cfd_backend_get_name(expected_backend);
1527 2 : char error_msg[128];
1528 2 : snprintf(error_msg, sizeof(error_msg),
1529 : "Backend '%s' is not available on this system", backend_name);
1530 2 : cfd_set_error(CFD_ERROR_UNSUPPORTED, error_msg);
1531 2 : return NULL;
1532 : }
1533 :
1534 : /* Now create the solver - backend is available */
1535 8 : ns_solver_t* solver = cfd_solver_create(registry, type_name);
1536 8 : if (!solver) {
1537 : /* Error already set by cfd_solver_create */
1538 : return NULL;
1539 : }
1540 :
1541 : return solver;
1542 : }
|