LCOV - code coverage report
Current view: top level - api - solver_registry.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 68.9 % 582 401
Test Date: 2026-03-04 10:22:18 Functions: 77.6 % 49 38

            Line data    Source code
       1              : #include "cfd/core/cfd_status.h"
       2              : #include "cfd/core/cpu_features.h"
       3              : #include "cfd/core/filesystem.h"
       4              : #include "cfd/core/gpu_device.h"
       5              : #include "cfd/core/grid.h"
       6              : #include "cfd/core/memory.h"
       7              : #include "cfd/solvers/navier_stokes_solver.h"
       8              : #include "cfd/solvers/poisson_solver.h"
       9              : 
      10              : 
      11              : #ifdef _WIN32
      12              : #define WIN32_LEAN_AND_MEAN
      13              : #include <windows.h>
      14              : #endif
      15              : 
      16              : #include <math.h>
      17              : #include <stddef.h>
      18              : #include <stdio.h>
      19              : #include <stdlib.h>
      20              : #include <string.h>
      21              : 
      22              : #ifdef CFD_ENABLE_OPENMP
      23              : #include <omp.h>
      24              : #endif
      25              : 
      26              : // Forward declarations for internal solver implementations
      27              : // These are not part of the public API
      28              : cfd_status_t explicit_euler_impl(flow_field* field, const grid* grid, const ns_solver_params_t* params);
      29              : cfd_status_t rk2_impl(flow_field* field, const grid* grid, const ns_solver_params_t* params);
      30              : void explicit_euler_optimized_impl(flow_field* field, const grid* grid,
      31              :                                    const ns_solver_params_t* params);
      32              : #ifdef CFD_ENABLE_OPENMP
      33              : cfd_status_t explicit_euler_omp_impl(flow_field* field, const grid* grid,
      34              :                                      const ns_solver_params_t* params);
      35              : cfd_status_t rk2_omp_impl(flow_field* field, const grid* grid,
      36              :                            const ns_solver_params_t* params);
      37              : cfd_status_t solve_projection_method_omp(flow_field* field, const grid* grid,
      38              :                                          const ns_solver_params_t* params);
      39              : #endif
      40              : 
      41              : // GPU solver functions
      42              : cfd_status_t solve_projection_method_gpu(flow_field* field, const grid* grid,
      43              :                                          const ns_solver_params_t* params,
      44              :                                          const gpu_config_t* config);
      45              : int gpu_is_available(void);
      46              : 
      47              : // SIMD solver functions
      48              : 
      49              : cfd_status_t explicit_euler_simd_init(struct NSSolver* solver, const grid* grid,
      50              :                                       const ns_solver_params_t* params);
      51              : void explicit_euler_simd_destroy(struct NSSolver* solver);
      52              : 
      53              : cfd_status_t explicit_euler_simd_step(struct NSSolver* solver, flow_field* field, const grid* grid,
      54              :                                       const ns_solver_params_t* params, ns_solver_stats_t* stats);
      55              : 
      56              : 
      57              : cfd_status_t projection_simd_init(ns_solver_t* solver, const grid* grid, const ns_solver_params_t* params);
      58              : void projection_simd_destroy(ns_solver_t* solver);
      59              : 
      60              : cfd_status_t projection_simd_step(ns_solver_t* solver, flow_field* field, const grid* grid,
      61              :                                   const ns_solver_params_t* params, ns_solver_stats_t* stats);
      62              : 
      63              : cfd_status_t rk2_avx2_init(ns_solver_t* solver, const grid* grid,
      64              :                              const ns_solver_params_t* params);
      65              : void         rk2_avx2_destroy(ns_solver_t* solver);
      66              : cfd_status_t rk2_avx2_step(ns_solver_t* solver, flow_field* field, const grid* grid,
      67              :                              const ns_solver_params_t* params, ns_solver_stats_t* stats);
      68              : cfd_status_t rk2_avx2_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
      69              :                               const ns_solver_params_t* params, ns_solver_stats_t* stats);
      70              : 
      71              : #ifdef _WIN32
      72              : #else
      73              : #include <sys/time.h>
      74              : #endif
      75              : 
      76              : // Maximum number of registered solver types
      77              : #define MAX_REGISTERED_SOLVERS 32
      78              : 
      79              : // Registry entry
      80              : typedef struct {
      81              :     char name[64];
      82              :     ns_solver_factory_func factory;
      83              :     const char* description;
      84              :     ns_solver_backend_t backend;  // Backend type for efficient filtering
      85              : } solver_registry_entry;
      86              : 
      87              : // ns_solver_registry_t structure
      88              : struct NSSolverRegistry {
      89              :     solver_registry_entry entries[MAX_REGISTERED_SOLVERS];
      90              :     int count;
      91              : };
      92              : 
      93              : // Forward declarations for built-in solver factories
      94              : static ns_solver_t* create_explicit_euler_solver(void);
      95              : static ns_solver_t* create_rk2_solver(void);
      96              : static ns_solver_t* create_rk2_optimized_solver(void);
      97              : static ns_solver_t* create_explicit_euler_optimized_solver(void);
      98              : static ns_solver_t* create_projection_solver(void);
      99              : static ns_solver_t* create_projection_optimized_solver(void);
     100              : #ifdef CFD_HAS_CUDA
     101              : static ns_solver_t* create_explicit_euler_gpu_solver(void);
     102              : static ns_solver_t* create_projection_gpu_solver(void);
     103              : #endif
     104              : #ifdef CFD_ENABLE_OPENMP
     105              : static ns_solver_t* create_explicit_euler_omp_solver(void);
     106              : static ns_solver_t* create_projection_omp_solver(void);
     107              : static ns_solver_t* create_rk2_omp_solver(void);
     108              : #endif
     109              : 
     110              : // External projection method solver functions
     111              : extern cfd_status_t solve_projection_method(flow_field* field, const grid* grid,
     112              :                                             const ns_solver_params_t* params);
     113              : extern void solve_projection_method_optimized(flow_field* field, const grid* grid,
     114              :                                               const ns_solver_params_t* params);
     115              : 
     116              : // External GPU solver functions (from solver_gpu.cu or solver_gpu_stub.c)
     117              : #include "cfd/core/cfd_status.h"
     118              : #include "cfd/core/gpu_device.h"
     119              : 
     120              : 
     121              : // Helper to get current time in milliseconds
     122        14374 : static double get_time_ms(void) {
     123              : #ifdef _WIN32
     124              :     LARGE_INTEGER freq, counter;
     125              :     QueryPerformanceFrequency(&freq);
     126              :     QueryPerformanceCounter(&counter);
     127              :     return (double)counter.QuadPart * 1000.0 / (double)freq.QuadPart;
     128              : #else
     129        14374 :     struct timeval tv;
     130        14374 :     gettimeofday(&tv, NULL);
     131        14374 :     return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0;
     132              : #endif
     133              : }
     134              : 
     135              : /**
     136              :  * solver Registry Implementation
     137              :  */
     138              : 
     139          277 : ns_solver_registry_t* cfd_registry_create(void) {
     140          277 :     ns_solver_registry_t* registry = (ns_solver_registry_t*)cfd_calloc(1, sizeof(ns_solver_registry_t));
     141          277 :     return registry;
     142              : }
     143              : 
     144          277 : void cfd_registry_destroy(ns_solver_registry_t* registry) {
     145          277 :     if (registry) {
     146          277 :         cfd_free(registry);
     147              :     }
     148          277 : }
     149              : 
     150          274 : void cfd_registry_register_defaults(ns_solver_registry_t* registry) {
     151          274 :     if (!registry) {
     152              :         return;
     153              :     }
     154              : 
     155              :     // Register built-in solvers
     156          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_EXPLICIT_EULER, create_explicit_euler_solver);
     157          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_RK2, create_rk2_solver);
     158          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_RK2_OPTIMIZED, create_rk2_optimized_solver);
     159          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_EXPLICIT_EULER_OPTIMIZED,
     160              :                           create_explicit_euler_optimized_solver);
     161              : 
     162              :     // Register projection method solvers
     163          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_PROJECTION, create_projection_solver);
     164          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_PROJECTION_OPTIMIZED,
     165              :                           create_projection_optimized_solver);
     166              : 
     167              :     // Register GPU solvers (requires CUDA)
     168              : #ifdef CFD_HAS_CUDA
     169              :     cfd_registry_register(registry, NS_SOLVER_TYPE_EXPLICIT_EULER_GPU,
     170              :                           create_explicit_euler_gpu_solver);
     171              :     cfd_registry_register(registry, NS_SOLVER_TYPE_PROJECTION_JACOBI_GPU,
     172              :                           create_projection_gpu_solver);
     173              : #endif
     174              : 
     175              :     // Register OpenMP solvers
     176              : #ifdef CFD_ENABLE_OPENMP
     177          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_EXPLICIT_EULER_OMP,
     178              :                           create_explicit_euler_omp_solver);
     179          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_PROJECTION_OMP, create_projection_omp_solver);
     180          274 :     cfd_registry_register(registry, NS_SOLVER_TYPE_RK2_OMP, create_rk2_omp_solver);
     181              : #endif
     182              : }
     183              : 
     184              : /**
     185              :  * Infer backend from solver type name based on naming convention.
     186              :  * Returns the likely backend, or NS_SOLVER_BACKEND_SCALAR as default.
     187              :  * Note: Defined here (before first use) and also used by cfd_solver_create_checked.
     188              :  */
     189         2508 : static ns_solver_backend_t infer_backend_from_type(const char* type_name) {
     190         2508 :     if (!type_name) {
     191              :         return NS_SOLVER_BACKEND_SCALAR;
     192              :     }
     193              : 
     194              :     /* Check for GPU suffix first (most specific) */
     195         2508 :     if (strstr(type_name, "_gpu") != NULL) {
     196              :         return NS_SOLVER_BACKEND_CUDA;
     197              :     }
     198              : 
     199              :     /* Check for OMP suffix */
     200         2506 :     if (strstr(type_name, "_omp") != NULL) {
     201              :         return NS_SOLVER_BACKEND_OMP;
     202              :     }
     203              : 
     204              :     /* Check for optimized suffix (SIMD) */
     205         1682 :     if (strstr(type_name, "_optimized") != NULL) {
     206          825 :         return NS_SOLVER_BACKEND_SIMD;
     207              :     }
     208              : 
     209              :     /* Default to scalar */
     210              :     return NS_SOLVER_BACKEND_SCALAR;
     211              : }
     212              : 
     213         2502 : int cfd_registry_register(ns_solver_registry_t* registry, const char* type_name,
     214              :                           ns_solver_factory_func factory) {
     215         2502 :     if (!registry || !type_name || !factory) {
     216            2 :         cfd_set_error(CFD_ERROR_INVALID, "Invalid arguments for solver registration");
     217            2 :         return -1;
     218              :     }
     219         2500 :     if (strlen(type_name) == 0) {
     220            1 :         cfd_set_error(CFD_ERROR_INVALID, "solver type name cannot be empty");
     221            1 :         return -1;
     222              :     }
     223         2499 :     if (registry->count >= MAX_REGISTERED_SOLVERS) {
     224            1 :         cfd_set_error(CFD_ERROR_LIMIT_EXCEEDED, "Max registered solvers limit reached");
     225            1 :         return -1;
     226              :     }
     227              : 
     228              :     /* Infer backend from type name for efficient filtering later */
     229         2498 :     ns_solver_backend_t backend = infer_backend_from_type(type_name);
     230              : 
     231              :     // Check if already registered
     232        12858 :     for (int i = 0; i < registry->count; i++) {
     233        10360 :         if (strcmp(registry->entries[i].name, type_name) == 0) {
     234              :             // Update existing entry
     235            0 :             registry->entries[i].factory = factory;
     236            0 :             registry->entries[i].backend = backend;
     237            0 :             return 0;
     238              :         }
     239              :     }
     240              : 
     241              :     // Add new entry
     242         2498 :     snprintf(registry->entries[registry->count].name,
     243              :              sizeof(registry->entries[registry->count].name), "%s", type_name);
     244         2498 :     registry->entries[registry->count].factory = factory;
     245         2498 :     registry->entries[registry->count].backend = backend;
     246         2498 :     registry->count++;
     247              : 
     248         2498 :     return 0;
     249              : }
     250              : 
     251            0 : int cfd_registry_unregister(ns_solver_registry_t* registry, const char* type_name) {
     252            0 :     if (!registry || !type_name) {
     253              :         return -1;
     254              :     }
     255              : 
     256            0 :     for (int i = 0; i < registry->count; i++) {
     257            0 :         if (strcmp(registry->entries[i].name, type_name) == 0) {
     258              :             // Shift remaining entries
     259            0 :             for (int j = i; j < registry->count - 1; j++) {
     260            0 :                 registry->entries[j] = registry->entries[j + 1];
     261              :             }
     262            0 :             registry->count--;
     263            0 :             return 0;
     264              :         }
     265              :     }
     266              :     return -1;
     267              : }
     268              : 
     269            1 : int cfd_registry_list(ns_solver_registry_t* registry, const char** names, int max_count) {
     270            1 :     if (!registry) {
     271              :         return 0;
     272              :     }
     273              : 
     274            1 :     int count = (registry->count < max_count) ? registry->count : max_count;
     275            1 :     if (names) {
     276           10 :         for (int i = 0; i < count; i++) {
     277            9 :             names[i] = registry->entries[i].name;
     278              :         }
     279              :     }
     280              :     return registry->count;
     281              : }
     282              : 
     283            0 : int cfd_registry_has(ns_solver_registry_t* registry, const char* type_name) {
     284            0 :     if (!registry || !type_name) {
     285              :         return 0;
     286              :     }
     287              : 
     288            0 :     for (int i = 0; i < registry->count; i++) {
     289            0 :         if (strcmp(registry->entries[i].name, type_name) == 0) {
     290              :             return 1;
     291              :         }
     292              :     }
     293              :     return 0;
     294              : }
     295              : 
     296            0 : const char* cfd_registry_get_description(ns_solver_registry_t* registry, const char* type_name) {
     297            0 :     if (!registry || !type_name) {
     298              :         return NULL;
     299              :     }
     300              : 
     301              :     // Create a temporary solver to get its description
     302            0 :     ns_solver_t* solver = cfd_solver_create(registry, type_name);
     303            0 :     if (solver) {
     304            0 :         const char* desc = solver->description;
     305            0 :         solver_destroy(solver);
     306            0 :         return desc;
     307              :     }
     308              :     return NULL;
     309              : }
     310              : 
     311              : /**
     312              :  * solver Creation and Management
     313              :  */
     314              : 
     315          283 : ns_solver_t* cfd_solver_create(ns_solver_registry_t* registry, const char* type_name) {
     316          283 :     if (!registry || !type_name) {
     317            1 :         cfd_set_error(CFD_ERROR_INVALID, "Invalid arguments for solver creation");
     318            1 :         return NULL;
     319              :     }
     320              : 
     321         1023 :     for (int i = 0; i < registry->count; i++) {
     322         1014 :         if (strcmp(registry->entries[i].name, type_name) == 0) {
     323          273 :             ns_solver_t* solver = registry->entries[i].factory();
     324          273 :             if (!solver) {
     325              :                 /* Factory returned NULL - check if error was already set by the factory.
     326              :                  * Factories should set specific errors:
     327              :                  * - CFD_ERROR_UNSUPPORTED: backend not available (GPU, SIMD, etc.)
     328              :                  * - CFD_ERROR_NOMEM: allocation failed
     329              :                  * If no error was set, assume memory allocation failure. */
     330            0 :                 if (cfd_get_last_status() == CFD_SUCCESS) {
     331            0 :                     cfd_set_error(CFD_ERROR_NOMEM, "Failed to allocate solver");
     332              :                 }
     333              :             }
     334          273 :             return solver;
     335              :         }
     336              :     }
     337              : 
     338              :     /* Solver type not found in registry - use CFD_ERROR_NOT_FOUND
     339              :      * to distinguish from CFD_ERROR_UNSUPPORTED (backend unavailable) */
     340            9 :     char error_msg[128];
     341            9 :     snprintf(error_msg, sizeof(error_msg), "Solver type '%s' not registered", type_name);
     342            9 :     cfd_set_error(CFD_ERROR_NOT_FOUND, error_msg);
     343            9 :     return NULL;
     344              : }
     345              : 
     346          274 : void solver_destroy(ns_solver_t* solver) {
     347          274 :     if (!solver) {
     348              :         return;
     349              :     }
     350              : 
     351          273 :     if (solver->destroy) {
     352          273 :         solver->destroy(solver);
     353              :     }
     354          273 :     cfd_free(solver);
     355              : }
     356              : 
     357              : 
     358          219 : cfd_status_t solver_init(ns_solver_t* solver, const grid* grid, const ns_solver_params_t* params) {
     359          219 :     if (!solver) {
     360              :         return CFD_ERROR_INVALID;
     361              :     }
     362          218 :     if (!solver->init) {
     363              :         return CFD_SUCCESS;  // Optional
     364              :     }
     365              : 
     366          218 :     return solver->init(solver, grid, params);
     367              : }
     368              : 
     369              : 
     370         7191 : cfd_status_t solver_step(ns_solver_t* solver, flow_field* field, const grid* grid,
     371              :                          const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     372         7191 :     if (!solver || !field || !grid || !params) {
     373              :         return CFD_ERROR_INVALID;
     374              :     }
     375         7187 :     if (!solver->step) {
     376              :         return CFD_ERROR;
     377              :     }
     378              : 
     379         7187 :     double start_time = get_time_ms();
     380              : 
     381         7187 :     cfd_status_t status = solver->step(solver, field, grid, params, stats);
     382         7187 :     double end_time = get_time_ms();
     383              : 
     384         7187 :     if (stats) {
     385         7187 :         stats->elapsed_time_ms = end_time - start_time;
     386         7187 :         stats->status = status;
     387              :     }
     388              : 
     389              :     return status;
     390              : }
     391              : 
     392              : 
     393            1 : cfd_status_t solver_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
     394              :                           const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     395            1 :     if (!solver || !field || !grid || !params) {
     396              :         return CFD_ERROR_INVALID;
     397              :     }
     398            0 :     if (!solver->solve) {
     399              :         return CFD_ERROR;
     400              :     }
     401              : 
     402            0 :     double start_time = get_time_ms();
     403              : 
     404            0 :     cfd_status_t status = solver->solve(solver, field, grid, params, stats);
     405            0 :     double end_time = get_time_ms();
     406              : 
     407            0 :     if (stats) {
     408            0 :         stats->elapsed_time_ms = end_time - start_time;
     409            0 :         stats->status = status;
     410              :     }
     411              : 
     412              :     return status;
     413              : }
     414              : 
     415            1 : void solver_apply_boundary(ns_solver_t* solver, flow_field* field, const grid* grid) {
     416            1 :     if (!solver || !field || !grid) {
     417              :         return;
     418              :     }
     419              : 
     420            0 :     if (solver->apply_boundary) {
     421            0 :         solver->apply_boundary(solver, field, grid);
     422              :     } else {
     423              :         // Fall back to default boundary conditions
     424            0 :         apply_boundary_conditions(field, grid);
     425              :     }
     426              : }
     427              : 
     428            1 : double solver_compute_dt(ns_solver_t* solver, const flow_field* field, const grid* grid,
     429              :                          const ns_solver_params_t* params) {
     430            1 :     if (!solver || !field || !grid || !params) {
     431              :         return 0.0;
     432              :     }
     433              : 
     434            0 :     if (solver->compute_dt) {
     435            0 :         return solver->compute_dt(solver, field, grid, params);
     436              :     }
     437              : 
     438              :     // Default implementation
     439            0 :     double max_vel = 0.0;
     440            0 :     double min_dx = grid->dx[0];
     441            0 :     double min_dy = grid->dy[0];
     442              : 
     443            0 :     for (size_t i = 0; i < grid->nx - 1; i++) {
     444            0 :         if (grid->dx[i] < min_dx) {
     445            0 :             min_dx = grid->dx[i];
     446              :         }
     447              :     }
     448            0 :     for (size_t j = 0; j < grid->ny - 1; j++) {
     449            0 :         if (grid->dy[j] < min_dy) {
     450            0 :             min_dy = grid->dy[j];
     451              :         }
     452              :     }
     453              : 
     454            0 :     for (size_t i = 0; i < field->nx * field->ny; i++) {
     455            0 :         double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     456            0 :         if (vel > max_vel) {
     457            0 :             max_vel = vel;
     458              :         }
     459              :     }
     460              : 
     461            0 :     if (max_vel < 1e-10) {
     462            0 :         max_vel = 1.0;
     463              :     }
     464              : 
     465            0 :     double dt = params->cfl * fmin(min_dx, min_dy) / max_vel;
     466            0 :     return fmin(fmax(dt, 1e-6), 0.01);
     467              : }
     468              : 
     469              : /**
     470              :  * Built-in solver: Explicit Euler
     471              :  * Wraps the existing solve_navier_stokes function
     472              :  */
     473              : 
     474              : typedef struct {
     475              :     int initialized;
     476              : } explicit_euler_context;
     477              : 
     478          135 : static cfd_status_t explicit_euler_init(ns_solver_t* solver, const grid* grid,
     479              :                                         const ns_solver_params_t* params) {
     480          135 :     (void)grid;
     481          135 :     (void)params;
     482              : 
     483          135 :     explicit_euler_context* ctx =
     484          135 :         (explicit_euler_context*)cfd_malloc(sizeof(explicit_euler_context));
     485          135 :     if (!ctx) {
     486              :         return CFD_ERROR;
     487              :     }
     488              : 
     489          135 :     ctx->initialized = 1;
     490          135 :     solver->context = ctx;
     491          135 :     return CFD_SUCCESS;
     492              : }
     493              : 
     494          171 : static void explicit_euler_destroy(ns_solver_t* solver) {
     495          171 :     if (solver->context) {
     496          135 :         cfd_free(solver->context);
     497          135 :         solver->context = NULL;
     498              :     }
     499          171 : }
     500              : 
     501         6256 : static cfd_status_t explicit_euler_step(ns_solver_t* solver, flow_field* field, const grid* grid,
     502              :                                         const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     503         6256 :     (void)solver;
     504              : 
     505         6256 :     if (field->nx < 3 || field->ny < 3) {
     506              :         return CFD_ERROR_INVALID;
     507              :     }
     508              : 
     509              :     // Create params with single iteration
     510         6256 :     ns_solver_params_t step_params = *params;
     511         6256 :     step_params.max_iter = 1;
     512              : 
     513         6256 :     explicit_euler_impl(field, grid, &step_params);
     514              : 
     515         6256 :     if (stats) {
     516          898 :         stats->iterations = 1;
     517              : 
     518              :         // Compute max velocity
     519          898 :         double max_vel = 0.0;
     520          898 :         double max_p = 0.0;
     521      1050329 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
     522      1049431 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     523      1049431 :             if (vel > max_vel) {
     524         8160 :                 max_vel = vel;
     525              :             }
     526      1049431 :             if (fabs(field->p[i]) > max_p) {
     527        15552 :                 max_p = fabs(field->p[i]);
     528              :             }
     529              :         }
     530          898 :         stats->max_velocity = max_vel;
     531          898 :         stats->max_pressure = max_p;
     532              :     }
     533              : 
     534              :     return CFD_SUCCESS;
     535              : }
     536              : 
     537            0 : static cfd_status_t explicit_euler_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
     538              :                                          const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     539            0 :     (void)solver;
     540              : 
     541            0 :     if (field->nx < 3 || field->ny < 3) {
     542              :         return CFD_ERROR_INVALID;
     543              :     }
     544              : 
     545            0 :     explicit_euler_impl(field, grid, params);
     546              : 
     547            0 :     if (stats) {
     548            0 :         stats->iterations = params->max_iter;
     549              : 
     550            0 :         double max_vel = 0.0;
     551            0 :         double max_p = 0.0;
     552            0 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
     553            0 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     554            0 :             if (vel > max_vel) {
     555            0 :                 max_vel = vel;
     556              :             }
     557            0 :             if (fabs(field->p[i]) > max_p) {
     558            0 :                 max_p = fabs(field->p[i]);
     559              :             }
     560              :         }
     561            0 :         stats->max_velocity = max_vel;
     562            0 :         stats->max_pressure = max_p;
     563              :     }
     564              : 
     565              :     return CFD_SUCCESS;
     566              : }
     567              : 
     568          115 : static ns_solver_t* create_explicit_euler_solver(void) {
     569          115 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
     570          115 :     if (!s) {
     571              :         return NULL;
     572              :     }
     573              : 
     574          115 :     s->name = NS_SOLVER_TYPE_EXPLICIT_EULER;
     575          115 :     s->description = "Basic explicit Euler finite difference solver for 2D Navier-Stokes";
     576          115 :     s->version = "1.0.0";
     577          115 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT;
     578          115 :     s->backend = NS_SOLVER_BACKEND_SCALAR;
     579              : 
     580          115 :     s->init = explicit_euler_init;
     581          115 :     s->destroy = explicit_euler_destroy;
     582          115 :     s->step = explicit_euler_step;
     583          115 :     s->solve = explicit_euler_solve;
     584          115 :     s->apply_boundary = NULL;  // Use default
     585          115 :     s->compute_dt = NULL;      // Use default
     586              : 
     587          115 :     return s;
     588              : }
     589              : 
     590              : /**
     591              :  * Built-in solver: RK2 (Heun's Method)
     592              :  * Second-order Runge-Kutta time integration
     593              :  */
     594              : 
     595         4042 : static cfd_status_t rk2_step(ns_solver_t* solver, flow_field* field, const grid* grid,
     596              :                               const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     597         4042 :     (void)solver;
     598              : 
     599         4042 :     if (field->nx < 3 || field->ny < 3) {
     600              :         return CFD_ERROR_INVALID;
     601              :     }
     602              : 
     603         4042 :     ns_solver_params_t step_params = *params;
     604         4042 :     step_params.max_iter = 1;
     605              : 
     606         4042 :     cfd_status_t status = rk2_impl(field, grid, &step_params);
     607              : 
     608         4042 :     if (stats) {
     609          684 :         stats->iterations = 1;
     610          684 :         double max_vel = 0.0;
     611          684 :         double max_p = 0.0;
     612       869612 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
     613       868928 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     614       868928 :             if (vel > max_vel) max_vel = vel;
     615       868928 :             if (fabs(field->p[i]) > max_p) max_p = fabs(field->p[i]);
     616              :         }
     617          684 :         stats->max_velocity = max_vel;
     618          684 :         stats->max_pressure = max_p;
     619              :     }
     620              : 
     621              :     return status;
     622              : }
     623              : 
     624            0 : static cfd_status_t rk2_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
     625              :                                const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     626            0 :     (void)solver;
     627              : 
     628            0 :     if (field->nx < 3 || field->ny < 3) {
     629              :         return CFD_ERROR_INVALID;
     630              :     }
     631              : 
     632            0 :     cfd_status_t status = rk2_impl(field, grid, params);
     633              : 
     634            0 :     if (stats) {
     635            0 :         stats->iterations = params->max_iter;
     636            0 :         double max_vel = 0.0;
     637            0 :         double max_p = 0.0;
     638            0 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
     639            0 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     640            0 :             if (vel > max_vel) max_vel = vel;
     641            0 :             if (fabs(field->p[i]) > max_p) max_p = fabs(field->p[i]);
     642              :         }
     643            0 :         stats->max_velocity = max_vel;
     644            0 :         stats->max_pressure = max_p;
     645              :     }
     646              : 
     647              :     return status;
     648              : }
     649              : 
     650           29 : static ns_solver_t* create_rk2_solver(void) {
     651           29 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
     652           29 :     if (!s) {
     653              :         return NULL;
     654              :     }
     655              : 
     656           29 :     s->name = NS_SOLVER_TYPE_RK2;
     657           29 :     s->description = "RK2 (Heun's method) - 2nd order explicit time integration";
     658           29 :     s->version = "1.0.0";
     659           29 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT;
     660           29 :     s->backend = NS_SOLVER_BACKEND_SCALAR;
     661              : 
     662           29 :     s->init = explicit_euler_init;
     663           29 :     s->destroy = explicit_euler_destroy;
     664           29 :     s->step = rk2_step;
     665           29 :     s->solve = rk2_solve;
     666           29 :     s->apply_boundary = NULL;
     667           29 :     s->compute_dt = NULL;
     668              : 
     669           29 :     return s;
     670              : }
     671              : 
     672            6 : static ns_solver_t* create_rk2_optimized_solver(void) {
     673            6 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
     674            6 :     if (!s) {
     675              :         return NULL;
     676              :     }
     677              : 
     678            6 :     s->name        = NS_SOLVER_TYPE_RK2_OPTIMIZED;
     679            6 :     s->description = "AVX2/SIMD + OpenMP RK2 (Heun's method)";
     680            6 :     s->version     = "1.0.0";
     681            6 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT |
     682              :                       NS_SOLVER_CAP_SIMD;
     683            6 :     s->backend     = NS_SOLVER_BACKEND_SIMD;
     684              : 
     685            6 :     s->init           = rk2_avx2_init;
     686            6 :     s->destroy        = rk2_avx2_destroy;
     687            6 :     s->step           = rk2_avx2_step;
     688            6 :     s->solve          = rk2_avx2_solve;
     689            6 :     s->apply_boundary = NULL;
     690            6 :     s->compute_dt     = NULL;
     691              : 
     692            6 :     return s;
     693              : }
     694              : 
     695            0 : static cfd_status_t explicit_euler_simd_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
     696              :                                               const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     697            0 :     if (!solver || !field || !grid || !params) {
     698              :         return CFD_ERROR_INVALID;
     699              :     }
     700              : 
     701            0 :     for (int i = 0; i < params->max_iter; i++) {
     702            0 :         cfd_status_t status = explicit_euler_simd_step(solver, field, grid, params, NULL);
     703            0 :         if (status != CFD_SUCCESS) {
     704            0 :             return status;
     705              :         }
     706              :     }
     707              : 
     708            0 :     if (stats) {
     709            0 :         stats->iterations = params->max_iter;
     710            0 :         double max_vel = 0.0;
     711            0 :         double max_p = 0.0;
     712            0 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
     713            0 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     714            0 :             if (vel > max_vel) {
     715            0 :                 max_vel = vel;
     716              :             }
     717            0 :             if (fabs(field->p[i]) > max_p) {
     718            0 :                 max_p = fabs(field->p[i]);
     719              :             }
     720              :         }
     721            0 :         stats->max_velocity = max_vel;
     722            0 :         stats->max_pressure = max_p;
     723              :     }
     724              :     return CFD_SUCCESS;
     725              : }
     726              : 
     727           14 : static ns_solver_t* create_explicit_euler_optimized_solver(void) {
     728           14 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
     729           14 :     if (!s) {
     730              :         return NULL;
     731              :     }
     732              : 
     733           14 :     s->name = NS_SOLVER_TYPE_EXPLICIT_EULER_OPTIMIZED;
     734           14 :     s->description = "SIMD-optimized explicit Euler solver (AVX2)";
     735           14 :     s->version = "1.0.0";
     736           14 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_SIMD;
     737           14 :     s->backend = NS_SOLVER_BACKEND_SIMD;
     738              : 
     739           14 :     s->init = explicit_euler_simd_init;
     740           14 :     s->destroy = explicit_euler_simd_destroy;
     741           14 :     s->step = explicit_euler_simd_step;
     742           14 :     s->solve = explicit_euler_simd_solve;
     743           14 :     s->apply_boundary = NULL;
     744           14 :     s->compute_dt = NULL;
     745              : 
     746           14 :     return s;
     747              : }
     748              : 
     749              : /**
     750              :  * Built-in solver: Projection Method (Chorin's Method)
     751              :  */
     752              : 
     753              : typedef struct {
     754              :     int initialized;
     755              : } projection_context;
     756              : 
     757           45 : static cfd_status_t projection_init(ns_solver_t* solver, const grid* grid, const ns_solver_params_t* params) {
     758           45 :     (void)grid;
     759           45 :     (void)params;
     760           45 :     projection_context* ctx = (projection_context*)cfd_malloc(sizeof(projection_context));
     761           45 :     if (!ctx) {
     762              :         return CFD_ERROR;
     763              :     }
     764           45 :     ctx->initialized = 1;
     765           45 :     solver->context = ctx;
     766           45 :     return CFD_SUCCESS;
     767              : }
     768              : 
     769           64 : static void projection_destroy(ns_solver_t* solver) {
     770           64 :     if (solver->context) {
     771           53 :         cfd_free(solver->context);
     772           53 :         solver->context = NULL;
     773              :     }
     774           64 : }
     775              : 
     776         4753 : static cfd_status_t projection_step(ns_solver_t* solver, flow_field* field, const grid* grid,
     777              :                                     const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     778         4753 :     (void)solver;
     779         4753 :     if (field->nx < 3 || field->ny < 3) {
     780              :         return CFD_ERROR_INVALID;
     781              :     }
     782              : 
     783         4753 :     ns_solver_params_t step_params = *params;
     784         4753 :     step_params.max_iter = 1;
     785              : 
     786         4753 :     cfd_status_t status = solve_projection_method(field, grid, &step_params);
     787         4753 :     if (status != CFD_SUCCESS) {
     788              :         return status;
     789              :     }
     790              : 
     791         4753 :     if (stats) {
     792         4753 :         stats->iterations = 1;
     793              :         // Compute max velocity/pressure
     794         4753 :         double max_vel = 0.0;
     795         4753 :         double max_p = 0.0;
     796      2094059 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
     797      2089306 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     798      2089306 :             if (vel > max_vel) {
     799       276937 :                 max_vel = vel;
     800              :             }
     801      2089306 :             if (fabs(field->p[i]) > max_p) {
     802        93586 :                 max_p = fabs(field->p[i]);
     803              :             }
     804              :         }
     805         4753 :         stats->max_velocity = max_vel;
     806         4753 :         stats->max_pressure = max_p;
     807              :     }
     808              :     return CFD_SUCCESS;
     809              : }
     810              : 
     811            0 : static cfd_status_t projection_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
     812              :                                      const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     813            0 :     (void)solver;
     814            0 :     if (field->nx < 3 || field->ny < 3) {
     815              :         return CFD_ERROR_INVALID;
     816              :     }
     817              : 
     818            0 :     cfd_status_t status = solve_projection_method(field, grid, params);
     819            0 :     if (status != CFD_SUCCESS) {
     820              :         return status;
     821              :     }
     822              : 
     823            0 :     if (stats) {
     824            0 :         stats->iterations = params->max_iter;
     825              :         // Compute max velocity/pressure
     826            0 :         double max_vel = 0.0;
     827            0 :         double max_p = 0.0;
     828            0 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
     829            0 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     830            0 :             if (vel > max_vel) {
     831            0 :                 max_vel = vel;
     832              :             }
     833            0 :             if (fabs(field->p[i]) > max_p) {
     834            0 :                 max_p = fabs(field->p[i]);
     835              :             }
     836              :         }
     837            0 :         stats->max_velocity = max_vel;
     838            0 :         stats->max_pressure = max_p;
     839              :     }
     840              :     return CFD_SUCCESS;
     841              : }
     842              : 
     843           49 : static ns_solver_t* create_projection_solver(void) {
     844           49 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
     845           49 :     if (!s) {
     846              :         return NULL;
     847              :     }
     848              : 
     849           49 :     s->name = NS_SOLVER_TYPE_PROJECTION;
     850           49 :     s->description = "Projection method (Chorin's method)";
     851           49 :     s->version = "1.0.0";
     852           49 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT;
     853           49 :     s->backend = NS_SOLVER_BACKEND_SCALAR;
     854              : 
     855           49 :     s->init = projection_init;
     856           49 :     s->destroy = projection_destroy;
     857           49 :     s->step = projection_step;
     858           49 :     s->solve = projection_solve;
     859           49 :     s->apply_boundary = NULL;
     860           49 :     s->compute_dt = NULL;
     861              : 
     862           49 :     return s;
     863              : }
     864              : 
     865            0 : static cfd_status_t projection_simd_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
     866              :                                           const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     867            0 :     if (!solver || !field || !grid || !params) {
     868              :         return CFD_ERROR_INVALID;
     869              :     }
     870              : 
     871              :     // Use the step function which utilizes the persistent context
     872            0 :     for (int i = 0; i < params->max_iter; i++) {
     873            0 :         cfd_status_t status = projection_simd_step(solver, field, grid, params,
     874              :                                                    NULL);  // Pass NULL stats for individual steps
     875            0 :         if (status != CFD_SUCCESS) {
     876            0 :             return status;
     877              :         }
     878              :     }
     879              : 
     880            0 :     if (stats) {
     881            0 :         stats->iterations = params->max_iter;
     882              :         // Compute max velocity/pressure
     883            0 :         double max_vel = 0.0;
     884            0 :         double max_p = 0.0;
     885            0 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
     886            0 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     887            0 :             if (vel > max_vel) {
     888            0 :                 max_vel = vel;
     889              :             }
     890            0 :             if (fabs(field->p[i]) > max_p) {
     891            0 :                 max_p = fabs(field->p[i]);
     892              :             }
     893              :         }
     894            0 :         stats->max_velocity = max_vel;
     895            0 :         stats->max_pressure = max_p;
     896              :     }
     897              :     return CFD_SUCCESS;
     898              : }
     899              : 
     900           18 : static ns_solver_t* create_projection_optimized_solver(void) {
     901           18 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
     902           18 :     if (!s) {
     903              :         return NULL;
     904              :     }
     905              : 
     906           18 :     s->name = NS_SOLVER_TYPE_PROJECTION_OPTIMIZED;
     907           18 :     s->description = "SIMD-optimized Projection solver (AVX2)";
     908           18 :     s->version = "1.0.0";
     909           18 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_SIMD;
     910           18 :     s->backend = NS_SOLVER_BACKEND_SIMD;
     911              : 
     912           18 :     s->init = projection_simd_init;
     913           18 :     s->destroy = projection_simd_destroy;
     914           18 :     s->step = projection_simd_step;
     915           18 :     s->solve = projection_simd_solve;
     916           18 :     s->apply_boundary = NULL;
     917           18 :     s->compute_dt = NULL;
     918              : 
     919           18 :     return s;
     920              : }
     921              : 
     922              : #ifdef CFD_HAS_CUDA
     923              : /**
     924              :  * Built-in solver: GPU-Accelerated Explicit Euler
     925              :  * Uses CUDA for GPU acceleration (requires CUDA support)
     926              :  */
     927              : 
     928              : typedef struct {
     929              :     gpu_solver_context_t* gpu_ctx;
     930              :     gpu_config_t gpu_config_t;
     931              :     int use_gpu;
     932              : } gpu_solver_wrapper_context;
     933              : 
     934              : static cfd_status_t gpu_euler_init(ns_solver_t* solver, const grid* grid, const ns_solver_params_t* params) {
     935              :     gpu_solver_wrapper_context* ctx =
     936              :         (gpu_solver_wrapper_context*)cfd_malloc(sizeof(gpu_solver_wrapper_context));
     937              :     if (!ctx) {
     938              :         return CFD_ERROR;
     939              :     }
     940              : 
     941              :     ctx->gpu_config_t = gpu_config_default();
     942              :     ctx->use_gpu = gpu_should_use(&ctx->gpu_config_t, grid->nx, grid->ny, grid->nz, params->max_iter);
     943              :     ctx->gpu_ctx = NULL;
     944              : 
     945              :     if (ctx->use_gpu) {
     946              :         ctx->gpu_ctx = gpu_solver_create(grid->nx, grid->ny, grid->nz, &ctx->gpu_config_t);
     947              :         if (!ctx->gpu_ctx) {
     948              :             cfd_free(ctx);
     949              :             fprintf(stderr, "GPU Euler init: Failed to create GPU context\n");
     950              :             return CFD_ERROR_UNSUPPORTED;
     951              :         }
     952              :     }
     953              : 
     954              :     solver->context = ctx;
     955              :     return CFD_SUCCESS;
     956              : }
     957              : 
     958              : static void gpu_euler_destroy(ns_solver_t* solver) {
     959              :     if (solver->context) {
     960              :         gpu_solver_wrapper_context* ctx = (gpu_solver_wrapper_context*)solver->context;
     961              :         if (ctx->gpu_ctx) {
     962              :             gpu_solver_destroy(ctx->gpu_ctx);
     963              :         }
     964              :         cfd_free(ctx);
     965              :         solver->context = NULL;
     966              :     }
     967              : }
     968              : 
     969              : static cfd_status_t gpu_euler_step(ns_solver_t* solver, flow_field* field, const grid* grid,
     970              :                                    const ns_solver_params_t* params, ns_solver_stats_t* stats) {
     971              :     gpu_solver_wrapper_context* ctx = (gpu_solver_wrapper_context*)solver->context;
     972              : 
     973              :     if (field->nx < 3 || field->ny < 3) {
     974              :         return CFD_ERROR_INVALID;
     975              :     }
     976              : 
     977              :     ns_solver_params_t step_params = *params;
     978              :     step_params.max_iter = 1;
     979              : 
     980              :     if (ctx && ctx->use_gpu && ctx->gpu_ctx) {
     981              :         // Upload, step, download
     982              :         if (gpu_solver_upload(ctx->gpu_ctx, field) == 0) {
     983              :             gpu_solver_stats_t gpu_stats;
     984              :             if (gpu_solver_step(ctx->gpu_ctx, grid, &step_params, &gpu_stats) == 0) {
     985              :                 gpu_solver_download(ctx->gpu_ctx, field);
     986              : 
     987              :                 if (stats) {
     988              :                     stats->iterations = 1;
     989              :                     stats->elapsed_time_ms = gpu_stats.kernel_time_ms;
     990              :                     // Compute max velocity/pressure
     991              :                     double max_vel = 0.0, max_p = 0.0;
     992              :                     for (size_t i = 0; i < field->nx * field->ny; i++) {
     993              :                         double vel =
     994              :                             sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
     995              :                         if (vel > max_vel) {
     996              :                             max_vel = vel;
     997              :                         }
     998              :                         if (fabs(field->p[i]) > max_p) {
     999              :                             max_p = fabs(field->p[i]);
    1000              :                         }
    1001              :                     }
    1002              :                     stats->max_velocity = max_vel;
    1003              :                     stats->max_pressure = max_p;
    1004              :                 }
    1005              :                 return CFD_SUCCESS;
    1006              :             }
    1007              :         }
    1008              :         // GPU operation failed
    1009              :         fprintf(stderr, "GPU Euler step: GPU operation failed\n");
    1010              :         return CFD_ERROR_INVALID;
    1011              :     }
    1012              : 
    1013              :     // GPU not available - could be: CUDA not compiled in, gpu_should_use() returned false,
    1014              :     // or GPU initialization failed
    1015              :     fprintf(stderr, "GPU Euler step: GPU solver not initialized\n");
    1016              :     return CFD_ERROR_UNSUPPORTED;
    1017              : }
    1018              : 
    1019              : static cfd_status_t gpu_euler_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
    1020              :                                     const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1021              :     gpu_solver_wrapper_context* ctx = (gpu_solver_wrapper_context*)solver->context;
    1022              : 
    1023              :     if (field->nx < 3 || field->ny < 3) {
    1024              :         return CFD_ERROR_INVALID;
    1025              :     }
    1026              : 
    1027              :     if (ctx && ctx->use_gpu && ctx->gpu_ctx) {
    1028              :         // Use full GPU solver
    1029              :         solve_navier_stokes_gpu(field, grid, params, &ctx->gpu_config_t);
    1030              : 
    1031              :         if (stats) {
    1032              :             stats->iterations = params->max_iter;
    1033              :             gpu_solver_stats_t gpu_stats = gpu_solver_get_stats(ctx->gpu_ctx);
    1034              :             stats->elapsed_time_ms = gpu_stats.kernel_time_ms;
    1035              : 
    1036              :             double max_vel = 0.0, max_p = 0.0;
    1037              :             for (size_t i = 0; i < field->nx * field->ny; i++) {
    1038              :                 double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
    1039              :                 if (vel > max_vel) {
    1040              :                     max_vel = vel;
    1041              :                 }
    1042              :                 if (fabs(field->p[i]) > max_p) {
    1043              :                     max_p = fabs(field->p[i]);
    1044              :                 }
    1045              :             }
    1046              :             stats->max_velocity = max_vel;
    1047              :             stats->max_pressure = max_p;
    1048              :         }
    1049              :         return CFD_SUCCESS;
    1050              :     }
    1051              : 
    1052              :     // GPU not available - could be: CUDA not compiled in, gpu_should_use() returned false,
    1053              :     // or GPU initialization failed
    1054              :     fprintf(stderr, "GPU Euler solve: GPU solver not initialized\n");
    1055              :     return CFD_ERROR_UNSUPPORTED;
    1056              : }
    1057              : 
    1058              : static ns_solver_t* create_explicit_euler_gpu_solver(void) {
    1059              :     /* Check if GPU is available at runtime before creating the solver */
    1060              :     if (!gpu_is_available()) {
    1061              :         cfd_set_error(CFD_ERROR_UNSUPPORTED, "CUDA GPU not available at runtime");
    1062              :         return NULL;
    1063              :     }
    1064              : 
    1065              :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
    1066              :     if (!s) {
    1067              :         return NULL;
    1068              :     }
    1069              : 
    1070              :     s->name = NS_SOLVER_TYPE_EXPLICIT_EULER_GPU;
    1071              :     s->description = "GPU-accelerated explicit Euler solver (CUDA)";
    1072              :     s->version = "1.0.0";
    1073              :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_GPU;
    1074              :     s->backend = NS_SOLVER_BACKEND_CUDA;
    1075              : 
    1076              :     s->init = gpu_euler_init;
    1077              :     s->destroy = gpu_euler_destroy;
    1078              :     s->step = gpu_euler_step;
    1079              :     s->solve = gpu_euler_solve;
    1080              :     s->apply_boundary = NULL;
    1081              :     s->compute_dt = NULL;
    1082              : 
    1083              :     return s;
    1084              : }
    1085              : 
    1086              : /**
    1087              :  * Built-in solver: GPU-Accelerated Projection Method
    1088              :  */
    1089              : 
    1090              : static cfd_status_t gpu_projection_step(ns_solver_t* solver, flow_field* field, const grid* grid,
    1091              :                                         const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1092              :     (void)solver;
    1093              :     (void)stats;
    1094              :     ns_solver_params_t step_params = *params;
    1095              :     step_params.max_iter = 1;
    1096              :     /* Override thresholds to allow single-step GPU execution on small grids */
    1097              :     gpu_config_t cfg = gpu_config_default();
    1098              :     cfg.min_grid_size = 1;
    1099              :     cfg.min_steps = 1;
    1100              :     return solve_projection_method_gpu(field, grid, &step_params, &cfg);
    1101              : }
    1102              : 
    1103              : static cfd_status_t gpu_projection_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
    1104              :                                          const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1105              :     (void)solver;
    1106              :     (void)stats;
    1107              :     /* Override thresholds to allow GPU execution on small grids */
    1108              :     gpu_config_t cfg = gpu_config_default();
    1109              :     cfg.min_grid_size = 1;
    1110              :     cfg.min_steps = 1;
    1111              :     return solve_projection_method_gpu(field, grid, params, &cfg);
    1112              : }
    1113              : 
    1114              : static ns_solver_t* create_projection_gpu_solver(void) {
    1115              :     /* Check if GPU is available at runtime before creating the solver */
    1116              :     if (!gpu_is_available()) {
    1117              :         cfd_set_error(CFD_ERROR_UNSUPPORTED, "CUDA GPU not available at runtime");
    1118              :         return NULL;
    1119              :     }
    1120              : 
    1121              :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
    1122              :     if (!s) {
    1123              :         return NULL;
    1124              :     }
    1125              : 
    1126              :     s->name = NS_SOLVER_TYPE_PROJECTION_JACOBI_GPU;
    1127              :     s->description = "GPU-accelerated projection method with Jacobi iteration (CUDA)";
    1128              :     s->version = "1.0.0";
    1129              :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_GPU;
    1130              :     s->backend = NS_SOLVER_BACKEND_CUDA;
    1131              : 
    1132              :     s->init = NULL;
    1133              :     s->destroy = NULL;
    1134              :     s->step = gpu_projection_step;
    1135              :     s->solve = gpu_projection_solve;
    1136              :     s->apply_boundary = NULL;
    1137              :     s->compute_dt = NULL;
    1138              : 
    1139              :     return s;
    1140              : }
    1141              : #endif /* CFD_HAS_CUDA */
    1142              : 
    1143              : #ifdef CFD_ENABLE_OPENMP
    1144              : /**
    1145              :  * Built-in solver: Explicit Euler OpenMP
    1146              :  */
    1147              : 
    1148          212 : static cfd_status_t explicit_euler_omp_step(ns_solver_t* solver, flow_field* field, const grid* grid,
    1149              :                                             const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1150          212 :     (void)solver;
    1151          212 :     if (field->nx < 3 || field->ny < 3) {
    1152              :         return CFD_ERROR_INVALID;
    1153              :     }
    1154              : 
    1155          212 :     ns_solver_params_t step_params = *params;
    1156          212 :     step_params.max_iter = 1;
    1157              : 
    1158          212 :     explicit_euler_omp_impl(field, grid, &step_params);
    1159              : 
    1160          212 :     if (stats) {
    1161          212 :         stats->iterations = 1;
    1162          212 :         double max_vel = 0.0, max_p = 0.0;
    1163      1751380 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
    1164      1751168 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
    1165      1751168 :             if (vel > max_vel) {
    1166         4080 :                 max_vel = vel;
    1167              :             }
    1168      1751168 :             if (fabs(field->p[i]) > max_p) {
    1169         6785 :                 max_p = fabs(field->p[i]);
    1170              :             }
    1171              :         }
    1172          212 :         stats->max_velocity = max_vel;
    1173          212 :         stats->max_pressure = max_p;
    1174              :     }
    1175              :     return CFD_SUCCESS;
    1176              : }
    1177              : 
    1178            0 : static cfd_status_t explicit_euler_omp_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
    1179              :                                              const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1180            0 :     (void)solver;
    1181            0 :     if (field->nx < 3 || field->ny < 3) {
    1182              :         return CFD_ERROR_INVALID;
    1183              :     }
    1184              : 
    1185            0 :     explicit_euler_omp_impl(field, grid, params);
    1186              : 
    1187            0 :     if (stats) {
    1188            0 :         stats->iterations = params->max_iter;
    1189            0 :         double max_vel = 0.0, max_p = 0.0;
    1190            0 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
    1191            0 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
    1192            0 :             if (vel > max_vel) {
    1193            0 :                 max_vel = vel;
    1194              :             }
    1195            0 :             if (fabs(field->p[i]) > max_p) {
    1196            0 :                 max_p = fabs(field->p[i]);
    1197              :             }
    1198              :         }
    1199            0 :         stats->max_velocity = max_vel;
    1200            0 :         stats->max_pressure = max_p;
    1201              :     }
    1202              :     return CFD_SUCCESS;
    1203              : }
    1204              : 
    1205           15 : static ns_solver_t* create_explicit_euler_omp_solver(void) {
    1206           15 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
    1207           15 :     if (!s) {
    1208              :         return NULL;
    1209              :     }
    1210              : 
    1211           15 :     s->name = NS_SOLVER_TYPE_EXPLICIT_EULER_OMP;
    1212           15 :     s->description = "OpenMP-parallelized explicit Euler solver";
    1213           15 :     s->version = "1.0.0";
    1214           15 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_PARALLEL;
    1215           15 :     s->backend = NS_SOLVER_BACKEND_OMP;
    1216              : 
    1217           15 :     s->init = explicit_euler_init;        // Can reuse existing init
    1218           15 :     s->destroy = explicit_euler_destroy;  // Can reuse existing destroy
    1219           15 :     s->step = explicit_euler_omp_step;
    1220           15 :     s->solve = explicit_euler_omp_solve;
    1221           15 :     s->apply_boundary = NULL;
    1222           15 :     s->compute_dt = NULL;
    1223              : 
    1224           15 :     return s;
    1225              : }
    1226              : 
    1227              : /**
    1228              :  * Built-in NSSolver: RK2 OpenMP
    1229              :  */
    1230              : 
    1231          212 : static cfd_status_t rk2_omp_step(ns_solver_t* solver, flow_field* field, const grid* grid,
    1232              :                                   const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1233          212 :     (void)solver;
    1234          212 :     if (field->nx < 3 || field->ny < 3) {
    1235              :         return CFD_ERROR_INVALID;
    1236              :     }
    1237              : 
    1238          212 :     ns_solver_params_t step_params = *params;
    1239          212 :     step_params.max_iter = 1;
    1240              : 
    1241          212 :     cfd_status_t status = rk2_omp_impl(field, grid, &step_params);
    1242              : 
    1243          212 :     if (stats) {
    1244          212 :         stats->iterations = 1;
    1245          212 :         double max_vel = 0.0, max_p = 0.0;
    1246          212 :         ptrdiff_t i;
    1247          212 :         ptrdiff_t n = (ptrdiff_t)(field->nx * field->ny);
    1248              : #if _OPENMP >= 201107
    1249          212 :         #pragma omp parallel for reduction(max: max_vel, max_p) schedule(static)
    1250              : #endif
    1251              :         for (i = 0; i < n; i++) {
    1252              :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
    1253              :             if (vel > max_vel) {
    1254              :                 max_vel = vel;
    1255              :             }
    1256              :             double ap = fabs(field->p[i]);
    1257              :             if (ap > max_p) {
    1258              :                 max_p = ap;
    1259              :             }
    1260              :         }
    1261          212 :         stats->max_velocity = max_vel;
    1262          212 :         stats->max_pressure = max_p;
    1263              :     }
    1264              :     return status;
    1265              : }
    1266              : 
    1267            0 : static cfd_status_t rk2_omp_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
    1268              :                                    const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1269            0 :     (void)solver;
    1270            0 :     if (field->nx < 3 || field->ny < 3) {
    1271              :         return CFD_ERROR_INVALID;
    1272              :     }
    1273              : 
    1274            0 :     cfd_status_t status = rk2_omp_impl(field, grid, params);
    1275              : 
    1276            0 :     if (stats) {
    1277            0 :         stats->iterations = params->max_iter;
    1278            0 :         double max_vel = 0.0, max_p = 0.0;
    1279            0 :         ptrdiff_t i;
    1280            0 :         ptrdiff_t n = (ptrdiff_t)(field->nx * field->ny);
    1281              : #if _OPENMP >= 201107
    1282            0 :         #pragma omp parallel for reduction(max: max_vel, max_p) schedule(static)
    1283              : #endif
    1284              :         for (i = 0; i < n; i++) {
    1285              :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
    1286              :             if (vel > max_vel) {
    1287              :                 max_vel = vel;
    1288              :             }
    1289              :             double ap = fabs(field->p[i]);
    1290              :             if (ap > max_p) {
    1291              :                 max_p = ap;
    1292              :             }
    1293              :         }
    1294            0 :         stats->max_velocity = max_vel;
    1295            0 :         stats->max_pressure = max_p;
    1296              :     }
    1297              :     return status;
    1298              : }
    1299              : 
    1300           12 : static ns_solver_t* create_rk2_omp_solver(void) {
    1301           12 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
    1302           12 :     if (!s) {
    1303              :         return NULL;
    1304              :     }
    1305              : 
    1306           12 :     s->name = NS_SOLVER_TYPE_RK2_OMP;
    1307           12 :     s->description = "OpenMP-parallelized RK2 (Heun's method)";
    1308           12 :     s->version = "1.0.0";
    1309           12 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_PARALLEL;
    1310           12 :     s->backend = NS_SOLVER_BACKEND_OMP;
    1311              : 
    1312           12 :     s->init = explicit_euler_init;
    1313           12 :     s->destroy = explicit_euler_destroy;
    1314           12 :     s->step = rk2_omp_step;
    1315           12 :     s->solve = rk2_omp_solve;
    1316           12 :     s->apply_boundary = NULL;
    1317           12 :     s->compute_dt = NULL;
    1318              : 
    1319           12 :     return s;
    1320              : }
    1321              : 
    1322              : /**
    1323              :  * Built-in NSSolver: Projection OpenMP
    1324              :  */
    1325              : 
    1326            8 : static cfd_status_t projection_omp_init(ns_solver_t* solver, const grid* grid,
    1327              :                                         const ns_solver_params_t* params) {
    1328            8 :     (void)params;
    1329            8 :     if (!solver || !grid) {
    1330              :         return CFD_ERROR_INVALID;
    1331              :     }
    1332              : 
    1333              :     /* OMP projection requires OMP CG Poisson solver.
    1334              :      * Never fall back to scalar CG — it would serialize the Poisson solve. */
    1335            8 :     poisson_solver_t* test_solver = poisson_solver_create(
    1336              :         POISSON_METHOD_CG, POISSON_BACKEND_OMP);
    1337            8 :     if (!test_solver) {
    1338            0 :         fprintf(stderr, "projection_omp_init: OMP CG Poisson solver not available\n");
    1339            0 :         return CFD_ERROR_UNSUPPORTED;
    1340              :     }
    1341            8 :     poisson_solver_destroy(test_solver);
    1342              : 
    1343            8 :     projection_context* ctx = (projection_context*)cfd_malloc(sizeof(projection_context));
    1344            8 :     if (!ctx) {
    1345              :         return CFD_ERROR_NOMEM;
    1346              :     }
    1347            8 :     ctx->initialized = 1;
    1348            8 :     solver->context = ctx;
    1349            8 :     return CFD_SUCCESS;
    1350              : }
    1351              : 
    1352          122 : static cfd_status_t projection_omp_step(ns_solver_t* solver, flow_field* field, const grid* grid,
    1353              :                                         const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1354          122 :     (void)solver;
    1355          122 :     if (field->nx < 3 || field->ny < 3) {
    1356              :         return CFD_ERROR_INVALID;
    1357              :     }
    1358              : 
    1359          122 :     ns_solver_params_t step_params = *params;
    1360          122 :     step_params.max_iter = 1;
    1361              : 
    1362          122 :     cfd_status_t status = solve_projection_method_omp(field, grid, &step_params);
    1363          122 :     if (status != CFD_SUCCESS) {
    1364              :         return status;
    1365              :     }
    1366              : 
    1367          122 :     if (stats) {
    1368          122 :         stats->iterations = 1;
    1369          122 :         double max_vel = 0.0, max_p = 0.0;
    1370          122 :         ptrdiff_t i;
    1371          122 :         ptrdiff_t n = (ptrdiff_t)(field->nx * field->ny);
    1372              : #if _OPENMP >= 201107
    1373          122 :         #pragma omp parallel for reduction(max: max_vel, max_p) schedule(static)
    1374              : #endif
    1375              :         for (i = 0; i < n; i++) {
    1376              :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
    1377              :             if (vel > max_vel) {
    1378              :                 max_vel = vel;
    1379              :             }
    1380              :             double ap = fabs(field->p[i]);
    1381              :             if (ap > max_p) {
    1382              :                 max_p = ap;
    1383              :             }
    1384              :         }
    1385          122 :         stats->max_velocity = max_vel;
    1386          122 :         stats->max_pressure = max_p;
    1387              :     }
    1388              :     return CFD_SUCCESS;
    1389              : }
    1390              : 
    1391            0 : static cfd_status_t projection_omp_solve(ns_solver_t* solver, flow_field* field, const grid* grid,
    1392              :                                          const ns_solver_params_t* params, ns_solver_stats_t* stats) {
    1393            0 :     (void)solver;
    1394            0 :     if (field->nx < 3 || field->ny < 3) {
    1395              :         return CFD_ERROR_INVALID;
    1396              :     }
    1397              : 
    1398            0 :     cfd_status_t status = solve_projection_method_omp(field, grid, params);
    1399            0 :     if (status != CFD_SUCCESS) {
    1400              :         return status;
    1401              :     }
    1402              : 
    1403            0 :     if (stats) {
    1404            0 :         stats->iterations = params->max_iter;
    1405            0 :         double max_vel = 0.0, max_p = 0.0;
    1406            0 :         for (size_t i = 0; i < field->nx * field->ny; i++) {
    1407            0 :             double vel = sqrt((field->u[i] * field->u[i]) + (field->v[i] * field->v[i]));
    1408            0 :             if (vel > max_vel) {
    1409            0 :                 max_vel = vel;
    1410              :             }
    1411            0 :             if (fabs(field->p[i]) > max_p) {
    1412            0 :                 max_p = fabs(field->p[i]);
    1413              :             }
    1414              :         }
    1415            0 :         stats->max_velocity = max_vel;
    1416            0 :         stats->max_pressure = max_p;
    1417              :     }
    1418              :     return CFD_SUCCESS;
    1419              : }
    1420              : 
    1421           15 : static ns_solver_t* create_projection_omp_solver(void) {
    1422           15 :     ns_solver_t* s = (ns_solver_t*)cfd_calloc(1, sizeof(*s));
    1423           15 :     if (!s) {
    1424              :         return NULL;
    1425              :     }
    1426              : 
    1427           15 :     s->name = NS_SOLVER_TYPE_PROJECTION_OMP;
    1428           15 :     s->description = "OpenMP-parallelized Projection solver";
    1429           15 :     s->version = "1.0.0";
    1430           15 :     s->capabilities = NS_SOLVER_CAP_INCOMPRESSIBLE | NS_SOLVER_CAP_TRANSIENT | NS_SOLVER_CAP_PARALLEL;
    1431              : 
    1432           15 :     s->init = projection_omp_init;    // Checks OMP CG availability
    1433           15 :     s->destroy = projection_destroy;
    1434           15 :     s->step = projection_omp_step;
    1435           15 :     s->solve = projection_omp_solve;
    1436           15 :     s->apply_boundary = NULL;
    1437           15 :     s->compute_dt = NULL;
    1438           15 :     s->backend = NS_SOLVER_BACKEND_OMP;
    1439              : 
    1440           15 :     return s;
    1441              : }
    1442              : #endif
    1443              : 
    1444              : //=============================================================================
    1445              : // Backend Availability API
    1446              : //=============================================================================
    1447              : 
    1448           30 : int cfd_backend_is_available(ns_solver_backend_t backend) {
    1449           30 :     switch (backend) {
    1450              :         case NS_SOLVER_BACKEND_SCALAR:
    1451              :             return 1;  // Always available
    1452              : 
    1453           12 :         case NS_SOLVER_BACKEND_SIMD:
    1454           12 :             return cfd_has_simd();
    1455              : 
    1456              :         case NS_SOLVER_BACKEND_OMP:
    1457              : #ifdef CFD_ENABLE_OPENMP
    1458              :             return 1;
    1459              : #else
    1460              :             return 0;
    1461              : #endif
    1462              : 
    1463            4 :         case NS_SOLVER_BACKEND_CUDA:
    1464            4 :             return gpu_is_available();
    1465              : 
    1466            1 :         default:
    1467            1 :             return 0;
    1468              :     }
    1469              : }
    1470              : 
    1471            8 : const char* cfd_backend_get_name(ns_solver_backend_t backend) {
    1472            8 :     switch (backend) {
    1473              :         case NS_SOLVER_BACKEND_SCALAR:
    1474              :             return "scalar";
    1475            2 :         case NS_SOLVER_BACKEND_SIMD:
    1476            2 :             return "simd";
    1477            1 :         case NS_SOLVER_BACKEND_OMP:
    1478            1 :             return "openmp";
    1479            3 :         case NS_SOLVER_BACKEND_CUDA:
    1480            3 :             return "cuda";
    1481            1 :         default:
    1482            1 :             return "unknown";
    1483              :     }
    1484              : }
    1485              : 
    1486           13 : int cfd_registry_list_by_backend(ns_solver_registry_t* registry, ns_solver_backend_t backend,
    1487              :                                   const char** names, int max_count) {
    1488           13 :     if (!registry) {
    1489              :         return 0;
    1490              :     }
    1491              : 
    1492              :     int count = 0;
    1493              : 
    1494              :     /* If names is NULL, just count total matches (discovery mode).
    1495              :      * Otherwise, fill the array up to max_count and return number filled. */
    1496          111 :     for (int i = 0; i < registry->count; i++) {
    1497              :         /* Use the stored backend instead of creating temporary solvers.
    1498              :          * This is much more efficient and avoids side effects from factory calls. */
    1499          100 :         if (registry->entries[i].backend == backend) {
    1500           28 :             if (names == NULL) {
    1501              :                 /* Discovery mode: count all matches */
    1502            3 :                 count++;
    1503           25 :             } else if (count < max_count) {
    1504              :                 /* Fill mode: add to array if space available */
    1505           24 :                 names[count] = registry->entries[i].name;
    1506           24 :                 count++;
    1507              :             } else {
    1508              :                 /* Array full, stop filling but don't count more */
    1509              :                 break;
    1510              :             }
    1511              :         }
    1512              :     }
    1513              : 
    1514              :     return count;
    1515              : }
    1516              : 
    1517           12 : ns_solver_t* cfd_solver_create_checked(ns_solver_registry_t* registry, const char* type_name) {
    1518           12 :     if (!registry || !type_name) {
    1519            2 :         cfd_set_error(CFD_ERROR_INVALID, "Invalid arguments for solver creation");
    1520            2 :         return NULL;
    1521              :     }
    1522              : 
    1523              :     /* Check backend availability BEFORE creating the solver */
    1524           10 :     ns_solver_backend_t expected_backend = infer_backend_from_type(type_name);
    1525           10 :     if (!cfd_backend_is_available(expected_backend)) {
    1526            2 :         const char* backend_name = cfd_backend_get_name(expected_backend);
    1527            2 :         char error_msg[128];
    1528            2 :         snprintf(error_msg, sizeof(error_msg),
    1529              :                  "Backend '%s' is not available on this system", backend_name);
    1530            2 :         cfd_set_error(CFD_ERROR_UNSUPPORTED, error_msg);
    1531            2 :         return NULL;
    1532              :     }
    1533              : 
    1534              :     /* Now create the solver - backend is available */
    1535            8 :     ns_solver_t* solver = cfd_solver_create(registry, type_name);
    1536            8 :     if (!solver) {
    1537              :         /* Error already set by cfd_solver_create */
    1538              :         return NULL;
    1539              :     }
    1540              : 
    1541              :     return solver;
    1542              : }
        

Generated by: LCOV version 2.0-1