LCOV - code coverage report
Current view: top level - boundary/simd - boundary_conditions_simd_dispatch.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 12.1 % 66 8
Test Date: 2026-03-04 10:22:18 Functions: 18.2 % 11 2

            Line data    Source code
       1              : /**
       2              :  * Boundary Conditions - SIMD Dispatcher with Runtime Detection
       3              :  *
       4              :  * This file provides the unified bc_impl_simd interface by selecting
       5              :  * the correct architecture-specific implementation at RUNTIME:
       6              :  * - AVX2 on x86-64 (detected via CPUID)
       7              :  * - NEON on ARM64 (always available on ARM64)
       8              :  *
       9              :  * The actual implementations remain in separate files:
      10              :  * - avx2/boundary_conditions_avx2.c
      11              :  * - neon/boundary_conditions_neon.c
      12              :  *
      13              :  * Compile-Time vs Runtime Detection:
      14              :  * ----------------------------------
      15              :  * The availability check (simd_available) uses BOTH:
      16              :  * 1. Runtime CPU detection: cfd_detect_simd_arch() checks if CPU supports AVX2/NEON
      17              :  * 2. Compile-time availability: Checks if function pointers are non-NULL
      18              :  *
      19              :  * This two-phase check handles the case where:
      20              :  * - CPU supports AVX2, but code was compiled without -mavx2 flag
      21              :  * - In this case, bc_impl_avx2 has NULL pointers, so simd_available()
      22              :  *   returns false even though runtime detection reports AVX2 support.
      23              :  *
      24              :  * This design ensures safe operation: SIMD backend is only used when BOTH
      25              :  * the CPU supports it AND the code was compiled with SIMD instructions.
      26              :  *
      27              :  * Error Handling:
      28              :  * If called when no SIMD backend is available (programming error), these
      29              :  * dispatcher functions will:
      30              :  * 1. Call the user-configurable error handler (or print to stderr if none set)
      31              :  * 2. Assert in debug builds
      32              :  * 3. Fall back to scalar implementation to avoid leaving fields in invalid state
      33              :  *
      34              :  * Callers SHOULD check bc_simd_backend_available() before using this backend.
      35              :  */
      36              : 
      37              : #include "../boundary_conditions_internal.h"
      38              : #include "cfd/core/cpu_features.h"
      39              : #include <stdbool.h>
      40              : #include <stdint.h>
      41              : #include <assert.h>
      42              : #include <stdio.h>
      43              : 
      44              : /* Platform-specific atomic operations for thread-safe caching */
      45              : #ifdef _MSC_VER
      46              : #include <intrin.h>
      47              : #define ATOMIC_LOAD(ptr) _InterlockedCompareExchange64((volatile long long*)(ptr), 0, 0)
      48              : #define ATOMIC_CAS(ptr, expected, desired) \
      49              :     (_InterlockedCompareExchange64((volatile long long*)(ptr), (long long)(desired), (long long)(*(expected))) == (long long)(*(expected)))
      50              : #define COMPILER_BARRIER() _ReadWriteBarrier()
      51              : #else
      52              : /* GCC/Clang built-in atomics */
      53              : #define ATOMIC_LOAD(ptr) __atomic_load_n((ptr), __ATOMIC_ACQUIRE)
      54              : #define ATOMIC_CAS(ptr, expected, desired) \
      55              :     __atomic_compare_exchange_n((ptr), (expected), (desired), 0, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)
      56              : #define COMPILER_BARRIER() __asm__ __volatile__("" ::: "memory")
      57              : #endif
      58              : 
      59              : /* ============================================================================
      60              :  * Helper: Get SIMD backend based on runtime detection
      61              :  *
      62              :  * Returns the appropriate backend implementation table (AVX2 or NEON) based
      63              :  * on runtime CPU detection. Returns NULL if no SIMD backend is available.
      64              :  *
      65              :  * The result is cached after first call since SIMD architecture doesn't change
      66              :  * at runtime. This avoids redundant calls to cfd_detect_simd_arch() on every
      67              :  * boundary condition operation.
      68              :  *
      69              :  * Thread Safety:
      70              :  * Uses atomic compare-and-swap to ensure proper synchronization. Only one
      71              :  * thread will successfully initialize the cache; others will use the
      72              :  * already-cached result. Memory barriers ensure visibility across threads.
      73              :  * ============================================================================ */
      74              : 
      75              : /* Cache for the SIMD backend pointer.
      76              :  * Values: 0 = not initialized, 1 = no backend, 2+ = valid backend pointer + 1
      77              :  * Using intptr_t allows atomic operations and encodes state in a single variable. */
      78              : static volatile intptr_t g_simd_backend_cache = 0;
      79              : 
      80              : /* Sentinel values for cache state */
      81              : #define CACHE_UNINITIALIZED 0
      82              : #define CACHE_NO_BACKEND    1
      83              : 
      84            0 : static const bc_backend_impl_t* get_simd_backend(void) {
      85              :     /* Fast path: check if already initialized */
      86            0 :     intptr_t cached = (intptr_t)ATOMIC_LOAD(&g_simd_backend_cache);
      87              : 
      88            0 :     if (cached != CACHE_UNINITIALIZED) {
      89            0 :         if (cached == CACHE_NO_BACKEND) {
      90              :             return NULL;
      91              :         }
      92              :         /* Decode pointer: subtract 1 and cast back */
      93            0 :         return (const bc_backend_impl_t*)(cached - 1);
      94              :     }
      95              : 
      96              :     /* Slow path: detect SIMD backend */
      97            0 :     cfd_simd_arch_t arch = cfd_detect_simd_arch();
      98            0 :     const bc_backend_impl_t* result = NULL;
      99              : 
     100            0 :     if (arch == CFD_SIMD_AVX2 && bc_impl_avx2.apply_neumann != NULL) {
     101              :         result = &bc_impl_avx2;
     102            0 :     } else if (arch == CFD_SIMD_NEON && bc_impl_neon.apply_neumann != NULL) {
     103              :         result = &bc_impl_neon;
     104              :     }
     105              : 
     106              :     /* Encode result: NULL becomes CACHE_NO_BACKEND, valid pointer becomes ptr+1 */
     107            0 :     intptr_t new_value = result ? ((intptr_t)result + 1) : CACHE_NO_BACKEND;
     108              : 
     109              :     /* Try to set cache atomically. If another thread beat us, use their result. */
     110            0 :     intptr_t expected = CACHE_UNINITIALIZED;
     111            0 :     if (!ATOMIC_CAS(&g_simd_backend_cache, &expected, new_value)) {
     112              :         /* Another thread initialized first - use their cached value */
     113            0 :         if (expected == CACHE_NO_BACKEND) {
     114              :             return NULL;
     115              :         }
     116            0 :         return (const bc_backend_impl_t*)(expected - 1);
     117              :     }
     118              : 
     119              :     return result;
     120              : }
     121              : 
     122              : /**
     123              :  * Report error when SIMD backend is unavailable.
     124              :  * Called as a programming error fallback - callers should check availability first.
     125              :  */
     126            0 : static void report_no_simd_error(const char* function) {
     127            0 :     char message[128];
     128            0 :     snprintf(message, sizeof(message),
     129              :              "SIMD backend called but no SIMD available (detected: %s). "
     130              :              "Falling back to scalar.",
     131              :              cfd_get_simd_name());
     132            0 :     bc_report_error(BC_ERROR_NO_SIMD_BACKEND, function, message);
     133            0 :     assert(0 && "SIMD backend called without available implementation");
     134              : }
     135              : 
     136              : /* ============================================================================
     137              :  * Runtime Dispatching Functions
     138              :  *
     139              :  * These functions use get_simd_backend() for unified dispatch logic.
     140              :  * ============================================================================ */
     141              : 
     142            0 : static void bc_simd_neumann(double* field, size_t nx, size_t ny,
     143              :                              size_t nz, size_t stride_z) {
     144            0 :     const bc_backend_impl_t* impl = get_simd_backend();
     145            0 :     if (impl != NULL) {
     146            0 :         impl->apply_neumann(field, nx, ny, nz, stride_z);
     147            0 :         return;
     148              :     }
     149            0 :     report_no_simd_error("bc_simd_neumann");
     150              :     bc_apply_neumann_scalar_impl(field, nx, ny, nz, stride_z);
     151              : }
     152              : 
     153            0 : static void bc_simd_periodic(double* field, size_t nx, size_t ny,
     154              :                               size_t nz, size_t stride_z) {
     155            0 :     const bc_backend_impl_t* impl = get_simd_backend();
     156            0 :     if (impl != NULL) {
     157            0 :         impl->apply_periodic(field, nx, ny, nz, stride_z);
     158            0 :         return;
     159              :     }
     160            0 :     report_no_simd_error("bc_simd_periodic");
     161              :     bc_apply_periodic_scalar_impl(field, nx, ny, nz, stride_z);
     162              : }
     163              : 
     164            0 : static void bc_simd_dirichlet(double* field, size_t nx, size_t ny,
     165              :                                size_t nz, size_t stride_z,
     166              :                                const bc_dirichlet_values_t* values) {
     167            0 :     const bc_backend_impl_t* impl = get_simd_backend();
     168            0 :     if (impl != NULL) {
     169            0 :         impl->apply_dirichlet(field, nx, ny, nz, stride_z, values);
     170            0 :         return;
     171              :     }
     172            0 :     report_no_simd_error("bc_simd_dirichlet");
     173              :     bc_apply_dirichlet_scalar_impl(field, nx, ny, nz, stride_z, values);
     174              : }
     175              : 
     176            0 : static cfd_status_t bc_simd_inlet(double* u, double* v, double* w,
     177              :                                    size_t nx, size_t ny,
     178              :                                    size_t nz, size_t stride_z,
     179              :                                    const bc_inlet_config_t* config) {
     180              :     /* Inlet BCs operate on 1D boundaries - SIMD provides limited benefit.
     181              :      * Delegate to the architecture-specific backend if available, otherwise
     182              :      * fall back to scalar implementation. */
     183            0 :     const bc_backend_impl_t* impl = get_simd_backend();
     184            0 :     if (impl != NULL && impl->apply_inlet != NULL) {
     185            0 :         return impl->apply_inlet(u, v, w, nx, ny, nz, stride_z, config);
     186              :     }
     187              :     /* Fall back to scalar implementation for inlet */
     188            0 :     return bc_apply_inlet_scalar_impl(u, v, w, nx, ny, nz, stride_z, config);
     189              : }
     190              : 
     191            0 : static cfd_status_t bc_simd_outlet(double* field, size_t nx, size_t ny,
     192              :                                     size_t nz, size_t stride_z,
     193              :                                     const bc_outlet_config_t* config) {
     194              :     /* Outlet BCs operate on 1D boundaries - SIMD provides limited benefit
     195              :      * except for top/bottom edges where memory is contiguous.
     196              :      * Delegate to the architecture-specific backend if available, otherwise
     197              :      * fall back to scalar implementation. */
     198            0 :     const bc_backend_impl_t* impl = get_simd_backend();
     199            0 :     if (impl != NULL && impl->apply_outlet != NULL) {
     200            0 :         return impl->apply_outlet(field, nx, ny, nz, stride_z, config);
     201              :     }
     202              :     /* Fall back to scalar implementation for outlet */
     203            0 :     return bc_apply_outlet_scalar_impl(field, nx, ny, nz, stride_z, config);
     204              : }
     205              : 
     206            0 : static cfd_status_t bc_simd_symmetry(double* u, double* v, double* w,
     207              :                                       size_t nx, size_t ny,
     208              :                                       size_t nz, size_t stride_z,
     209              :                                       const bc_symmetry_config_t* config) {
     210              :     /* Symmetry BCs operate on 1D boundaries - SIMD provides limited benefit.
     211              :      * Delegate to the architecture-specific backend if available, otherwise
     212              :      * fall back to scalar implementation. */
     213            0 :     const bc_backend_impl_t* impl = get_simd_backend();
     214            0 :     if (impl != NULL && impl->apply_symmetry != NULL) {
     215            0 :         return impl->apply_symmetry(u, v, w, nx, ny, nz, stride_z, config);
     216              :     }
     217              :     /* Fall back to scalar implementation for symmetry */
     218            0 :     return bc_apply_symmetry_scalar_impl(u, v, w, nx, ny, nz, stride_z, config);
     219              : }
     220              : 
     221              : /* ============================================================================
     222              :  * Check if SIMD backend is available at runtime
     223              :  * ============================================================================ */
     224              : 
     225              : /**
     226              :  * Check if a backend implementation table is fully populated.
     227              :  * All three function pointers must be non-NULL for the backend to be usable.
     228              :  */
     229       234460 : static bool backend_impl_complete(const bc_backend_impl_t* impl) {
     230       234460 :     return impl->apply_neumann != NULL &&
     231            0 :            impl->apply_periodic != NULL &&
     232            0 :            impl->apply_dirichlet != NULL;
     233              : }
     234              : 
     235              : /**
     236              :  * Check if any SIMD implementation is available.
     237              :  * Called during initialization to determine if bc_impl_simd should be used.
     238              :  *
     239              :  * Verifies all three function pointers (neumann, periodic, dirichlet) are present.
     240              :  * This ensures the backend is fully functional, not just partially implemented.
     241              :  */
     242       234460 : static bool simd_available(void) {
     243       234460 :     cfd_simd_arch_t arch = cfd_detect_simd_arch();
     244              : 
     245       234460 :     if (arch == CFD_SIMD_AVX2) {
     246       468920 :         return backend_impl_complete(&bc_impl_avx2);
     247            0 :     } else if (arch == CFD_SIMD_NEON) {
     248            0 :         return backend_impl_complete(&bc_impl_neon);
     249              :     }
     250              :     return false;
     251              : }
     252              : 
     253              : /* ============================================================================
     254              :  * Unified SIMD Interface
     255              :  *
     256              :  * bc_impl_simd provides runtime-dispatching functions.
     257              :  * The functions check availability internally.
     258              :  * ============================================================================ */
     259              : 
/* Unified SIMD backend table.  Every entry is a runtime dispatcher and is
 * always non-NULL, so availability must be queried through
 * bc_simd_backend_available(), not by probing these pointers. */
const bc_backend_impl_t bc_impl_simd = {
    .apply_neumann = bc_simd_neumann,     /* core kernel (error + scalar fallback if no SIMD) */
    .apply_periodic = bc_simd_periodic,   /* core kernel (error + scalar fallback if no SIMD) */
    .apply_dirichlet = bc_simd_dirichlet, /* core kernel (error + scalar fallback if no SIMD) */
    .apply_inlet = bc_simd_inlet,         /* silent scalar fallback; hook optional in arch tables */
    .apply_outlet = bc_simd_outlet,       /* silent scalar fallback; hook optional in arch tables */
    .apply_symmetry = bc_simd_symmetry    /* silent scalar fallback; hook optional in arch tables */
};
     268              : 
/**
 * Query whether the SIMD backend is actually usable at runtime.
 *
 * External code must call this rather than inspecting bc_impl_simd, whose
 * function pointers are always non-NULL (they perform runtime dispatch).
 * Returns true only when the CPU supports AVX2/NEON AND the matching
 * implementation table was compiled in (see simd_available()).
 */
bool bc_simd_backend_available(void) {
    return simd_available();
}
     277              : 
/**
 * Get the name of the detected SIMD architecture.
 *
 * Thin wrapper over cfd_get_simd_name(); per the comment here it reports
 * "avx2", "neon", or "none" based on runtime detection.  Note this reflects
 * CPU detection only — it may report an architecture even when the matching
 * implementation was not compiled in (see bc_simd_backend_available()).
 */
const char* bc_simd_get_arch_name(void) {
    return cfd_get_simd_name();
}
        

Generated by: LCOV version 2.0-1