Xyst test code coverage report
Current view: top level - Base - LoadDistributor.cpp (source / functions) Coverage Total Hit
Commit: 1fb74642dd9d7732b67f32dec2f2762e238d3fa7 Lines: 100.0 % 11 11
Test Date: 2025-08-13 22:46:33 Functions: 100.0 % 1 1
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 50.0 % 26 13

             Branch data     Line data    Source code
       1                 :             : // *****************************************************************************
       2                 :             : /*!
       3                 :             :   \file      src/Base/LoadDistributor.cpp
       4                 :             :   \copyright 2012-2015 J. Bakosi,
       5                 :             :              2016-2018 Los Alamos National Security, LLC.,
       6                 :             :              2019-2021 Triad National Security, LLC.,
       7                 :             :              2022-2025 J. Bakosi
       8                 :             :              All rights reserved. See the LICENSE file for details.
       9                 :             :   \brief     Load distributors
      10                 :             :   \details   Load distributors compute chunksize based on the degree of
      11                 :             :      virtualization.
      12                 :             : */
      13                 :             : // *****************************************************************************
      14                 :             : 
      15                 :             : #include <limits>
      16                 :             : 
      17                 :             : #include "Types.hpp"
      18                 :             : #include "LoadDistributor.hpp"
      19                 :             : #include "Exception.hpp"
      20                 :             : 
      21                 :             : namespace tk {
      22                 :             : 
      23                 :             : uint64_t
      24                 :         262 : linearLoadDistributor( real virtualization,
      25                 :             :                        uint64_t load,
      26                 :             :                        int npe,
      27                 :             :                        uint64_t& chunksize,
      28                 :             :                        uint64_t& remainder )
      29                 :             : // *****************************************************************************
      30                 :             : //  Compute linear load distribution for given total work and virtualization
      31                 :             : //! \param[in] virtualization Degree of virtualization [0.0...1.0]
      32                 :             : //! \param[in] load Total load, e.g., number of particles, number of mesh cells
      33                 :             : //! \param[in] npe Number of processing elements to distribute the load to
      34                 :             : //! \param[out] chunksize Size of a work unit after the remainder is redistributed
      35                 :             : //! \param[out] remainder Work left over: load - (number of work units)*chunksize
      36                 :             : //! \return Number of work units
      37                 :             : //! \details Compute load distribution (number of chares and chunksize) based on
      38                 :             : //!   total work (e.g., total number of particles) and virtualization
      39                 :             : //!
      40                 :             : //!   The virtualization parameter, specified by the user, is a real number
      41                 :             : //!   between 0.0 and 1.0, inclusive, which controls the degree of
      42                 :             : //!   virtualization or over-decomposition. Independent of the value of
      43                 :             : //!   virtualization the work is approximately evenly distributed among the
      44                 :             : //!   available processing elements, given by npe. For zero virtualization (no
      45                 :             : //!   over-decomposition), the work is simply decomposed into chunks of load/npe,
      46                 :             : //!   which yields the smallest number of Charm++ chares and the largest chunks
      47                 :             : //!   of work units. The other extreme is unity virtualization, which decomposes
      48                 :             : //!   the total work into the smallest size work units possible, yielding the
      49                 :             : //!   largest number of Charm++ chares. Obviously, the optimum will be between
      50                 :             : //!   0.0 and 1.0, depending on the problem.
      51                 :             : //!
      52                 :             : //!   The formula implemented uses a linear relationship between the
      53                 :             : //!   virtualization parameter and the number of work units with the extremes
      54                 :             : //!   described above. The formula is given by
      55                 :             : //!
      56                 :             : //!   chunksize = (1 - n) * v + n;
      57                 :             : //!
      58                 :             : //!   where
      59                 :             : //!    - v = degree of virtualization
      60                 :             : //!    - n = load/npe
      61                 :             : //!    - load = total work, e.g., number of particles, number of mesh cells
      62                 :             : //!    - npe = number of hardware processing elements
      63                 :             : // *****************************************************************************
      64                 :             : {
      65 [ +  + ][ +  + ]:         274 :   Assert( virtualization > -std::numeric_limits< real >::epsilon() &&
         [ +  - ][ +  - ]
                 [ +  - ]
      66                 :             :           virtualization < 1.0+std::numeric_limits< real >::epsilon(),
      67                 :             :           "Virtualization parameter must be between [0.0...1.0]" );
      68 [ +  + ][ +  - ]:         266 :   Assert( npe > 0, "Number of processing elements must be larger than zero" );
         [ +  - ][ +  - ]
      69                 :             : 
      70                 :             :   // Compute load per processing element (chunksize at zero virtualization)
      71                 :         259 :   const auto n = static_cast< real >( load ) / npe;
      72                 :             : 
      73                 :             :   // Compute work unit size based on the linear formula above
      74                 :         259 :   chunksize = static_cast< uint64_t >( (1.0 - n) * virtualization + n );
      75                 :             : 
      76 [ -  + ][ -  - ]:         259 :   Assert( load >= chunksize, "Load must be larger than chunksize" );
         [ -  - ][ -  - ]
      77                 :             : 
      78                 :             :   // Compute number of work units with size computed ignoring remainder
      79                 :         259 :   uint64_t nchare = load / chunksize;
      80                 :             : 
      81                 :             :   // Compute remainder of work if the above number of units were to be created
      82                 :         259 :   remainder = load - nchare * chunksize;
      83                 :             : 
      84                 :             :   // Redistribute remainder among the work units for a more equal distribution
      85                 :         259 :   chunksize += remainder / nchare;
      86                 :             : 
      87                 :             :   // Compute new remainder (after redistribution of the previous remainder)
      88                 :         259 :   remainder = load - nchare * chunksize;
      89                 :             : 
      90                 :             :   // Return number of work units (number of Charm++ chares)
      91                 :         259 :   return nchare;
      92                 :             : }
      93                 :             : 
      94                 :             : } // tk::
        

Generated by: LCOV version 2.0-1