Branch data Line data Source code
1 : : // ***************************************************************************** 2 : : /*! 3 : : \file src/Base/LoadDistributor.cpp 4 : : \copyright 2012-2015 J. Bakosi, 5 : : 2016-2018 Los Alamos National Security, LLC., 6 : : 2019-2021 Triad National Security, LLC., 7 : : 2022-2024 J. Bakosi 8 : : All rights reserved. See the LICENSE file for details. 9 : : \brief Load distributors 10 : : \details Load distributors compute chunksize based on the degree of 11 : : virtualization. 12 : : */ 13 : : // ***************************************************************************** 14 : : 15 : : #include <limits> 16 : : 17 : : #include "Types.hpp" 18 : : #include "LoadDistributor.hpp" 19 : : #include "Exception.hpp" 20 : : 21 : : namespace tk { 22 : : 23 : : uint64_t 24 : 246 : linearLoadDistributor( real virtualization, 25 : : uint64_t load, 26 : : int npe, 27 : : uint64_t& chunksize, 28 : : uint64_t& remainder ) 29 : : // ***************************************************************************** 30 : : // Compute linear load distribution for given total work and virtualization 31 : : //! \param[in] virtualization Degree of virtualization [0.0...1.0] 32 : : //! \param[in] load Total load, e.g., number of particles, number of mesh cells 33 : : //! \param[in] npe Number of processing elements to distribute the load to 34 : : //! \param[inout] chunksize Chunk size, see detailed description 35 : : //! \param[inout] remainder Remainder, see detailed description 36 : : //! \return Number of work units 37 : : //! \details Compute load distibution (number of chares and chunksize) based on 38 : : //! total work (e.g., total number of particles) and virtualization 39 : : //! 40 : : //! The virtualization parameter, specified by the user, is a real number 41 : : //! between 0.0 and 1.0, inclusive, which controls the degree of 42 : : //! virtualization or over-decomposition. Independent of the value of 43 : : //! virtualization the work is approximately evenly distributed among the 44 : : //! available processing elements, given by npe. For zero virtualization (no 45 : : //! over-decomposition), the work is simply decomposed into total_work/numPEs, 46 : : //! which yields the smallest number of Charm++ chares and the largest chunks 47 : : //! of work units. The other extreme is unity virtualization, which decomposes 48 : : //! the total work into the smallest size work units possible, yielding the 49 : : //! largest number of Charm++ chares. Obviously, the optimum will be between 50 : : //! 0.0 and 1.0, depending on the problem. 51 : : //! 52 : : //! The formula implemented uses a linear relationship between the 53 : : //! virtualization parameter and the number of work units with the extremes 54 : : //! described above. The formula is given by 55 : : //! 56 : : //! chunksize = (1 - n) * v + n; 57 : : //! 58 : : //! where 59 : : //! - v = degree of virtualization 60 : : //! - n = load/npes 61 : : //! - load = total work, e.g., number of particles, number of mesh cells 62 : : //! - npes = number of hardware processing elements 63 : : // ***************************************************************************** 64 : : { 65 : : Assert( virtualization > -std::numeric_limits< real >::epsilon() && 66 : : virtualization < 1.0+std::numeric_limits< real >::epsilon(), 67 : : "Virtualization parameter must be between [0.0...1.0]" ); 68 : : Assert( npe > 0, "Number of processing elements must be larger than zero" ); 69 : : 70 : : // Compute minimum number of work units 71 : 246 : const auto n = static_cast< real >( load ) / npe; 72 : : 73 : : // Compute work unit size based on the linear formula above 74 : 246 : chunksize = static_cast< uint64_t >( (1.0 - n) * virtualization + n ); 75 : : 76 : : Assert( load >= chunksize, "Load must be larger than chunksize" ); 77 : : 78 : : // Compute number of work units with size computed ignoring remainder 79 : 246 : uint64_t nchare = load / chunksize; 80 : : 81 : : // Compute remainder of work if the above number of units were to be created 82 : 246 : remainder = load - nchare * chunksize; 83 : : 84 : : // Redistribute remainder among the work units for a more equal distribution 85 : 246 : chunksize += remainder / nchare; 86 : : 87 : : // Compute new remainder (after redistribution of the previous remainder) 88 : 246 : remainder = load - nchare * chunksize; 89 : : 90 : : // Return number of work units (number of Charm++ chares) 91 : 246 : return nchare; 92 : : } 93 : : 94 : : } // tk::