Branch data Line data Source code
1 : : // *****************************************************************************
2 : : /*!
3 : : \file src/Base/LoadDistributor.cpp
4 : : \copyright 2012-2015 J. Bakosi,
5 : : 2016-2018 Los Alamos National Security, LLC.,
6 : : 2019-2021 Triad National Security, LLC.,
7 : : 2022-2025 J. Bakosi
8 : : All rights reserved. See the LICENSE file for details.
9 : : \brief Load distributors
10 : : \details Load distributors compute chunksize based on the degree of
11 : : virtualization.
12 : : */
13 : : // *****************************************************************************
14 : :
15 : : #include <limits>
16 : :
17 : : #include "Types.hpp"
18 : : #include "LoadDistributor.hpp"
19 : : #include "Exception.hpp"
20 : :
21 : : namespace tk {
22 : :
23 : : uint64_t
24 : 262 : linearLoadDistributor( real virtualization,
25 : : uint64_t load,
26 : : int npe,
27 : : uint64_t& chunksize,
28 : : uint64_t& remainder )
29 : : // *****************************************************************************
30 : : // Compute linear load distribution for given total work and virtualization
31 : : //! \param[in] virtualization Degree of virtualization [0.0...1.0]
32 : : //! \param[in] load Total load, e.g., number of particles, number of mesh cells
33 : : //! \param[in] npe Number of processing elements to distribute the load to
34 : : //! \param[inout] chunksize Chunk size, see detailed description
35 : : //! \param[inout] remainder Remainder, see detailed description
36 : : //! \return Number of work units
37 : : //! \details Compute load distibution (number of chares and chunksize) based on
38 : : //! total work (e.g., total number of particles) and virtualization
39 : : //!
40 : : //! The virtualization parameter, specified by the user, is a real number
41 : : //! between 0.0 and 1.0, inclusive, which controls the degree of
42 : : //! virtualization or over-decomposition. Independent of the value of
43 : : //! virtualization the work is approximately evenly distributed among the
44 : : //! available processing elements, given by npe. For zero virtualization (no
45 : : //! over-decomposition), the work is simply decomposed into total_work/numPEs,
46 : : //! which yields the smallest number of Charm++ chares and the largest chunks
47 : : //! of work units. The other extreme is unity virtualization, which decomposes
48 : : //! the total work into the smallest size work units possible, yielding the
49 : : //! largest number of Charm++ chares. Obviously, the optimum will be between
50 : : //! 0.0 and 1.0, depending on the problem.
51 : : //!
52 : : //! The formula implemented uses a linear relationship between the
53 : : //! virtualization parameter and the number of work units with the extremes
54 : : //! described above. The formula is given by
55 : : //!
56 : : //! chunksize = (1 - n) * v + n;
57 : : //!
58 : : //! where
59 : : //! - v = degree of virtualization
60 : : //! - n = load/npes
61 : : //! - load = total work, e.g., number of particles, number of mesh cells
62 : : //! - npes = number of hardware processing elements
63 : : // *****************************************************************************
64 : : {
65 [ + + ][ + + ]: 274 : Assert( virtualization > -std::numeric_limits< real >::epsilon() &&
[ + - ][ + - ]
[ + - ]
66 : : virtualization < 1.0+std::numeric_limits< real >::epsilon(),
67 : : "Virtualization parameter must be between [0.0...1.0]" );
68 [ + + ][ + - ]: 266 : Assert( npe > 0, "Number of processing elements must be larger than zero" );
[ + - ][ + - ]
69 : :
70 : : // Compute minimum number of work units
71 : 259 : const auto n = static_cast< real >( load ) / npe;
72 : :
73 : : // Compute work unit size based on the linear formula above
74 : 259 : chunksize = static_cast< uint64_t >( (1.0 - n) * virtualization + n );
75 : :
76 [ - + ][ - - ]: 259 : Assert( load >= chunksize, "Load must be larger than chunksize" );
[ - - ][ - - ]
77 : :
78 : : // Compute number of work units with size computed ignoring remainder
79 : 259 : uint64_t nchare = load / chunksize;
80 : :
81 : : // Compute remainder of work if the above number of units were to be created
82 : 259 : remainder = load - nchare * chunksize;
83 : :
84 : : // Redistribute remainder among the work units for a more equal distribution
85 : 259 : chunksize += remainder / nchare;
86 : :
87 : : // Compute new remainder (after redistribution of the previous remainder)
88 : 259 : remainder = load - nchare * chunksize;
89 : :
90 : : // Return number of work units (number of Charm++ chares)
91 : 259 : return nchare;
92 : : }
93 : :
94 : : } // tk::
|