Line data Source code
1 : // *****************************************************************************
2 : /*!
3 : \file src/Base/LoadDistributor.cpp
4 : \copyright 2012-2015 J. Bakosi,
5 : 2016-2018 Los Alamos National Security, LLC.,
6 : 2019-2021 Triad National Security, LLC.,
7 : 2022-2025 J. Bakosi
8 : All rights reserved. See the LICENSE file for details.
9 : \brief Load distributors
10 : \details Load distributors compute chunksize based on the degree of
11 : virtualization.
12 : */
13 : // *****************************************************************************
14 :
15 : #include <limits>
16 :
17 : #include "Types.hpp"
18 : #include "LoadDistributor.hpp"
19 : #include "Exception.hpp"
20 :
21 : namespace tk {
22 :
23 : uint64_t
24 259 : linearLoadDistributor( real virtualization,
25 : uint64_t load,
26 : int npe,
27 : uint64_t& chunksize,
28 : uint64_t& remainder )
29 : // *****************************************************************************
30 : // Compute linear load distribution for given total work and virtualization
31 : //! \param[in] virtualization Degree of virtualization [0.0...1.0]
32 : //! \param[in] load Total load, e.g., number of particles, number of mesh cells
33 : //! \param[in] npe Number of processing elements to distribute the load to
34 : //! \param[inout] chunksize Chunk size, see detailed description
35 : //! \param[inout] remainder Remainder, see detailed description
36 : //! \return Number of work units
37 : //! \details Compute load distibution (number of chares and chunksize) based on
38 : //! total work (e.g., total number of particles) and virtualization
39 : //!
40 : //! The virtualization parameter, specified by the user, is a real number
41 : //! between 0.0 and 1.0, inclusive, which controls the degree of
42 : //! virtualization or over-decomposition. Independent of the value of
43 : //! virtualization the work is approximately evenly distributed among the
44 : //! available processing elements, given by npe. For zero virtualization (no
45 : //! over-decomposition), the work is simply decomposed into total_work/numPEs,
46 : //! which yields the smallest number of Charm++ chares and the largest chunks
47 : //! of work units. The other extreme is unity virtualization, which decomposes
48 : //! the total work into the smallest size work units possible, yielding the
49 : //! largest number of Charm++ chares. Obviously, the optimum will be between
50 : //! 0.0 and 1.0, depending on the problem.
51 : //!
52 : //! The formula implemented uses a linear relationship between the
53 : //! virtualization parameter and the number of work units with the extremes
54 : //! described above. The formula is given by
55 : //!
56 : //! chunksize = (1 - n) * v + n;
57 : //!
58 : //! where
59 : //! - v = degree of virtualization
60 : //! - n = load/npes
61 : //! - load = total work, e.g., number of particles, number of mesh cells
62 : //! - npes = number of hardware processing elements
63 : // *****************************************************************************
64 : {
65 : Assert( virtualization > -std::numeric_limits< real >::epsilon() &&
66 : virtualization < 1.0+std::numeric_limits< real >::epsilon(),
67 : "Virtualization parameter must be between [0.0...1.0]" );
68 : Assert( npe > 0, "Number of processing elements must be larger than zero" );
69 :
70 : // Compute minimum number of work units
71 259 : const auto n = static_cast< real >( load ) / npe;
72 :
73 : // Compute work unit size based on the linear formula above
74 259 : chunksize = static_cast< uint64_t >( (1.0 - n) * virtualization + n );
75 :
76 : Assert( load >= chunksize, "Load must be larger than chunksize" );
77 :
78 : // Compute number of work units with size computed ignoring remainder
79 259 : uint64_t nchare = load / chunksize;
80 :
81 : // Compute remainder of work if the above number of units were to be created
82 259 : remainder = load - nchare * chunksize;
83 :
84 : // Redistribute remainder among the work units for a more equal distribution
85 259 : chunksize += remainder / nchare;
86 :
87 : // Compute new remainder (after redistribution of the previous remainder)
88 259 : remainder = load - nchare * chunksize;
89 :
90 : // Return number of work units (number of Charm++ chares)
91 259 : return nchare;
92 : }
93 :
94 : } // tk::
|