HArD::Core2D
Hybrid Arbitrary Degree::Core 2D - Library to implement 2D schemes with edge and cell polynomials as unknowns
Loading...
Searching...
No Matches
parallel_for.hpp
Go to the documentation of this file.
1#ifndef PARALLEL_FOR
2#define PARALLEL_FOR
3
4#include <thread>
5#include <Eigen/Sparse>
6
7namespace HArDCore2D {
8
15 static std::pair<std::vector<int>, std::vector<int>>
16 distributeLoad(size_t nb_elements, unsigned nb_threads)
17 {
18 // Vectors of start and end indices
19 std::vector<int> start(nb_threads);
20 std::vector<int> end(nb_threads);
21
22 // Compute the batch size and the remainder
23 unsigned batch_size = nb_elements / nb_threads;
24 unsigned batch_remainder = nb_elements % nb_threads;
25
26 // Distribute the remainder over the threads to get the start and end indices for each thread
27 for (unsigned i = 0; i < nb_threads; ++i) {
28 if (i < batch_remainder){
29 start[i] = i * batch_size + i;
30 end[i] = start[i] + batch_size + 1;
31 }
32 else{
33 start[i] = i * batch_size + batch_remainder;
34 end[i] = start[i] + batch_size;
35 }
36 }
37
38 return std::make_pair(start, end);
39 }
40
42 static inline void parallel_for(unsigned nb_elements,
43 std::function<void(size_t start, size_t end)> functor,
44 bool use_threads = true,
45 unsigned nb_threads_max = 1e9
46 )
47 {
48 unsigned nb_threads_hint = std::thread::hardware_concurrency();
49 unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
50 nb_threads = std::min(nb_threads_max, nb_threads);
51
52 // Generate the start and end indices
53 auto [start, end] = distributeLoad(nb_elements, nb_threads);
54
55 std::vector<std::thread> my_threads(nb_threads);
56
57 if (use_threads) {
58 // Multithread execution
59 for (unsigned i = 0; i < nb_threads; ++i) {
60 my_threads[i] = std::thread(functor, start[i], end[i]);
61 }
62 } else {
63 // Single thread execution (for easy debugging)
64 for(unsigned i = 0; i < nb_threads; ++i) {
65 functor(start[i], end[i]);
66 }
67 }
68
69 // Wait for the other thread to finish their task
70 if (use_threads) {
71 std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
72 }
73 }
74
75
77 static inline std::pair<Eigen::SparseMatrix<double>, Eigen::VectorXd>
79 size_t nb_elements, //< nb of elements over which the threading will be done
80 size_t size_system, //< size of the system to assemble
81 std::function<void(size_t start, size_t end, std::list<Eigen::Triplet<double>> * triplets, Eigen::VectorXd * rhs)> batch_local_assembly, //< procedure to compute all the local contributions to the matrix (put in the triplets) and rhs (put in the vector) between start and end points (e.g. elements indices)
82 bool use_threads = true //< determine if threaded process is used or not
83 )
84 {
85 // Matrix and rhs
86 Eigen::SparseMatrix<double> A(size_system, size_system);
87 Eigen::VectorXd b = Eigen::VectorXd::Zero(size_system);
88
89 if (use_threads) {
90 // Select the number of threads
91 unsigned nb_threads_hint = std::thread::hardware_concurrency();
92 unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
93
94 // Generate the start and end indices
95 auto [start, end] = distributeLoad(nb_elements, nb_threads);
96
97 // Create vectors of triplets and vectors
98 std::vector<std::list<Eigen::Triplet<double> > > triplets(nb_threads);
99 std::vector<Eigen::VectorXd> rhs(nb_threads);
100
101 for (unsigned i = 0; i < nb_threads; i++) {
102 rhs[i] = Eigen::VectorXd::Zero(size_system);
103 } // for i
104
105 // Assign a task to each thread
106 std::vector<std::thread> my_threads(nb_threads);
107 for (unsigned i = 0; i < nb_threads; ++i) {
108 my_threads[i] = std::thread(batch_local_assembly, start[i], end[i], &triplets[i], &rhs[i]);
109 }
110
111 // Wait for the other threads to finish their task
112 std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
113
114 // Create matrix from triplets
115 size_t n_triplets = 0;
116 for (auto triplets_thread : triplets) {
117 n_triplets += triplets_thread.size();
118 }
119 std::vector<Eigen::Triplet<double> > all_triplets(n_triplets);
120 auto triplet_index = all_triplets.begin();
121 for (auto triplets_thread : triplets) {
122 triplet_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet_index);
123 }
124 A.setFromTriplets(all_triplets.begin(), all_triplets.end());
125 for (auto rhs_thread : rhs) {
126 b += rhs_thread;
127 }
128 } else {
129 std::list<Eigen::Triplet<double> > triplets;
130 batch_local_assembly(0, nb_elements, &triplets, &b);
131 A.setFromTriplets(triplets.begin(), triplets.end());
132 }
133
134 return std::make_pair(A, b);
135 }
136
138 static inline std::tuple<Eigen::SparseMatrix<double>, Eigen::VectorXd, Eigen::SparseMatrix<double>>
140 size_t nb_elements, //< nb of elements over which the threading will be done
141 size_t size_system1, //< size of the system 1 to assemble (must be square, corresponds to first matrix and vector=rhs)
142 std::pair<size_t, size_t> size_Mat2, //< size of the second matrix to assemble (can be rectangular)
143 std::function<void(size_t start, size_t end, std::list<Eigen::Triplet<double>> * triplets1, Eigen::VectorXd * vec1, std::list<Eigen::Triplet<double>> * triplets2)> batch_local_assembly, //< procedure to compute all the local contributions to the matrices (put in the triplets) and vectors between start and end points (e.g. elements indices)
144 bool use_threads = true //< determine if threaded process is used or not
145 )
146 {
147 // Matrices and vectors
148 Eigen::SparseMatrix<double> A1(size_system1, size_system1);
149 Eigen::VectorXd b1 = Eigen::VectorXd::Zero(size_system1);
150 Eigen::SparseMatrix<double> A2(size_Mat2.first, size_Mat2.second);
151
152 if (use_threads) {
153 // Select the number of threads
154 unsigned nb_threads_hint = std::thread::hardware_concurrency();
155 unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
156
157 // Generate the start and end indices
158 auto [start, end] = distributeLoad(nb_elements, nb_threads);
159
160 // Create vectors of triplets and vectors
161 std::vector<std::list<Eigen::Triplet<double> > > triplets1(nb_threads);
162 std::vector<Eigen::VectorXd> vec1(nb_threads);
163 std::vector<std::list<Eigen::Triplet<double> > > triplets2(nb_threads);
164
165 for (unsigned i = 0; i < nb_threads; i++) {
166 vec1[i] = Eigen::VectorXd::Zero(size_system1);
167 } // for i
168
169 // Assign a task to each thread
170 std::vector<std::thread> my_threads(nb_threads);
171 for (unsigned i = 0; i < nb_threads; ++i) {
172 my_threads[i] = std::thread(batch_local_assembly, start[i], end[i], &triplets1[i], &vec1[i], &triplets2[i]);
173 }
174
175 // Wait for the other threads to finish their task
176 std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
177
178 // Create system 1 from triplets
179 size_t n_triplets1 = 0;
180 for (auto triplets_thread : triplets1) {
181 n_triplets1 += triplets_thread.size();
182 }
183 std::vector<Eigen::Triplet<double> > all_triplets1(n_triplets1);
184 auto triplet1_index = all_triplets1.begin();
185 for (auto triplets_thread : triplets1) {
186 triplet1_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet1_index);
187 }
188 A1.setFromTriplets(all_triplets1.begin(), all_triplets1.end());
189 for (auto vec_thread : vec1) {
190 b1 += vec_thread;
191 }
192
193 // Create system 2 from triplets
194 size_t n_triplets2 = 0;
195 for (auto triplets_thread : triplets2) {
196 n_triplets2 += triplets_thread.size();
197 }
198 std::vector<Eigen::Triplet<double> > all_triplets2(n_triplets2);
199 auto triplet2_index = all_triplets2.begin();
200 for (auto triplets_thread : triplets2) {
201 triplet2_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet2_index);
202 }
203 A2.setFromTriplets(all_triplets2.begin(), all_triplets2.end());
204
205
206 } else {
207 std::list<Eigen::Triplet<double> > triplets1;
208 std::list<Eigen::Triplet<double> > triplets2;
209 batch_local_assembly(0, nb_elements, &triplets1, &b1, &triplets2);
210 A1.setFromTriplets(triplets1.begin(), triplets1.end());
211 A2.setFromTriplets(triplets2.begin(), triplets2.end());
212 }
213
214 return std::make_tuple(A1, b1, A2);
215 }
216
217
219 static inline std::tuple<Eigen::SparseMatrix<double>, Eigen::VectorXd, Eigen::SparseMatrix<double>, Eigen::VectorXd>
221 size_t nb_elements, //< nb of elements over which the threading will be done
222 size_t size_system1, //< size of the system 1 to assemble (must be square, corresponds to first matrix and vector=rhs)
223 std::pair<size_t, size_t> size_Mat2, //< size of the second matrix to assemble (can be rectangular)
224 size_t size_b2, //< size of second vector
225 std::function<void(size_t start, size_t end, std::list<Eigen::Triplet<double>> * triplets1, Eigen::VectorXd * vec1, std::list<Eigen::Triplet<double>> * triplets2, Eigen::VectorXd * vec2)> batch_local_assembly, //< procedure to compute all the local contributions to the matrices (put in the triplets) and vectors between start and end points (e.g. elements indices)
226 bool use_threads = true //< determine if threaded process is used or not
227 )
228 {
229 // Matrices and vectors
230 Eigen::SparseMatrix<double> A1(size_system1, size_system1);
231 Eigen::VectorXd b1 = Eigen::VectorXd::Zero(size_system1);
232 Eigen::SparseMatrix<double> A2(size_Mat2.first, size_Mat2.second);
233 Eigen::VectorXd b2 = Eigen::VectorXd::Zero(size_b2);
234
235 if (use_threads) {
236 // Select the number of threads
237 unsigned nb_threads_hint = std::thread::hardware_concurrency();
238 unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
239
240 // Generate the start and end indices
241 auto [start, end] = distributeLoad(nb_elements, nb_threads);
242
243 // Create vectors of triplets and vectors
244 std::vector<std::list<Eigen::Triplet<double> > > triplets1(nb_threads);
245 std::vector<Eigen::VectorXd> vec1(nb_threads);
246 std::vector<std::list<Eigen::Triplet<double> > > triplets2(nb_threads);
247 std::vector<Eigen::VectorXd> vec2(nb_threads);
248
249 for (unsigned i = 0; i < nb_threads; i++) {
250 vec1[i] = Eigen::VectorXd::Zero(size_system1);
251 vec2[i] = Eigen::VectorXd::Zero(size_b2);
252 } // for i
253
254 // Assign a task to each thread
255 std::vector<std::thread> my_threads(nb_threads);
256 for (unsigned i = 0; i < nb_threads; ++i) {
257 my_threads[i] = std::thread(batch_local_assembly, start[i], end[i], &triplets1[i], &vec1[i], &triplets2[i], &vec2[i]);
258 }
259
260 // Wait for the other threads to finish their task
261 std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
262
263 // Create system 1 from triplets
264 size_t n_triplets1 = 0;
265 for (auto triplets_thread : triplets1) {
266 n_triplets1 += triplets_thread.size();
267 }
268 std::vector<Eigen::Triplet<double> > all_triplets1(n_triplets1);
269 auto triplet1_index = all_triplets1.begin();
270 for (auto triplets_thread : triplets1) {
271 triplet1_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet1_index);
272 }
273 A1.setFromTriplets(all_triplets1.begin(), all_triplets1.end());
274 for (auto vec_thread : vec1) {
275 b1 += vec_thread;
276 }
277
278 // Create system 2 from triplets
279 size_t n_triplets2 = 0;
280 for (auto triplets_thread : triplets2) {
281 n_triplets2 += triplets_thread.size();
282 }
283 std::vector<Eigen::Triplet<double> > all_triplets2(n_triplets2);
284 auto triplet2_index = all_triplets2.begin();
285 for (auto triplets_thread : triplets2) {
286 triplet2_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet2_index);
287 }
288 A2.setFromTriplets(all_triplets2.begin(), all_triplets2.end());
289 for (auto vec_thread : vec2) {
290 b2 += vec_thread;
291 }
292
293
294 } else {
295 std::list<Eigen::Triplet<double> > triplets1;
296 std::list<Eigen::Triplet<double> > triplets2;
297 batch_local_assembly(0, nb_elements, &triplets1, &b1, &triplets2, &b2);
298 A1.setFromTriplets(triplets1.begin(), triplets1.end());
299 A2.setFromTriplets(triplets2.begin(), triplets2.end());
300 }
301
302 return std::make_tuple(A1, b1, A2, b2);
303 }
304
306
307} // end of namespace HArDCore2D
308#endif
Compute max and min eigenvalues of all matrices for i
Definition compute_eigs.m:5
end
Definition compute_eigs.m:12
static void parallel_for(unsigned nb_elements, std::function< void(size_t start, size_t end)> functor, bool use_threads=true, unsigned nb_threads_max=1e9)
Generic function to execute threaded processes.
Definition parallel_for.hpp:42
static std::pair< std::vector< int >, std::vector< int > > distributeLoad(size_t nb_elements, unsigned nb_threads)
Function to distribute elements (considered as jobs) over threads. It returns a pair of vectors indic...
Definition parallel_for.hpp:16
static std::pair< Eigen::SparseMatrix< double >, Eigen::VectorXd > parallel_assembly_system(size_t nb_elements, size_t size_system, std::function< void(size_t start, size_t end, std::list< Eigen::Triplet< double > > *triplets, Eigen::VectorXd *rhs)> batch_local_assembly, bool use_threads=true)
Function to assemble global matrix and right-hand side from a procedure that computes local triplets ...
Definition parallel_for.hpp:78
bool use_threads
Definition HHO_DiffAdvecReac.hpp:47
A
Definition mmread.m:102
Definition ddr-klplate.hpp:27