HArD::Core2D
Hybrid Arbitrary Degree::Core 2D - Library to implement 2D schemes with edge and cell polynomials as unknowns
parallel_for.hpp
Go to the documentation of this file.
1 #ifndef PARALLEL_FOR
2 #define PARALLEL_FOR
3 
4 #include <thread>
5 #include <Eigen/Sparse>
6 
7 namespace HArDCore2D {
8 
15  static std::pair<std::vector<int>, std::vector<int>>
16  distributeLoad(size_t nb_elements, unsigned nb_threads)
17  {
18  // Vectors of start and end indices
19  std::vector<int> start(nb_threads);
20  std::vector<int> end(nb_threads);
21 
22  // Compute the batch size and the remainder
23  unsigned batch_size = nb_elements / nb_threads;
24  unsigned batch_remainder = nb_elements % nb_threads;
25 
26  // Distribute the remainder over the threads to get the start and end indices for each thread
27  for (unsigned i = 0; i < nb_threads; ++i) {
28  if (i < batch_remainder){
29  start[i] = i * batch_size + i;
30  end[i] = start[i] + batch_size + 1;
31  }
32  else{
33  start[i] = i * batch_size + batch_remainder;
34  end[i] = start[i] + batch_size;
35  }
36  }
37 
38  return std::make_pair(start, end);
39  }
40 
42  static inline void parallel_for(unsigned nb_elements,
43  std::function<void(size_t start, size_t end)> functor,
44  bool use_threads = true,
45  unsigned nb_threads_max = 1e9
46  )
47  {
48  unsigned nb_threads_hint = std::thread::hardware_concurrency();
49  unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
50  nb_threads = std::min(nb_threads_max, nb_threads);
51 
52  // Generate the start and end indices
53  auto [start, end] = distributeLoad(nb_elements, nb_threads);
54 
55  std::vector<std::thread> my_threads(nb_threads);
56 
57  if (use_threads) {
58  // Multithread execution
59  for (unsigned i = 0; i < nb_threads; ++i) {
60  my_threads[i] = std::thread(functor, start[i], end[i]);
61  }
62  } else {
63  // Single thread execution (for easy debugging)
64  for(unsigned i = 0; i < nb_threads; ++i) {
65  functor(start[i], end[i]);
66  }
67  }
68 
69  // Wait for the other thread to finish their task
70  if (use_threads) {
71  std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
72  }
73  }
74 
75 
77  static inline std::pair<Eigen::SparseMatrix<double>, Eigen::VectorXd>
79  size_t nb_elements, //< nb of elements over which the threading will be done
80  size_t size_system, //< size of the system to assemble
81  std::function<void(size_t start, size_t end, std::list<Eigen::Triplet<double>> * triplets, Eigen::VectorXd * rhs)> batch_local_assembly, //< procedure to compute all the local contributions to the matrix (put in the triplets) and rhs (put in the vector) between start and end points (e.g. elements indices)
82  bool use_threads = true //< determine if threaded process is used or not
83  )
84  {
85  // Matrix and rhs
86  Eigen::SparseMatrix<double> A(size_system, size_system);
87  Eigen::VectorXd b = Eigen::VectorXd::Zero(size_system);
88 
89  if (use_threads) {
90  // Select the number of threads
91  unsigned nb_threads_hint = std::thread::hardware_concurrency();
92  unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
93 
94  // Generate the start and end indices
95  auto [start, end] = distributeLoad(nb_elements, nb_threads);
96 
97  // Create vectors of triplets and vectors
98  std::vector<std::list<Eigen::Triplet<double> > > triplets(nb_threads);
99  std::vector<Eigen::VectorXd> rhs(nb_threads);
100 
101  for (unsigned i = 0; i < nb_threads; i++) {
102  rhs[i] = Eigen::VectorXd::Zero(size_system);
103  } // for i
104 
105  // Assign a task to each thread
106  std::vector<std::thread> my_threads(nb_threads);
107  for (unsigned i = 0; i < nb_threads; ++i) {
108  my_threads[i] = std::thread(batch_local_assembly, start[i], end[i], &triplets[i], &rhs[i]);
109  }
110 
111  // Wait for the other threads to finish their task
112  std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
113 
114  // Create matrix from triplets
115  size_t n_triplets = 0;
116  for (auto triplets_thread : triplets) {
117  n_triplets += triplets_thread.size();
118  }
119  std::vector<Eigen::Triplet<double> > all_triplets(n_triplets);
120  auto triplet_index = all_triplets.begin();
121  for (auto triplets_thread : triplets) {
122  triplet_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet_index);
123  }
124  A.setFromTriplets(all_triplets.begin(), all_triplets.end());
125  for (auto rhs_thread : rhs) {
126  b += rhs_thread;
127  }
128  } else {
129  std::list<Eigen::Triplet<double> > triplets;
130  batch_local_assembly(0, nb_elements, &triplets, &b);
131  A.setFromTriplets(triplets.begin(), triplets.end());
132  }
133 
134  return std::make_pair(A, b);
135  }
136 
138  static inline std::tuple<Eigen::SparseMatrix<double>, Eigen::VectorXd, Eigen::SparseMatrix<double>>
140  size_t nb_elements, //< nb of elements over which the threading will be done
141  size_t size_system1, //< size of the system 1 to assemble (must be square, corresponds to first matrix and vector=rhs)
142  std::pair<size_t, size_t> size_Mat2, //< size of the second matrix to assemble (can be rectangular)
143  std::function<void(size_t start, size_t end, std::list<Eigen::Triplet<double>> * triplets1, Eigen::VectorXd * vec1, std::list<Eigen::Triplet<double>> * triplets2)> batch_local_assembly, //< procedure to compute all the local contributions to the matrices (put in the triplets) and vectors between start and end points (e.g. elements indices)
144  bool use_threads = true //< determine if threaded process is used or not
145  )
146  {
147  // Matrices and vectors
148  Eigen::SparseMatrix<double> A1(size_system1, size_system1);
149  Eigen::VectorXd b1 = Eigen::VectorXd::Zero(size_system1);
150  Eigen::SparseMatrix<double> A2(size_Mat2.first, size_Mat2.second);
151 
152  if (use_threads) {
153  // Select the number of threads
154  unsigned nb_threads_hint = std::thread::hardware_concurrency();
155  unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
156 
157  // Generate the start and end indices
158  auto [start, end] = distributeLoad(nb_elements, nb_threads);
159 
160  // Create vectors of triplets and vectors
161  std::vector<std::list<Eigen::Triplet<double> > > triplets1(nb_threads);
162  std::vector<Eigen::VectorXd> vec1(nb_threads);
163  std::vector<std::list<Eigen::Triplet<double> > > triplets2(nb_threads);
164 
165  for (unsigned i = 0; i < nb_threads; i++) {
166  vec1[i] = Eigen::VectorXd::Zero(size_system1);
167  } // for i
168 
169  // Assign a task to each thread
170  std::vector<std::thread> my_threads(nb_threads);
171  for (unsigned i = 0; i < nb_threads; ++i) {
172  my_threads[i] = std::thread(batch_local_assembly, start[i], end[i], &triplets1[i], &vec1[i], &triplets2[i]);
173  }
174 
175  // Wait for the other threads to finish their task
176  std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
177 
178  // Create system 1 from triplets
179  size_t n_triplets1 = 0;
180  for (auto triplets_thread : triplets1) {
181  n_triplets1 += triplets_thread.size();
182  }
183  std::vector<Eigen::Triplet<double> > all_triplets1(n_triplets1);
184  auto triplet1_index = all_triplets1.begin();
185  for (auto triplets_thread : triplets1) {
186  triplet1_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet1_index);
187  }
188  A1.setFromTriplets(all_triplets1.begin(), all_triplets1.end());
189  for (auto vec_thread : vec1) {
190  b1 += vec_thread;
191  }
192 
193  // Create system 2 from triplets
194  size_t n_triplets2 = 0;
195  for (auto triplets_thread : triplets2) {
196  n_triplets2 += triplets_thread.size();
197  }
198  std::vector<Eigen::Triplet<double> > all_triplets2(n_triplets2);
199  auto triplet2_index = all_triplets2.begin();
200  for (auto triplets_thread : triplets2) {
201  triplet2_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet2_index);
202  }
203  A2.setFromTriplets(all_triplets2.begin(), all_triplets2.end());
204 
205 
206  } else {
207  std::list<Eigen::Triplet<double> > triplets1;
208  std::list<Eigen::Triplet<double> > triplets2;
209  batch_local_assembly(0, nb_elements, &triplets1, &b1, &triplets2);
210  A1.setFromTriplets(triplets1.begin(), triplets1.end());
211  A2.setFromTriplets(triplets2.begin(), triplets2.end());
212  }
213 
214  return std::make_tuple(A1, b1, A2);
215  }
216 
217 
219  static inline std::tuple<Eigen::SparseMatrix<double>, Eigen::VectorXd, Eigen::SparseMatrix<double>, Eigen::VectorXd>
221  size_t nb_elements, //< nb of elements over which the threading will be done
222  size_t size_system1, //< size of the system 1 to assemble (must be square, corresponds to first matrix and vector=rhs)
223  std::pair<size_t, size_t> size_Mat2, //< size of the second matrix to assemble (can be rectangular)
224  size_t size_b2, //< size of second vector
225  std::function<void(size_t start, size_t end, std::list<Eigen::Triplet<double>> * triplets1, Eigen::VectorXd * vec1, std::list<Eigen::Triplet<double>> * triplets2, Eigen::VectorXd * vec2)> batch_local_assembly, //< procedure to compute all the local contributions to the matrices (put in the triplets) and vectors between start and end points (e.g. elements indices)
226  bool use_threads = true //< determine if threaded process is used or not
227  )
228  {
229  // Matrices and vectors
230  Eigen::SparseMatrix<double> A1(size_system1, size_system1);
231  Eigen::VectorXd b1 = Eigen::VectorXd::Zero(size_system1);
232  Eigen::SparseMatrix<double> A2(size_Mat2.first, size_Mat2.second);
233  Eigen::VectorXd b2 = Eigen::VectorXd::Zero(size_b2);
234 
235  if (use_threads) {
236  // Select the number of threads
237  unsigned nb_threads_hint = std::thread::hardware_concurrency();
238  unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
239 
240  // Generate the start and end indices
241  auto [start, end] = distributeLoad(nb_elements, nb_threads);
242 
243  // Create vectors of triplets and vectors
244  std::vector<std::list<Eigen::Triplet<double> > > triplets1(nb_threads);
245  std::vector<Eigen::VectorXd> vec1(nb_threads);
246  std::vector<std::list<Eigen::Triplet<double> > > triplets2(nb_threads);
247  std::vector<Eigen::VectorXd> vec2(nb_threads);
248 
249  for (unsigned i = 0; i < nb_threads; i++) {
250  vec1[i] = Eigen::VectorXd::Zero(size_system1);
251  vec2[i] = Eigen::VectorXd::Zero(size_b2);
252  } // for i
253 
254  // Assign a task to each thread
255  std::vector<std::thread> my_threads(nb_threads);
256  for (unsigned i = 0; i < nb_threads; ++i) {
257  my_threads[i] = std::thread(batch_local_assembly, start[i], end[i], &triplets1[i], &vec1[i], &triplets2[i], &vec2[i]);
258  }
259 
260  // Wait for the other threads to finish their task
261  std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
262 
263  // Create system 1 from triplets
264  size_t n_triplets1 = 0;
265  for (auto triplets_thread : triplets1) {
266  n_triplets1 += triplets_thread.size();
267  }
268  std::vector<Eigen::Triplet<double> > all_triplets1(n_triplets1);
269  auto triplet1_index = all_triplets1.begin();
270  for (auto triplets_thread : triplets1) {
271  triplet1_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet1_index);
272  }
273  A1.setFromTriplets(all_triplets1.begin(), all_triplets1.end());
274  for (auto vec_thread : vec1) {
275  b1 += vec_thread;
276  }
277 
278  // Create system 2 from triplets
279  size_t n_triplets2 = 0;
280  for (auto triplets_thread : triplets2) {
281  n_triplets2 += triplets_thread.size();
282  }
283  std::vector<Eigen::Triplet<double> > all_triplets2(n_triplets2);
284  auto triplet2_index = all_triplets2.begin();
285  for (auto triplets_thread : triplets2) {
286  triplet2_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet2_index);
287  }
288  A2.setFromTriplets(all_triplets2.begin(), all_triplets2.end());
289  for (auto vec_thread : vec2) {
290  b2 += vec_thread;
291  }
292 
293 
294  } else {
295  std::list<Eigen::Triplet<double> > triplets1;
296  std::list<Eigen::Triplet<double> > triplets2;
297  batch_local_assembly(0, nb_elements, &triplets1, &b1, &triplets2, &b2);
298  A1.setFromTriplets(triplets1.begin(), triplets1.end());
299  A2.setFromTriplets(triplets2.begin(), triplets2.end());
300  }
301 
302  return std::make_tuple(A1, b1, A2, b2);
303  }
304 
306 
307 } // end of namespace HArDCore2D
308 #endif
Compute max and min eigenvalues of all matrices for i
Definition: compute_eigs.m:5
function[maxeig, mineig, cond, mat]
Definition: compute_eigs.m:1
end
Definition: compute_eigs.m:12
static void parallel_for(unsigned nb_elements, std::function< void(size_t start, size_t end)> functor, bool use_threads=true, unsigned nb_threads_max=1e9)
Generic function to execute threaded processes.
Definition: parallel_for.hpp:42
static std::pair< std::vector< int >, std::vector< int > > distributeLoad(size_t nb_elements, unsigned nb_threads)
Function to distribute elements (considered as jobs) over threads. It returns a pair of vectors indic...
Definition: parallel_for.hpp:16
static std::pair< Eigen::SparseMatrix< double >, Eigen::VectorXd > parallel_assembly_system(size_t nb_elements, size_t size_system, std::function< void(size_t start, size_t end, std::list< Eigen::Triplet< double >> *triplets, Eigen::VectorXd *rhs)> batch_local_assembly, bool use_threads=true)
Function to assemble global matrix and right-hand side from a procedure that computes local triplets ...
Definition: parallel_for.hpp:78
bool use_threads
Definition: HHO_DiffAdvecReac.hpp:47
A
Definition: mmread.m:102
Definition: ddr-klplate.hpp:27