43 std::function<
void(
size_t start,
size_t end)> functor,
45 unsigned nb_threads_max = 1e9
48 unsigned nb_threads_hint = std::thread::hardware_concurrency();
49 unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
50 nb_threads = std::min(nb_threads_max, nb_threads);
55 std::vector<std::thread> my_threads(nb_threads);
59 for (
unsigned i = 0;
i < nb_threads; ++
i) {
60 my_threads[
i] = std::thread(functor, start[
i],
end[
i]);
64 for(
unsigned i = 0;
i < nb_threads; ++
i) {
65 functor(start[
i],
end[
i]);
71 std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
81 std::function<
void(
size_t start,
size_t end, std::list<Eigen::Triplet<double>> * triplets, Eigen::VectorXd * rhs)> batch_local_assembly,
86 Eigen::SparseMatrix<double>
A(size_system, size_system);
87 Eigen::VectorXd b = Eigen::VectorXd::Zero(size_system);
91 unsigned nb_threads_hint = std::thread::hardware_concurrency();
92 unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
98 std::vector<std::list<Eigen::Triplet<double> > > triplets(nb_threads);
99 std::vector<Eigen::VectorXd> rhs(nb_threads);
101 for (
unsigned i = 0;
i < nb_threads;
i++) {
102 rhs[
i] = Eigen::VectorXd::Zero(size_system);
106 std::vector<std::thread> my_threads(nb_threads);
107 for (
unsigned i = 0;
i < nb_threads; ++
i) {
108 my_threads[
i] = std::thread(batch_local_assembly, start[
i],
end[
i], &triplets[
i], &rhs[
i]);
112 std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
115 size_t n_triplets = 0;
116 for (
auto triplets_thread : triplets) {
117 n_triplets += triplets_thread.size();
119 std::vector<Eigen::Triplet<double> > all_triplets(n_triplets);
120 auto triplet_index = all_triplets.begin();
121 for (
auto triplets_thread : triplets) {
122 triplet_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet_index);
124 A.setFromTriplets(all_triplets.begin(), all_triplets.end());
125 for (
auto rhs_thread : rhs) {
129 std::list<Eigen::Triplet<double> > triplets;
130 batch_local_assembly(0, nb_elements, &triplets, &b);
131 A.setFromTriplets(triplets.begin(), triplets.end());
134 return std::make_pair(
A, b);
142 std::pair<size_t, size_t> size_Mat2,
143 std::function<
void(
size_t start,
size_t end, std::list<Eigen::Triplet<double>> * triplets1, Eigen::VectorXd * vec1, std::list<Eigen::Triplet<double>> * triplets2)> batch_local_assembly,
148 Eigen::SparseMatrix<double> A1(size_system1, size_system1);
149 Eigen::VectorXd b1 = Eigen::VectorXd::Zero(size_system1);
150 Eigen::SparseMatrix<double> A2(size_Mat2.first, size_Mat2.second);
154 unsigned nb_threads_hint = std::thread::hardware_concurrency();
155 unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
161 std::vector<std::list<Eigen::Triplet<double> > > triplets1(nb_threads);
162 std::vector<Eigen::VectorXd> vec1(nb_threads);
163 std::vector<std::list<Eigen::Triplet<double> > > triplets2(nb_threads);
165 for (
unsigned i = 0;
i < nb_threads;
i++) {
166 vec1[
i] = Eigen::VectorXd::Zero(size_system1);
170 std::vector<std::thread> my_threads(nb_threads);
171 for (
unsigned i = 0;
i < nb_threads; ++
i) {
172 my_threads[
i] = std::thread(batch_local_assembly, start[
i],
end[
i], &triplets1[
i], &vec1[
i], &triplets2[
i]);
176 std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
179 size_t n_triplets1 = 0;
180 for (
auto triplets_thread : triplets1) {
181 n_triplets1 += triplets_thread.size();
183 std::vector<Eigen::Triplet<double> > all_triplets1(n_triplets1);
184 auto triplet1_index = all_triplets1.begin();
185 for (
auto triplets_thread : triplets1) {
186 triplet1_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet1_index);
188 A1.setFromTriplets(all_triplets1.begin(), all_triplets1.end());
189 for (
auto vec_thread : vec1) {
194 size_t n_triplets2 = 0;
195 for (
auto triplets_thread : triplets2) {
196 n_triplets2 += triplets_thread.size();
198 std::vector<Eigen::Triplet<double> > all_triplets2(n_triplets2);
199 auto triplet2_index = all_triplets2.begin();
200 for (
auto triplets_thread : triplets2) {
201 triplet2_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet2_index);
203 A2.setFromTriplets(all_triplets2.begin(), all_triplets2.end());
207 std::list<Eigen::Triplet<double> > triplets1;
208 std::list<Eigen::Triplet<double> > triplets2;
209 batch_local_assembly(0, nb_elements, &triplets1, &b1, &triplets2);
210 A1.setFromTriplets(triplets1.begin(), triplets1.end());
211 A2.setFromTriplets(triplets2.begin(), triplets2.end());
214 return std::make_tuple(A1, b1, A2);
223 std::pair<size_t, size_t> size_Mat2,
225 std::function<
void(
size_t start,
size_t end, std::list<Eigen::Triplet<double>> * triplets1, Eigen::VectorXd * vec1, std::list<Eigen::Triplet<double>> * triplets2, Eigen::VectorXd * vec2)> batch_local_assembly,
230 Eigen::SparseMatrix<double> A1(size_system1, size_system1);
231 Eigen::VectorXd b1 = Eigen::VectorXd::Zero(size_system1);
232 Eigen::SparseMatrix<double> A2(size_Mat2.first, size_Mat2.second);
233 Eigen::VectorXd b2 = Eigen::VectorXd::Zero(size_b2);
237 unsigned nb_threads_hint = std::thread::hardware_concurrency();
238 unsigned nb_threads = nb_threads_hint == 0 ? 8 : (nb_threads_hint);
244 std::vector<std::list<Eigen::Triplet<double> > > triplets1(nb_threads);
245 std::vector<Eigen::VectorXd> vec1(nb_threads);
246 std::vector<std::list<Eigen::Triplet<double> > > triplets2(nb_threads);
247 std::vector<Eigen::VectorXd> vec2(nb_threads);
249 for (
unsigned i = 0;
i < nb_threads;
i++) {
250 vec1[
i] = Eigen::VectorXd::Zero(size_system1);
251 vec2[
i] = Eigen::VectorXd::Zero(size_b2);
255 std::vector<std::thread> my_threads(nb_threads);
256 for (
unsigned i = 0;
i < nb_threads; ++
i) {
257 my_threads[
i] = std::thread(batch_local_assembly, start[
i],
end[
i], &triplets1[
i], &vec1[
i], &triplets2[
i], &vec2[
i]);
261 std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
264 size_t n_triplets1 = 0;
265 for (
auto triplets_thread : triplets1) {
266 n_triplets1 += triplets_thread.size();
268 std::vector<Eigen::Triplet<double> > all_triplets1(n_triplets1);
269 auto triplet1_index = all_triplets1.begin();
270 for (
auto triplets_thread : triplets1) {
271 triplet1_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet1_index);
273 A1.setFromTriplets(all_triplets1.begin(), all_triplets1.end());
274 for (
auto vec_thread : vec1) {
279 size_t n_triplets2 = 0;
280 for (
auto triplets_thread : triplets2) {
281 n_triplets2 += triplets_thread.size();
283 std::vector<Eigen::Triplet<double> > all_triplets2(n_triplets2);
284 auto triplet2_index = all_triplets2.begin();
285 for (
auto triplets_thread : triplets2) {
286 triplet2_index = std::copy(triplets_thread.begin(), triplets_thread.end(), triplet2_index);
288 A2.setFromTriplets(all_triplets2.begin(), all_triplets2.end());
289 for (
auto vec_thread : vec2) {
295 std::list<Eigen::Triplet<double> > triplets1;
296 std::list<Eigen::Triplet<double> > triplets2;
297 batch_local_assembly(0, nb_elements, &triplets1, &b1, &triplets2, &b2);
298 A1.setFromTriplets(triplets1.begin(), triplets1.end());
299 A2.setFromTriplets(triplets2.begin(), triplets2.end());
302 return std::make_tuple(A1, b1, A2, b2);
static void parallel_for(unsigned nb_elements, std::function< void(size_t start, size_t end)> functor, bool use_threads=true, unsigned nb_threads_max=1e9)
Generic function to execute threaded processes.
Definition parallel_for.hpp:42
static std::pair< std::vector< int >, std::vector< int > > distributeLoad(size_t nb_elements, unsigned nb_threads)
Function to distribute elements (considered as jobs) over threads. It returns a pair of vectors indic...
Definition parallel_for.hpp:16
static std::pair< Eigen::SparseMatrix< double >, Eigen::VectorXd > parallel_assembly_system(size_t nb_elements, size_t size_system, std::function< void(size_t start, size_t end, std::list< Eigen::Triplet< double > > *triplets, Eigen::VectorXd *rhs)> batch_local_assembly, bool use_threads=true)
Function to assemble global matrix and right-hand side from a procedure that computes local triplets ...
Definition parallel_for.hpp:78