GetFEM  5.5
getfem_accumulated_distro.h
Go to the documentation of this file.
1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2018-2026 Andriy Andreykiv
5 
6  This file is a part of GetFEM
7 
8  GetFEM is free software; you can redistribute it and/or modify it
9  under the terms of the GNU Lesser General Public License as published
10  by the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You should have received a copy of the GNU Lesser General Public License
18  along with this program. If not, see https://www.gnu.org/licenses/.
19 
20  As a special exception, you may use this file as it is a part of a free
21  software library without restriction. Specifically, if other files
22  instantiate templates or use macros or inline functions from this file,
23  or you compile this file and link it with other files to produce an
24  executable, this file does not by itself cause the resulting executable
25  to be covered by the GNU Lesser General Public License. This exception
26  does not however invalidate any other reasons why the executable file
27  might be covered by the GNU Lesser General Public License.
28 
29 ===========================================================================*/
30 
31 /**@file getfem_accumulated_distro.h
32 @author Andriy Andreykiv <andriy.andreykiv@gmail.com>
33 @date November 21, 2018.
34 @brief Distribution of assembly results (matrices/vectors) for parallel
35  assembly.
36 
37 This is the kernel of getfem.
38 */
39 #pragma once
40 
41 #include <gmm/gmm_def.h>
42 
43 #include "getfem_omp.h"
44 
45 namespace getfem
46 {
47 
48 namespace detail {
49 
50  //T is a single vector
51  template<class T>
52  void add_spec(const T &a, T &b,
53  gmm::abstract_null_type, gmm::abstract_vector){
54  gmm::add(a, b);
55  }
56 
57  //T is a single matrix
58  template<class T>
59  void add_spec(const T &a, T &b,
60  gmm::abstract_null_type, gmm::abstract_matrix){
61  gmm::add(a, b);
62  }
63 
64  //T is a vector of either matrices or vectors
65  template<class T>
66  void add_list(const T &a, T &b){
67  GMM_ASSERT2(a.size() == b.size(), "size mismatch");
68  auto ita = begin(a);
69  auto itb = begin(b);
70  auto ita_end = end(a);
71  for (;ita != ita_end; ++ita, ++itb) gmm::add(*ita, *itb);
72  }
73 
74  //T is a vector of vectors
75  template<class T>
76  void add_spec(const T &a, T &b,
77  gmm::abstract_vector, gmm::abstract_vector){
78  add_list(a, b);
79  }
80 
81  //T is a vector of matrices
82  template<class T>
83  void add_spec(const T &a, T &b,
84  gmm::abstract_matrix, gmm::abstract_vector){
85  add_list(a, b);
86  }
87 
88  //T is a single vector
89  template<class T>
90  void equal_resize_spec(T &a, const T &b,
91  gmm::abstract_null_type, gmm::abstract_vector){
92  gmm::resize(a, gmm::vect_size(b));
93  }
94 
95  //T is a single matrix
96  template<class T>
97  void equal_resize_spec(T &a, const T &b,
98  gmm::abstract_null_type, gmm::abstract_matrix){
99  gmm::resize(a, gmm::mat_nrows(b), gmm::mat_ncols(b));
100  }
101 
102  //T is a vector of either matrices or vectors
103  template<class T>
104  void equal_resize_list(T &a, const T &b){
105  GMM_ASSERT2(a.empty(), "the first list should be still empty");
106  a.resize(b.size());
107  auto ita = begin(a);
108  auto itb = begin(b);
109  auto ita_end = end(a);
110  using Component = typename T::value_type;
111  using AlgoC = typename gmm::linalg_traits<Component>::linalg_type;
112  for (;ita != ita_end; ++ita, ++itb){
113  equal_resize_spec(*ita, *itb, gmm::abstract_null_type{}, AlgoC{});
114  }
115  }
116 
117  //T is a vector of vectors
118  template<class T>
119  void equal_resize_spec(T &a, const T &b,
120  gmm::abstract_vector, gmm::abstract_vector){
121  equal_resize_list(a, b);
122  }
123 
124  //T is a vector of matrices
125  template<class T>
126  void equal_resize_spec(T &a, const T &b,
127  gmm::abstract_matrix, gmm::abstract_vector){
128  equal_resize_list(a, b);
129  }
130 
131 } // namespace detail
132 
133  /**Generic addition for gmm types as well as vectors of gmm types*/
134  template<class T>
135  void gen_add(const T &a, T &b){
136  using AlgoT = typename gmm::linalg_traits<T>::linalg_type;
137  using Component = typename T::value_type;
138  using AlgoC = typename gmm::linalg_traits<Component>::linalg_type;
139  detail::add_spec(a, b, AlgoC{}, AlgoT{});
140  }
141 
142  /**Resize 'a' to the same size as 'b'.
143  a and b can be matrices, vectors, or lists of matrices or vectors
144  */
145  template<class T>
146  void equal_resize(T &a, const T &b){
147  using AlgoT = typename gmm::linalg_traits<T>::linalg_type;
148  using Component = typename T::value_type;
149  using AlgoC = typename gmm::linalg_traits<Component>::linalg_type;
150  detail::equal_resize_spec(a, b, AlgoC{}, AlgoT{});
151  }
152 
153  /**Takes a matrix or vector, or vector of matrices or vectors and
154  creates an empty copy on each thread. When the
155  thread computations are done (in the destructor), accumulates
156  the assembled copies into the original*/
157  template <class T>
159  {
160  T& original;
161  omp_distribute<T> distributed;
162 
163  public:
164 
165  explicit accumulated_distro(T& l)
166  : original{l}{
167  if (distributed.num_threads() == 1) return;
168  //intentionally skipping thread 0, as accumulated_distro will
169  //use original for it
170  for(size_type t = 1; t != distributed.num_threads(); ++t){
171  equal_resize(distributed(t), original);
172  }
173  }
174 
175  T& get(){
176  if (distributed.num_threads() == 1 ||
177  distributed.this_thread() == 0) return original;
178  else return distributed;
179  }
180 
181  operator T&(){ // implicit conversion
182  return get();
183  }
184 
185  T& operator = (const T &x){
186  return distributed = x;
187  }
188 
190  if (distributed.num_threads() == 1) return;
191 
192  if (me_is_multithreaded_now()) {
193  // GMM_ASSERT1 not convenient here
194  cerr << "Accumulation distribution should not run in parallel";
195  exit(1);
196  }
197 
198  using namespace std;
199  auto to_add = vector<T*>{};
200  to_add.push_back(&original);
201  for (size_type t = 1; t != distributed.num_threads(); ++t){
202  to_add.push_back(&distributed(t));
203  }
204 
205  //Accumulation in parallel.
206  //Adding, for instance, elements 1 to 0, 2 to 3, 5 to 4 and 7 to 6
207  //on separate 4 threads in case of parallelization of the assembly
208  //on 8 threads.
209  while (to_add.size() > 1){
211  auto i = distributed.this_thread() * 2;
212  if (i + 1 < to_add.size()){
213  auto &target = *to_add[i];
214  auto &source = *to_add[i + 1];
215  gen_add(source, target);
216  }
217  )
218  //erase every second item , as it was already added
219  for (auto it = begin(to_add), ite = end(to_add);
220  it != end(to_add) && next(it) != end(to_add);
221  it = to_add.erase(next(it)));
222  }
223  }
224  };
225 
226 } // namespace getfem
Takes a matrix or vector, or vector of matrices or vectors and creates an empty copy on each thread.
Use this template class for any object you want to distribute to open_MP threads.
Definition: getfem_omp.h:325
Tools for multithreaded, OpenMP and Boost based parallelization.
#define GETFEM_OMP_PARALLEL(body)
Organizes a proper parallel omp section:
Definition: getfem_omp.h:482
Basic definitions and tools of GMM.
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:48
GEneric Tool for Finite Element Methods.
void equal_resize(T &a, const T &b)
Resize 'a' to the same size as 'b'.
void gen_add(const T &a, T &b)
Generic addition for gmm types as well as vectors of gmm types.
bool me_is_multithreaded_now()
is the program running in the parallel section
Definition: getfem_omp.cc:105