Galois
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
libgluon/include/galois/cuda/Context.h
Go to the documentation of this file.
1 /*
2  * This file belongs to the Galois project, a C++ library for exploiting
3  * parallelism. The code is being released under the terms of the 3-Clause BSD
4  * License (a copy is located in LICENSE.txt at the top-level directory).
5  *
6  * Copyright (C) 2018, The University of Texas at Austin. All rights reserved.
7  * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
8  * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
9  * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
10  * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
11  * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
12  * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
13  * shall University be liable for incidental, special, indirect, direct or
14  * consequential damages or loss of profits, interruption of business, or
15  * related expenses which may arise from use of Software or Documentation,
16  * including but not limited to those resulting from defects in Software and/or
17  * Documentation, or loss or inaccuracy of data of any kind.
18  */
19 
20 /*
21  */
22 
31 #pragma once
32 #include <cuda.h>
33 #include "gg.h"
34 #include "galois/cuda/HostDecls.h"
35 
// Per-remote-host table of shared node IDs; used for both the `master` and
// `mirror` members of CUDA_Context_Common. Both arrays are allocated with
// one entry per host (see load_graph_CUDA_common), and nodes[h] holds
// num_nodes[h] local node IDs copied to the GPU.
// NOTE(review): the opening "struct ... {" line is elided in this rendering.
37  unsigned int* num_nodes; // per host
38  DeviceOnly<unsigned int>* nodes; // per host
39 };
40 
// Per-GPU communication context shared by all field instances.
// NOTE(review): the opening "struct ... {" line and two members (original
// lines 49-50, the `master`/`mirror` CUDA_Context_Shared tables referenced by
// load_graph_CUDA_common) are elided in this rendering.
42  int device; // CUDA device ordinal, set by init_CUDA_context_common
43  int id; // this host's ID (asserted equal to MarshalGraph::id on load)
44  unsigned int numOwned; // Number of nodes owned (masters) by this host
45  unsigned int beginMaster; // local id of the beginning of master nodes
46  unsigned int numNodesWithEdges; // Number of nodes (masters + mirrors) that
47  // have outgoing edges
48  CSRGraphTy gg; // GPU-resident CSR graph (filled by graph.copy_to_gpu)
49  // offsets/is_updated are sized to the largest master/mirror table across
50  // all hosts (max_shared_size in load_graph_CUDA_common).
51  DeviceOnly<unsigned int> offsets; // union across master/mirror of all hosts
52  Shared<DynamicBitset> is_updated; // union across master/mirror of all hosts
53 };
54 
// Per-field GPU state: the node-property array plus the staging buffer and
// dirty bitset used for host-to-host synchronization.
// NOTE(review): the opening "struct ... {" line is elided in this rendering.
55 template <typename Type>
57  Shared<Type> data; // one value per graph node (alloc'd to gg.nnodes)
58  Shared<DynamicBitset> is_updated; // per-node dirty bits (alloc'd to gg.nnodes)
59  DeviceOnly<Type> shared_data; // union across master/mirror of all hosts
60 };
61 
/**
 * Binds this context to a CUDA device and records the ordinal in ctx->device.
 *
 * @param ctx    common context to initialize (ctx->id is used only for the
 *               log message prefix)
 * @param device requested CUDA device ordinal, or -1 to keep whatever device
 *               is already current for this thread
 * @returns true on success; false if an out-of-range ordinal was requested
 *          (an error message is printed to stderr)
 */
bool init_CUDA_context_common(struct CUDA_Context_Common* ctx, int device) {
  struct cudaDeviceProp dev;
  if (device == -1) {
    // Caller did not pick a device: query the current one so we can record it.
    check_cuda(cudaGetDevice(&device));
  } else {
    int count;
    check_cuda(cudaGetDeviceCount(&count));
    // Valid ordinals are 0 .. count-1, so `device == count` is also out of
    // range (the previous `device > count` check was off by one).
    if (device >= count) {
      fprintf(stderr, "Error: Out-of-range GPU %d specified (%d total GPUs)\n",
              device, count);
      return false;
    }
    check_cuda(cudaSetDevice(device));
  }
  ctx->device = device;
  check_cuda(cudaGetDeviceProperties(&dev, device));
  printf("[%d] Using GPU %d: %s\n", ctx->id, device, dev.name);
  return true;
}
81 
// Marshals the CPU-side graph `g` into this context: copies the CSR topology
// to the GPU and builds the per-host master/mirror shared-node tables.
// NOTE(review): the first signature line (original line 82) and one body line
// (original line 87, presumably ctx->numNodesWithEdges) are elided in this
// rendering.
83  unsigned num_hosts) {
84  CSRGraphTy graph;
85  ctx->numOwned = g.numOwned;
86  ctx->beginMaster = g.beginMaster;
88  assert(ctx->id == g.id);
89 
// Rough host-memory estimate: row offsets + edge destinations + node data,
// plus edge data.
90  size_t mem_usage = ((g.nnodes + 1) + g.nedges) * sizeof(index_type) +
91  (g.nnodes) * sizeof(node_data_type);
// NOTE(review): counting edge-data bytes only when g.edge_data is NULL looks
// inverted -- confirm whether this should be `if (g.edge_data)`.
92  if (!g.edge_data)
93  mem_usage += (g.nedges) * sizeof(edge_data_type);
// NOTE(review): 1048756 is not 1024*1024 (1048576) -- likely a typo; also
// `%3u` does not match a size_t argument on LP64 platforms (should be %zu).
94  printf("[%d] Host memory for graph: %3u MB\n", ctx->id, mem_usage / 1048756);
95 
96  // copy the graph to the GPU
97  graph.nnodes = g.nnodes;
98  graph.nedges = g.nedges;
99  graph.row_start = g.row_start;
100  graph.edge_dst = g.edge_dst;
101  graph.node_data = g.node_data;
102  graph.edge_data = g.edge_data;
103  graph.copy_to_gpu(ctx->gg);
104 
// Build the per-host master table: copy the counts, then upload each
// non-empty node-ID list to the device while tracking the widest table.
105  size_t max_shared_size = 0; // for union across master/mirror of all hosts
106  ctx->master.num_nodes =
107  (unsigned int*)calloc(num_hosts, sizeof(unsigned int));
108  memcpy(ctx->master.num_nodes, g.num_master_nodes,
109  sizeof(unsigned int) * num_hosts);
// NOTE(review): element type is DeviceOnly<unsigned int> but the calloc size
// uses sizeof(Shared<unsigned int>) -- confirm the two types have equal size.
110  ctx->master.nodes = (DeviceOnly<unsigned int>*)calloc(
111  num_hosts, sizeof(Shared<unsigned int>));
112  for (uint32_t h = 0; h < num_hosts; ++h) {
113  if (ctx->master.num_nodes[h] > 0) {
114  ctx->master.nodes[h].alloc(ctx->master.num_nodes[h]);
115  ctx->master.nodes[h].copy_to_gpu(g.master_nodes[h],
116  ctx->master.num_nodes[h]);
117  }
118  if (ctx->master.num_nodes[h] > max_shared_size) {
119  max_shared_size = ctx->master.num_nodes[h];
120  }
121  }
// Same construction for the mirror table.
122  ctx->mirror.num_nodes =
123  (unsigned int*)calloc(num_hosts, sizeof(unsigned int));
124  memcpy(ctx->mirror.num_nodes, g.num_mirror_nodes,
125  sizeof(unsigned int) * num_hosts);
// NOTE(review): same sizeof(Shared<unsigned int>) vs DeviceOnly mismatch as
// the master table above.
126  ctx->mirror.nodes = (DeviceOnly<unsigned int>*)calloc(
127  num_hosts, sizeof(Shared<unsigned int>));
128  for (uint32_t h = 0; h < num_hosts; ++h) {
129  if (ctx->mirror.num_nodes[h] > 0) {
130  ctx->mirror.nodes[h].alloc(ctx->mirror.num_nodes[h]);
131  ctx->mirror.nodes[h].copy_to_gpu(g.mirror_nodes[h],
132  ctx->mirror.num_nodes[h]);
133  }
134  if (ctx->mirror.num_nodes[h] > max_shared_size) {
135  max_shared_size = ctx->mirror.num_nodes[h];
136  }
137  }
// Scratch buffers sized to the largest master/mirror table seen above.
138  ctx->offsets.alloc(max_shared_size);
139  ctx->is_updated.alloc(1);
140  ctx->is_updated.cpu_wr_ptr()->alloc(max_shared_size);
141  // printf("[%u] load_graph_GPU: %u owned nodes of total %u resident, %lu
142  // edges\n", ctx->id, ctx->nowned, graph.nnodes, graph.nedges);
143 }
144 
/**
 * Estimates (without allocating) the device/host bytes that
 * load_graph_CUDA_common will consume for the shared master/mirror tables.
 *
 * @param g         marshaled CPU graph holding the per-host node counts
 * @param num_hosts number of hosts in the communication group
 * @returns estimated byte count
 */
size_t mem_usage_CUDA_common(MarshalGraph& g, unsigned num_hosts) {
  // Fixed bookkeeping: a count array plus a handle array for each of the
  // two tables (master and mirror).
  size_t usage = 2 * (num_hosts * sizeof(unsigned int) +
                      num_hosts * sizeof(Shared<unsigned int>));
  // Per-host node-ID lists; track the widest list across both tables.
  size_t widest = 0; // for union across master/mirror of all hosts
  for (uint32_t h = 0; h < num_hosts; ++h) {
    size_t masters = g.num_master_nodes[h];
    size_t mirrors = g.num_mirror_nodes[h];
    usage += masters * sizeof(unsigned int);
    usage += mirrors * sizeof(unsigned int);
    if (masters > widest) {
      widest = masters;
    }
    if (mirrors > widest) {
      widest = mirrors;
    }
  }
  // Offsets buffer plus the dynamic bitset (one 64-bit word per 64 entries).
  usage += widest * sizeof(unsigned int);
  usage += ((widest + 63) / 64) * sizeof(unsigned long long int);
  return usage;
}
172 
// Allocates the GPU buffers for one node field: a per-node value array, a
// staging buffer sized to the widest master/mirror table, and a per-node
// dirty bitset. Must run after load_graph_CUDA_common (reads ctx->gg and the
// master/mirror tables).
// NOTE(review): the first signature line (original line 174, the ctx
// parameter) is elided in this rendering.
173 template <typename Type>
175  struct CUDA_Context_Field<Type>* field,
176  unsigned num_hosts) {
177  field->data.alloc(ctx->gg.nnodes);
// Recompute the widest shared table (same value load_graph_CUDA_common used
// for ctx->offsets).
178  size_t max_shared_size = 0; // for union across master/mirror of all hosts
179  for (uint32_t h = 0; h < num_hosts; ++h) {
180  if (ctx->master.num_nodes[h] > max_shared_size) {
181  max_shared_size = ctx->master.num_nodes[h];
182  }
183  }
184  for (uint32_t h = 0; h < num_hosts; ++h) {
185  if (ctx->mirror.num_nodes[h] > max_shared_size) {
186  max_shared_size = ctx->mirror.num_nodes[h];
187  }
188  }
189  field->shared_data.alloc(max_shared_size);
190  field->is_updated.alloc(1);
191  field->is_updated.cpu_wr_ptr()->alloc(ctx->gg.nnodes);
192 }
193 
// Estimates (without allocating) the bytes load_graph_CUDA_field will consume
// for one field of element type Type: per-node values + staging buffer sized
// to the widest master/mirror table + the per-node dirty bitset.
// NOTE(review): the first signature line (original line 195, the `field`
// parameter) is elided in this rendering; `field` is not used in the visible
// body -- Type alone determines the estimate.
194 template <typename Type>
196  MarshalGraph& g, unsigned num_hosts) {
197  size_t mem_usage = 0;
// One value per graph node.
198  mem_usage += g.nnodes * sizeof(Type);
199  size_t max_shared_size = 0; // for union across master/mirror of all hosts
200  for (uint32_t h = 0; h < num_hosts; ++h) {
201  if (g.num_master_nodes[h] > max_shared_size) {
202  max_shared_size = g.num_master_nodes[h];
203  }
204  }
205  for (uint32_t h = 0; h < num_hosts; ++h) {
206  if (g.num_mirror_nodes[h] > max_shared_size) {
207  max_shared_size = g.num_mirror_nodes[h];
208  }
209  }
// Staging buffer plus the dynamic bitset (one 64-bit word per 64 nodes).
210  mem_usage += max_shared_size * sizeof(Type);
211  mem_usage += ((g.nnodes + 63) / 64) * sizeof(unsigned long long int);
212  return mem_usage;
213 }
Contains forward declarations and the definition of the MarshalGraph class, which is used to marshal ...
struct CUDA_Context_Shared mirror
Definition: libgluon/include/galois/cuda/Context.h:50
bool init_CUDA_context_common(struct CUDA_Context_Common *ctx, int device)
Definition: libgluon/include/galois/cuda/Context.h:62
unsigned int numOwned
Definition: libgluon/include/galois/cuda/Context.h:44
Shared< DynamicBitset > is_updated
Definition: libgluon/include/galois/cuda/Context.h:52
unsigned int numNodesWithEdges
Definition: libgluon/include/galois/cuda/Context.h:46
DeviceOnly< Type > shared_data
Definition: libgluon/include/galois/cuda/Context.h:59
unsigned int index_type
Definition: EdgeHostDecls.h:33
unsigned int * num_mirror_nodes
Definition: HostDecls.h:53
unsigned int numNodesWithEdges
Definition: HostDecls.h:43
Shared< DynamicBitset > is_updated
Definition: libgluon/include/galois/cuda/Context.h:58
Definition: libgluon/include/galois/cuda/Context.h:56
unsigned int beginMaster
Definition: libgluon/include/galois/cuda/Context.h:45
void load_graph_CUDA_field(struct CUDA_Context_Common *ctx, struct CUDA_Context_Field< Type > *field, unsigned num_hosts)
Definition: libgluon/include/galois/cuda/Context.h:174
unsigned int numOwned
Definition: HostDecls.h:41
size_t mem_usage_CUDA_common(MarshalGraph &g, unsigned num_hosts)
Definition: libgluon/include/galois/cuda/Context.h:145
edge_data_type * edge_data
Definition: HostDecls.h:50
Definition: libgluon/include/galois/cuda/Context.h:36
void load_graph_CUDA_common(struct CUDA_Context_Common *ctx, MarshalGraph &g, unsigned num_hosts)
Definition: libgluon/include/galois/cuda/Context.h:82
size_t mem_usage_CUDA_field(struct CUDA_Context_Field< Type > *field, MarshalGraph &g, unsigned num_hosts)
Definition: libgluon/include/galois/cuda/Context.h:195
int id
Definition: libgluon/include/galois/cuda/Context.h:43
Definition: libgluon/include/galois/cuda/Context.h:41
size_t nnodes
Definition: HostDecls.h:39
unsigned int * num_master_nodes
Definition: HostDecls.h:51
int id
Definition: HostDecls.h:45
index_type * edge_dst
Definition: HostDecls.h:48
unsigned int ** master_nodes
Definition: HostDecls.h:52
int device
Definition: libgluon/include/galois/cuda/Context.h:42
unsigned int node_data_type
Definition: EdgeHostDecls.h:34
unsigned int * num_nodes
Definition: libgluon/include/galois/cuda/Context.h:37
Shared< Type > data
Definition: libgluon/include/galois/cuda/Context.h:57
index_type * row_start
Definition: HostDecls.h:47
DeviceOnly< unsigned int > offsets
Definition: libgluon/include/galois/cuda/Context.h:51
Definition: HostDecls.h:38
CSRGraphTy gg
Definition: libgluon/include/galois/cuda/Context.h:48
unsigned edge_data_type
Definition: EdgeHostDecls.h:35
size_t nedges
Definition: HostDecls.h:40
unsigned int beginMaster
Definition: HostDecls.h:42
unsigned int ** mirror_nodes
Definition: HostDecls.h:54
node_data_type * node_data
Definition: HostDecls.h:49
struct CUDA_Context_Shared master
Definition: libgluon/include/galois/cuda/Context.h:49
DeviceOnly< unsigned int > * nodes
Definition: libgluon/include/galois/cuda/Context.h:38