EdgeContext.h
/*
 * This file belongs to the Galois project, a C++ library for exploiting
 * parallelism. The code is being released under the terms of the 3-Clause BSD
 * License (a copy is located in LICENSE.txt at the top-level directory).
 *
 * Copyright (C) 2018, The University of Texas at Austin. All rights reserved.
 * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS
 * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF
 * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF
 * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH
 * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances
 * shall University be liable for incidental, special, indirect, direct or
 * consequential damages or loss of profits, interruption of business, or
 * related expenses which may arise from use of Software or Documentation,
 * including but not limited to those resulting from defects in Software and/or
 * Documentation, or loss or inaccuracy of data of any kind.
 */

#pragma once

#include <cuda.h>
#include "gg.h"
#include "galois/cuda/EdgeHostDecls.h" // EdgeMarshalGraph and related typedefs

struct CUDA_Context_Shared_Edges {
  unsigned int* num_edges;         // per host
  DeviceOnly<unsigned int>* edges; // per host
};

struct CUDA_Context_Common_Edges {
  int device;
  int id;
  unsigned int numOwned;    // number of nodes owned (masters) by this host
  unsigned int beginMaster; // local ID of the first master node
  unsigned int numNodesWithEdges; // number of nodes (masters + mirrors) that
                                  // have outgoing edges
  CSRGraphTy gg;
  struct CUDA_Context_Shared_Edges master;
  struct CUDA_Context_Shared_Edges mirror;
  DeviceOnly<unsigned int> offsets; // union across master/mirror of all hosts
  Shared<DynamicBitset> is_updated; // union across master/mirror of all hosts
};

template <typename Type>
struct CUDA_Context_Field_Edges {
  Shared<Type> data;
  Shared<DynamicBitset> is_updated; // size of edges
  DeviceOnly<Type> shared_data;     // union across master/mirror of all hosts
};

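// Note (illustrative, not part of the original header): distributed Galois
// applications commonly wrap these declarations in an application-specific
// context that derives from CUDA_Context_Common_Edges and adds one
// CUDA_Context_Field_Edges<Type> member per synchronized edge field, e.g.
//
//   struct CUDA_Context : public CUDA_Context_Common_Edges {
//     struct CUDA_Context_Field_Edges<unsigned int> edge_weight; // hypothetical
//   };
//
// The helpers below initialize the device, copy the CSR graph and proxy-edge
// lists into such a context, and size the per-field device storage.
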
bool init_CUDA_context_common_edges(struct CUDA_Context_Common_Edges* ctx,
                                    int device) {
  struct cudaDeviceProp dev;
  if (device == -1) {
    check_cuda(cudaGetDevice(&device));
  } else {
    int count;
    check_cuda(cudaGetDeviceCount(&count));
    if (device >= count) {
      fprintf(stderr, "Error: Out-of-range GPU %d specified (%d total GPUs)\n",
              device, count);
      return false;
    }
    check_cuda(cudaSetDevice(device));
  }
  ctx->device = device;
  check_cuda(cudaGetDeviceProperties(&dev, device));
  printf("[%d] Using GPU %d: %s\n", ctx->id, device, dev.name);
  return true;
}

void load_graph_CUDA_common_edges(struct CUDA_Context_Common_Edges* ctx,
                                  EdgeMarshalGraph& g, unsigned num_hosts,
                                  bool LoadProxyEdges = true) {
  CSRGraphTy graph;
  ctx->numOwned          = g.numOwned;
  ctx->beginMaster       = g.beginMaster;
  ctx->numNodesWithEdges = g.numNodesWithEdges;
  assert(ctx->id == g.id);

  size_t mem_usage = ((g.nnodes + 1) + g.nedges) * sizeof(index_type) +
                     (g.nnodes) * sizeof(node_data_type);
  if (!g.edge_data)
    mem_usage += (g.nedges) * sizeof(edge_data_type);
  printf("[%d] Host memory for graph: %3zu MB\n", ctx->id, mem_usage / 1048576);

  // copy the graph to the GPU
  graph.nnodes    = g.nnodes;
  graph.nedges    = g.nedges;
  graph.row_start = g.row_start;
  graph.edge_dst  = g.edge_dst;
  graph.node_data = g.node_data;
  graph.edge_data = g.edge_data;
  graph.copy_to_gpu(ctx->gg);

  if (LoadProxyEdges) {
    size_t max_shared_size = 0; // for union across master/mirror of all hosts
    ctx->master.num_edges =
        (unsigned int*)calloc(num_hosts, sizeof(unsigned int));
    memcpy(ctx->master.num_edges, g.num_master_edges,
           sizeof(unsigned int) * num_hosts);
    ctx->master.edges = (DeviceOnly<unsigned int>*)calloc(
        num_hosts, sizeof(DeviceOnly<unsigned int>));
    for (uint32_t h = 0; h < num_hosts; ++h) {
      if (ctx->master.num_edges[h] > 0) {
        ctx->master.edges[h].alloc(ctx->master.num_edges[h]);
        ctx->master.edges[h].copy_to_gpu(g.master_edges[h],
                                         ctx->master.num_edges[h]);
      }
      if (ctx->master.num_edges[h] > max_shared_size) {
        max_shared_size = ctx->master.num_edges[h];
      }
    }
    ctx->mirror.num_edges =
        (unsigned int*)calloc(num_hosts, sizeof(unsigned int));
    memcpy(ctx->mirror.num_edges, g.num_mirror_edges,
           sizeof(unsigned int) * num_hosts);
    ctx->mirror.edges = (DeviceOnly<unsigned int>*)calloc(
        num_hosts, sizeof(DeviceOnly<unsigned int>));
    for (uint32_t h = 0; h < num_hosts; ++h) {
      if (ctx->mirror.num_edges[h] > 0) {
        ctx->mirror.edges[h].alloc(ctx->mirror.num_edges[h]);
        ctx->mirror.edges[h].copy_to_gpu(g.mirror_edges[h],
                                         ctx->mirror.num_edges[h]);
      }
      if (ctx->mirror.num_edges[h] > max_shared_size) {
        max_shared_size = ctx->mirror.num_edges[h];
      }
    }
    ctx->offsets.alloc(max_shared_size);
    ctx->is_updated.alloc(1);
    ctx->is_updated.cpu_wr_ptr()->alloc(max_shared_size);
  }
  // printf("[%u] load_graph_GPU: %u owned nodes of total %u resident, %lu
  // edges\n", ctx->id, ctx->nowned, graph.nnodes, graph.nedges);
}

size_t mem_usage_CUDA_common_edges(EdgeMarshalGraph& g, unsigned num_hosts) {
  size_t mem_usage       = 0;
  size_t max_shared_size = 0; // for union across master/mirror of all hosts
  mem_usage += num_hosts * sizeof(unsigned int);
  mem_usage += num_hosts * sizeof(Shared<unsigned int>);
  for (uint32_t h = 0; h < num_hosts; ++h) {
    if (g.num_master_edges[h] > 0) {
      mem_usage += g.num_master_edges[h] * sizeof(unsigned int);
    }
    if (g.num_master_edges[h] > max_shared_size) {
      max_shared_size = g.num_master_edges[h];
    }
  }
  mem_usage += num_hosts * sizeof(unsigned int);
  mem_usage += num_hosts * sizeof(Shared<unsigned int>);
  for (uint32_t h = 0; h < num_hosts; ++h) {
    if (g.num_mirror_edges[h] > 0) {
      mem_usage += g.num_mirror_edges[h] * sizeof(unsigned int);
    }
    if (g.num_mirror_edges[h] > max_shared_size) {
      max_shared_size = g.num_mirror_edges[h];
    }
  }
  mem_usage += max_shared_size * sizeof(unsigned int);
  mem_usage += ((max_shared_size + 63) / 64) * sizeof(unsigned long long int);
  return mem_usage;
}

template <typename Type>
void load_graph_CUDA_field_edges(struct CUDA_Context_Common_Edges* ctx,
                                 struct CUDA_Context_Field_Edges<Type>* field,
                                 unsigned num_hosts) {
  field->data.alloc(ctx->gg.nedges);
  size_t max_shared_size = 0; // for union across master/mirror of all hosts
  for (uint32_t h = 0; h < num_hosts; ++h) {
    if (ctx->master.num_edges[h] > max_shared_size) {
      max_shared_size = ctx->master.num_edges[h];
    }
  }
  for (uint32_t h = 0; h < num_hosts; ++h) {
    if (ctx->mirror.num_edges[h] > max_shared_size) {
      max_shared_size = ctx->mirror.num_edges[h];
    }
  }
  field->shared_data.alloc(max_shared_size);
  field->is_updated.alloc(1);
  field->is_updated.cpu_wr_ptr()->alloc(ctx->gg.nedges);
}

template <typename Type>
size_t mem_usage_CUDA_field_edges(struct CUDA_Context_Field_Edges<Type>* field,
                                  EdgeMarshalGraph& g, unsigned num_hosts) {
  size_t mem_usage = 0;
  mem_usage += g.nedges * sizeof(Type);
  size_t max_shared_size = 0; // for union across master/mirror of all hosts
  for (uint32_t h = 0; h < num_hosts; ++h) {
    if (g.num_master_edges[h] > max_shared_size) {
      max_shared_size = g.num_master_edges[h];
    }
  }
  for (uint32_t h = 0; h < num_hosts; ++h) {
    if (g.num_mirror_edges[h] > max_shared_size) {
      max_shared_size = g.num_mirror_edges[h];
    }
  }
  mem_usage += max_shared_size * sizeof(Type);
  mem_usage += ((g.nedges + 63) / 64) * sizeof(unsigned long long int);
  return mem_usage;
}
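
// Usage sketch (illustrative; assumes a marshalled EdgeMarshalGraph `g`, the
// host count `num_hosts`, this host's rank `host_id`, and the hypothetical
// CUDA_Context with an `edge_weight` field from the note above):
//
//   struct CUDA_Context* ctx = new CUDA_Context();
//   ctx->id = host_id;                         // used only for log messages
//   if (!init_CUDA_context_common_edges(ctx, /*device=*/-1))
//     exit(1);                                 // -1 selects the current device
//   load_graph_CUDA_common_edges(ctx, g, num_hosts);      // CSR + proxy edges
//   load_graph_CUDA_field_edges(ctx, &ctx->edge_weight, num_hosts);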