|
template<typename T , typename S > |
void | memset (GlobalAddress< T > base, S value, size_t count) |
| Initialize an array of elements of generic type with a given value. More...
|
|
template<typename T , typename S > |
void | memset (T *base, S value, size_t count) |
| Type-based memset for local arrays to match what is provided for distributed arrays. More...
|
|
template<typename T > |
void | memcpy (GlobalAddress< T > dst, GlobalAddress< T > src, size_t nelem) |
| Memcpy over Grappa global arrays. More...
|
|
template<typename T > |
void | memcpy (T *dst, T *src, size_t nelem) |
| Helper so we don't have to change the code if we change a Global pointer to a normal pointer (in theory). More...
|
|
template<> |
void | memcpy< void > (void *dst, void *src, size_t nelem) |
|
template<GlobalCompletionEvent * GCE = &impl::local_gce, typename T = void> |
void | memcpy_async (GlobalAddress< T > dst, GlobalAddress< T > src, size_t nelem) |
| Asynchronous version of memcpy, spawns only on cores with array elements. More...
|
|
template<typename T > |
void | prefix_sum (GlobalAddress< T > array, size_t nelem) |
| Not implemented yet. More...
|
|
void | barrier () |
| Blocking SPMD barrier (must be called once on all cores to continue) More...
|
|
template<typename F > |
void | call_on_all_cores (F work) |
| Call message (work that cannot block) on all cores, block until ack received from all. More...
|
|
template<typename F > |
void | on_all_cores (F work) |
| Spawn a private task on each core, block until all complete. More...
|
|
template<typename T , T(*)(const T &, const T &) ReduceOp> |
T | allreduce (T myval) |
| Called from SPMD context, reduces values from all cores calling allreduce and returns reduced values to everyone. More...
|
|
template<typename T , T(*)(const T &, const T &) ReduceOp> |
void | allreduce_inplace (T *array, size_t nelem=1) |
| Called from SPMD context. More...
|
|
template<typename T , T(*)(const T &, const T &) ReduceOp> |
T | reduce (const T *global_ptr) |
| Called from a single task (usually user_main), reduces values from all cores onto the calling node. More...
|
|
template<typename T , T(*)(const T &, const T &) ReduceOp> |
T | reduce (GlobalAddress< T > localizable) |
| Reduce over a symmetrically allocated object. More...
|
|
template<typename T , typename P , T(*)(const T &, const T &) ReduceOp, T(*)(GlobalAddress< P >) Accessor> |
T | reduce (GlobalAddress< P > localizable) |
| Reduce over a member of a symmetrically allocated object. More...
|
|
template<typename F = nullptr_t> |
auto | sum_all_cores (F func) -> decltype(func()) |
| Custom reduction from all cores. More...
|
|
double | walltime (void) |
| "Universal" wallclock time (works at least for Mac, and most Linux) More...
|
|
template<typename T > |
const char * | typename_of () |
| Get string containing name of type. More...
|
|
template<typename T > |
const char * | typename_of (const T &unused) |
| Get string containing name of type. More...
|
|
const Core | cores () |
| How many cores are there in this job? More...
|
|
const Core | mycore () |
| What's my core ID in this job? More...
|
|
const Core | locale_cores () |
| How many cores are in my shared memory domain? More...
|
|
const Core | locale_mycore () |
| What's my core ID within my shared memory domain? More...
|
|
const Locale | locales () |
| How many shared memory domains are in this job? More...
|
|
const Locale | mylocale () |
| What's my shared memory domain ID within this job? More...
|
|
const Locale | locale_of (Core c) |
| What shared memory domain does core c belong to? More...
|
|
const char * | hostname () |
| What name does MPI think this node has? More...
|
|
template<typename CompletionType > |
void | complete (CompletionType *ce) |
| Match ConditionVariable-style function call. More...
|
|
void | complete (GlobalAddress< CompletionEvent > ce, int64_t decr=1) |
| Overload to work on GlobalAddresses. More...
|
|
void | enroll (GlobalAddress< CompletionEvent > ce, int64_t incr=1) |
|
template<TaskMode B = TaskMode::Bound, typename TF = decltype(nullptr)> |
void | spawn (CompletionEvent *ce, TF tf) |
| Spawn Grappa::privateTask and implicitly synchronize with the given CompletionEvent (or GlobalCompletionEvent, though if using GlobalCompletionEvent, it may be better to use the version that takes the GCE pointer as a template parameter only). More...
|
|
void | wait (GlobalAddress< ConditionVariable > m) |
| Proxy for remote ConditionVariable manipulation. More...
|
|
template<typename ConditionVariable > |
void | signal (const GlobalAddress< ConditionVariable > m) |
|
void | signal_all (GlobalAddress< ConditionVariable > m) |
| TODO: implement. More...
|
|
template<typename ConditionVariable > |
void | add_waiter (ConditionVariable *cv, Worker *w) |
| Verify that ConditionVariable is only one word. More...
|
|
template<typename ConditionVariable > |
void | wait (ConditionVariable *cv) |
|
template<typename ConditionVariable > |
void | signal (ConditionVariable *cv) |
| Wake one waiter on a condition variable. More...
|
|
template<typename ConditionVariable > |
void | broadcast (ConditionVariable *cv) |
| Wake all waiters on a condition variable. More...
|
|
template<TaskMode B = TaskMode::Bound, GlobalCompletionEvent * C = &impl::local_gce, typename F = decltype(nullptr)> |
void | spawnRemote (Core dest, F f) |
| Synchronizing remote private task spawn. More...
|
|
template<typename T > |
Grappa::ExternalCountPayloadMessage Grappa::PayloadMessage | __attribute__ ((aligned(64))) |
|
template<typename T > |
ExternalCountPayloadMessage< T > * | send_heap_message (Core dest, T t, void *payload, size_t payload_size, uint64_t *count) |
|
template<typename T > |
void | read_array (File &f, GlobalAddress< T > array, size_t nelem) |
| Read a file or directory of files into a global array. More...
|
|
template<typename T > |
void | save_array (File &f, bool asDirectory, GlobalAddress< T > array, size_t nelem) |
|
template<typename T > |
void | write_array_unordered (std::string filename, GlobalAddress< T > array, size_t nelem) |
|
template<typename T > |
void | read_array_unordered (std::string filename, GlobalAddress< T > array, size_t nelem) |
|
template<typename T > |
void | fill_remote (GlobalAddress< FullEmpty< T >> result_addr, const T &val) |
|
template<typename T > |
T | readFF (GlobalAddress< FullEmpty< T >> fe_addr) |
| Remote version of readFF method. Suspend until FullEmpty is full, and then return its contents, leaving it full. More...
|
|
template<typename T , typename U > |
void | writeXF (GlobalAddress< FullEmpty< T >> fe_addr, const U &val) |
| Remote, blocking version of writeXF method. Writes data to FullEmpty no matter what its current state, leaving it full. Existing contents are overwritten. More...
|
|
template<typename T > |
T | writeXF (FullEmpty< T > *fe_addr, T t) |
| Non-member version of writeXF method. More...
|
|
template<typename T > |
T | writeEF (FullEmpty< T > *fe_addr, T t) |
| Non-member version of writeEF method. More...
|
|
template<typename T > |
T | writeFF (FullEmpty< T > *fe_addr, T t) |
| Non-member version of writeFF method. More...
|
|
template<typename T > |
T | readXX (FullEmpty< T > *fe_addr) |
| Non-member version of readXX method. More...
|
|
template<typename T > |
T | readFF (FullEmpty< T > *fe_addr) |
| Non-member version of readFF method. More...
|
|
template<typename T > |
T | readFE (FullEmpty< T > *fe_addr) |
| Non-member version of readFE method. More...
|
|
template<typename T = int8_t> |
GlobalAddress< T > | global_alloc (size_t count) |
| Allocate bytes from the global shared heap. More...
|
|
template<typename T > |
void | global_free (GlobalAddress< T > address) |
| Free memory allocated from global shared heap. More...
|
|
template<typename T , Core MASTER_CORE = 0> |
GlobalAddress< T > | symmetric_global_alloc () |
| Allocate space for a T at the same localizable global address on all cores (must currently round up to a multiple of block_size plus an additional block to ensure there is a valid address range no matter which core allocation starts on). More...
|
|
template<SyncMode S = SyncMode::Blocking, GlobalCompletionEvent * C = &impl::local_gce, int64_t Th = impl::USE_LOOP_THRESHOLD_FLAG, typename F = nullptr_t, typename T > |
void | forall (GlobalAddress< GlobalBag< T >> b, F body) |
|
CompletionTarget | enroll (GlobalAddress< GlobalCompletionEvent > ce, int64_t decr=1) |
|
void | complete (GlobalAddress< GlobalCompletionEvent > ce, int64_t decr=1) |
| Allow calling send_completion the old way (with a global address). TODO: replace all instances with gce.send_completion and remove this? More...
|
|
template<TaskMode B, GlobalCompletionEvent * C, typename TF = decltype(nullptr)> |
void | spawn (TF tf) |
| Synchronizing private task spawn. More...
|
|
template<GlobalCompletionEvent * C = &impl::local_gce, typename F = decltype(nullptr)> |
void | finish (F f) |
|
template<SyncMode S = SyncMode::Blocking, GlobalCompletionEvent * C = &impl::local_gce, typename K = nullptr_t, typename V = nullptr_t, typename F = nullptr_t> |
void | insert (GlobalAddress< GlobalHashMap< K, V >> self, K key, F on_insert) |
|
template<GlobalCompletionEvent * GCE = &impl::local_gce, int64_t Threshold = impl::USE_LOOP_THRESHOLD_FLAG, typename T = decltype(nullptr), typename V = decltype(nullptr), typename F = decltype(nullptr)> |
void | forall (GlobalAddress< GlobalHashMap< T, V >> self, F visit) |
|
template<typename G > |
AdjIterator< G > | adj (GlobalAddress< G > g, typename G::Vertex &v) |
| Iterator over adjacent vertices. Used with Grappa::forall(). More...
|
|
template<typename G > |
AdjIterator< G > | adj (GlobalAddress< G > g, GlobalAddress< typename G::Vertex > v) |
|
template<typename G > |
AdjIterator< G > | adj (GlobalAddress< G > g, VertexID i) |
|
| OVERLOAD (SyncMode S=SyncMode::Blocking, GlobalCompletionEvent *C=&impl::local_gce, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) |
| Parallel loop over adjacent vertices. Use adj() to construct iterator. More...
|
|
| OVERLOAD (GlobalCompletionEvent *C, SyncMode S=SyncMode::Blocking, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) |
|
template<typename G = nullptr_t, typename F = nullptr_t> |
void | serial_for (AdjIterator< G > a, F body) |
|
template<GlobalCompletionEvent * C = &impl::local_gce, int64_t Threshold = impl::USE_LOOP_THRESHOLD_FLAG, typename V , typename E , typename F = nullptr_t> |
void | forall (GlobalAddress< Graph< V, E >> g, F loop_body) |
| Parallel iterator over Graph, specializes based on arguments. More...
|
|
void | local_load_bintsv4 (const char *filename, Grappa::TupleGraph::Edge *local_ptr, Grappa::TupleGraph::Edge *local_end) |
| Helper for reading edges from the given file (counterpart to local_save_bintsv4). More...
|
|
void | local_save_bintsv4 (const char *filename, Grappa::TupleGraph::Edge *local_ptr, Grappa::TupleGraph::Edge *local_end) |
| Helper function run on each core to save edges stored as int32_t tuples. More...
|
|
void | local_save_tsv (const char *filename, Grappa::TupleGraph::Edge *local_ptr, Grappa::TupleGraph::Edge *local_end) |
| Helper function run on each core to save edges stored as ASCII tab-delimited pairs. More...
|
|
void | global_heap_init (size_t init_size) |
|
void | init (int *argc_p, char **argv_p[], int64_t size=-1) |
| Initialize Grappa. More...
|
|
int | finalize () |
| Clean up Grappa. More...
|
|
template<typename T > |
T * | locale_alloc (size_t n=1) |
| Allocate memory in locale shared heap. More...
|
|
template<typename T > |
T * | locale_alloc_aligned (size_t alignment, size_t n=1) |
|
template<typename T , typename... Args> |
T * | locale_new (Args &&...args) |
| Allocate an object in the locale shared heap, passing arguments to its constructor. More...
|
|
template<typename T > |
T * | locale_new () |
| Allocate an object in the locale shared heap. More...
|
|
template<typename T > |
T * | locale_new_array (size_t n=1) |
| Allocate an array in the locale shared heap. More...
|
|
void | locale_free (void *ptr) |
| Free memory that was allocated from locale shared heap. More...
|
|
template<typename T > |
Grappa::Message Grappa::impl::MessageBase | __attribute__ ((aligned(64))) |
|
template<typename T > |
Grappa::PayloadMessage Grappa::impl::MessageBase | __attribute__ ((aligned(64))) |
|
template<typename T > |
Message< T > | message (Core dest, T t) |
| Construct a message allocated on the stack. More...
|
|
template<typename T > |
PayloadMessage< T > | message (Core dest, T t, void *payload, size_t payload_size) |
| Message with payload. More...
|
|
template<typename T > |
Message< T > | send_message (Core dest, T t) |
| Same as message, but immediately enqueued to be sent. More...
|
|
template<typename T > |
PayloadMessage< T > | send_message (Core dest, T t, void *payload, size_t payload_size) |
| Message with payload, immediately enqueued to be sent. More...
|
|
template<typename Mutex > |
void | lock (Mutex *t) |
| Verify that Mutex is only one word. More...
|
|
template<typename Mutex > |
bool | trylock (Mutex *t) |
| Try to lock a mutex. Note: wait scheme is unfairly LIFO. More...
|
|
template<typename Mutex > |
bool | is_unlocked (Mutex *t) |
|
template<typename Mutex > |
void | unlock (Mutex *t) |
| Unlock a mutex. Note: wait scheme is unfairly LIFO. More...
|
|
template<typename Mutex > |
void | lock (GlobalAddress< Mutex > m) |
| TODO: implement. More...
|
|
template<typename Mutex > |
void | trylock (GlobalAddress< Mutex > m) |
| TODO: implement. More...
|
|
template<typename Mutex > |
void | unlock (GlobalAddress< Mutex > m) |
| TODO: implement. More...
|
|
GlobalCompletionEvent & | default_gce () |
|
template<SyncMode S = SyncMode::Blocking, TaskMode B = TaskMode::Bound, GlobalCompletionEvent * GCE = nullptr, int64_t Threshold = impl::USE_LOOP_THRESHOLD_FLAG, typename F = decltype(nullptr)> |
void | forall_here (int64_t start, int64_t iters, F loop_body) |
|
| FORALL_HERE_OVERLOAD (TaskMode B, SyncMode S=SyncMode::Blocking, GlobalCompletionEvent *GCE=nullptr, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) |
|
| FORALL_HERE_OVERLOAD (SyncMode S, GlobalCompletionEvent *GCE, TaskMode B=TaskMode::Bound, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) |
|
| FORALL_HERE_OVERLOAD (SyncMode S, GlobalCompletionEvent *GCE, int64_t Threshold, TaskMode B=TaskMode::Bound) |
|
| FORALL_HERE_OVERLOAD (GlobalCompletionEvent *GCE, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG, TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking) |
|
| FORALL_HERE_OVERLOAD (int64_t Threshold, GlobalCompletionEvent *GCE=nullptr, TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking) |
|
| FORALL_OVERLOAD (TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking, GlobalCompletionEvent *C=&impl::local_gce, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) |
|
| FORALL_OVERLOAD (SyncMode S, TaskMode B=TaskMode::Bound, GlobalCompletionEvent *C=&impl::local_gce, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) |
|
| FORALL_OVERLOAD (GlobalCompletionEvent *C, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG, TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking) |
|
| FORALL_OVERLOAD (int64_t Threshold, GlobalCompletionEvent *C=&impl::local_gce, TaskMode B=TaskMode::Bound, SyncMode S=SyncMode::Blocking) |
|
| FORALL_OVERLOAD (TaskMode B, GlobalCompletionEvent *C, SyncMode S=SyncMode::Blocking, int64_t Threshold=impl::USE_LOOP_THRESHOLD_FLAG) |
|
template<typename T > |
std::pair< Core, Core > | cores_with_elements (GlobalAddress< T > base, size_t nelem) |
| Return range of cores that have elements for the given linear address range. More...
|
|
template<GlobalCompletionEvent * GCE = &impl::local_gce, int64_t Threshold = impl::USE_LOOP_THRESHOLD_FLAG, typename T = decltype(nullptr), typename F = decltype(nullptr)> |
void | on_cores_localized_async (GlobalAddress< T > base, int64_t nelems, F do_on_core) |
| Run privateTasks on each core that contains elements of the given region of global memory. More...
|
|
template<TaskMode B = TaskMode::Bound, SyncMode S = SyncMode::Blocking, GlobalCompletionEvent * GCE = &impl::local_gce, int64_t Threshold = impl::USE_LOOP_THRESHOLD_FLAG, typename T = decltype(nullptr), typename F = decltype(nullptr)> |
void | forall (GlobalAddress< T > base, int64_t nelems, F loop_body) |
| Parallel loop over a global array. More...
|
|
template<typename T > |
Grappa::ReuseMessage Grappa::Message | __attribute__ ((aligned(64))) |
|
template<typename Semaphore > |
void | increment (Semaphore *s, int64_t incr=1) |
|
template<typename Semaphore > |
void | decrement (Semaphore *s, int64_t decr=1) |
|
template<typename Semaphore > |
bool | try_decrement (Semaphore *s, int64_t decr=1) |
|
template<typename Semaphore > |
int64_t | get_value (Semaphore *s) |
|
template<typename T > |
Message< T > * | heap_message (Core dest, T t) |
|
template<typename T > |
PayloadMessage< T > * | heap_message (Core dest, T t, void *payload, size_t payload_size) |
| Message with payload, allocated on heap. More...
|
|
template<typename T > |
Message< T > * | send_heap_message (Core dest, T t) |
| Same as message, but allocated on heap and immediately enqueued to be sent. More...
|
|
template<typename T > |
PayloadMessage< T > * | send_heap_message (Core dest, T t, void *payload, size_t payload_size) |
| Message with payload, allocated on heap and immediately enqueued to be sent. More...
|
|
void | invoke (SuspendedDelegate *c) |
|
bool | is_suspended_delegate (Worker *w) |
|
template<typename TF > |
void | privateTask (TF tf) |
| Spawn a task visible to this Core only. More...
|
|
template<typename TF > |
void | publicTask (TF tf) |
| Spawn a task that may be stolen between cores. More...
|
|
template<typename TF > |
void | spawn_worker (TF &&tf) |
| internal More...
|
|
template<TaskMode B = TaskMode::Bound, typename F = decltype(nullptr)> |
void | spawn (F f) |
|
template<typename FP > |
void | run (FP fp) |
|
Worker * | current_worker () |
|