/* * example.c * joseph adler * august 1995 * * Copyright (C) 1995 Massachusetts Institute of Technology * * Permission to use, copy, modify, distribute, and sell this software * and its documentation for any purpose is hereby granted without * fee, provided that the above copyright notice appear in all copies * and that both that copyright notice and this permission notice * appear in supporting documentation. The author makes no * representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THE AUTHORS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * RCS Id: example.c,v 1.12 1995/08/24 08:24:34 tuna Exp */ /* This example program is designed to help guide the user through * programming a simple program with CRL. It is a simple matrix * multiply program, with some extra hooks for dividing work in some * reasonable fashion. * * Simple serial version, adopted from: * * _Introduction to Algorithms_. Thomas H. Cormen, Charles * E. Leiserson, and Ronald L. Rivest. MIT Press, 1990. * * void mm(int a[N][N], int b[N][N], int c[N][N]) * { * int i, j, k; * * for (i=0; i<N; i++) * { * for (j=0; j<N; j++) * { * c[i][j] = 0; * for (k=0; k<N; k++) * c[i][j] = c[i][j] + (a[i][k] * b[k][j]); * } * } * } * * Because the purpose of this example is to illustrate how to program * in CRL, the example is intentionally kept quite simple (e.g., the * two input matrices are hard coded; the particular work and data * decomposition used is not particularly efficient). */ /* We begin by #including "crl.h", #defining some constants, and * declaring the two input matrices. */ #include <stdio.h> #include <stdlib.h> #include <math.h> #if defined(CM5) #include <cm/cmmd.h> #elif defined(ALEWIFE) #include <parallel.h> #endif #include "crl.h" /* MASTER is the node responsible for creating the regions * used to represent the destination matrix (c) */ #define MASTER (0) /* N is the size of the (square) matrices used */ #define N (5) /* IntMatrix is a typedef for an N by N matrix of int elements */ typedef int IntMatrix[N][N]; /* RidMatrix is a typedef for an N by N matrix of rid_t elements * (an rid_t is a CRL region identifier) */ typedef rid_t RidMatrix[N][N]; /* the first input matrix (static; replicated on all processors) */ IntMatrix a = {{ 2, 0, 0, 0, 0}, { 0, 2, 0, 0, 0}, { 0, 0, 2, 0, 0}, { 0, 0, 0, 2, 0}, { 0, 0, 0, 0, 2}}; /* the second input matrix (static; replicated on all processors) */ IntMatrix b = {{ 1, 2, 3, 4, 5}, { 6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}, {16, 17, 18, 19, 20}, {21, 22, 23, 24, 25}}; /* Now we need to consider the question of how we divide the work * involved in matrix multiplication. Basically, we want the program * to proceed as follows: * * 1. The MASTER node creates the regions used to represent the * destination matrix (c). * * 2. Each node does its share of the computation and stops at a * barrier when done. * * 3. After nodes return from the barrier, all work is done. The * MASTER node prints out the values found in the destination * matrix and all nodes exit. * * To implement this, we create a matrix of region identifiers, each * of which will in turn refer to a region which contains the value * of that element of the destination matrix (c). */ RidMatrix c; /* We divide work in this program so that each node is called upon to * perform approximately the same amount of work. The scheme which is * used is fairly simple: we color each square in the matrix with a * node index number, traversing the n by n matrix from left to right, * and top to bottom, counting off node indices in ascending order. For * example, with seven nodes and a 4 by 4 matrix, assignments would * look like: * * 0 1 2 3 * 4 5 6 0 * 1 2 3 4 * 5 6 0 1 */ /* If we're using the TCP/UNIX version of CRL, the main procedure can * be found in ../pvm.common/pvm_app.c. This procedure takes care of * setting up a pvm group, and spawning processes. It then calls * main2(). Please see this code for more information on how to set up * a program to run using pvm. * * If we're running CRL on any other platform, then we need to include * a main procedure that calls main2(). */ extern int main2(int, char **); #if defined(TCPUNIX) extern char *GROUP; #else int main(int argc, char **argv) { #if defined(CM5) CMMD_set_io_mode(0, CMMD_independent); CMMD_set_io_mode(1, CMMD_independent); CMMD_set_io_mode(2, CMMD_independent); #endif #if defined(ALEWIFE) do_in_parallel(main2, argc, argv); #else main2(argc, argv); #endif return 0; } #endif int main2(int argc, char **argv) { int index; int row, col; int k; int *tmp; /* initialize CRL */ #if defined(TCPUNIX) crl_init(GROUP); #else crl_init(); #endif if (crl_self_addr == MASTER) { /* on the MASTER node, create all the regions used to represent * the destination matrix */ for (row=0; row<N; row++) for (col=0; col<N; col++) c[row][col] = rgn_create(sizeof(int)); /* broadcast this set of region identifiers to all other nodes */ rgn_bcast_send(sizeof(RidMatrix), c); } else { /* receive the set of region identifiers broadcast by the MASTER * node */ rgn_bcast_recv(sizeof(RidMatrix), c); } /* work distribution algorithm: * * Each node then computes the value of all destination matrix * elements where the index of the element in the matrix (numbering * from left to right, top to bottom) modulo the number of nodes is * equal to the local node index. * * that is, if * * index = (N * row) + col * * each node computes all destination matrix elements for which * * (index % crl_num_nodes) == crl_self_addr */ for (index=crl_self_addr; index<(N*N); index+=crl_num_nodes) { row = index / N; col = index % N; printf("node %d computing c[%d][%d]\n", crl_self_addr, row, col); /* now, map the region representing the current destination matrix * element and initiate a write so we can write to it */ tmp = rgn_map(c[row][col]); rgn_start_write(tmp); /* compute the value of this element of the destination matrix */ *tmp = 0; for (k=0; k<N; k++) *tmp += (a[row][k] * b[k][col]); /* since we're done with this element of the destination matrix, * terminate the write operation and unmap the region */ rgn_end_write(tmp); rgn_unmap(tmp); } /* wait for all nodes to finish computing */ rgn_barrier(); /* MASTER node prints out the destination matrix */ if (crl_self_addr == MASTER) { printf("Final matrix:\n"); for (row=0; row<N; row++) { for (col=0; col<N; col++) { /* map the region representing the current element and * initiate a read operation */ tmp = (int *) rgn_map(c[row][col]); rgn_start_read(tmp); printf("%6d ", *tmp); /* terminate the read operation and unmap the region */ rgn_end_read(tmp); rgn_unmap(tmp); } printf("\n"); } } #if defined(TCPUNIX) crl_exit(); #endif }

23 August 1995