/*
* example.c
* joseph adler
* august 1995
*
* Copyright (C) 1995 Massachusetts Institute of Technology
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without
* fee, provided that the above copyright notice appear in all copies
* and that both that copyright notice and this permission notice
* appear in supporting documentation. The author makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied
* warranty.
*
* THE AUTHORS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
* NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* RCS Id: example.c,v 1.12 1995/08/24 08:24:34 tuna Exp
*/
/* This example program is designed to help guide the user through
* programming a simple program with CRL. It is a simple matrix
* multiply program, with some extra hooks for dividing work in some
* reasonable fashion.
*
* Simple serial version, adopted from:
*
* _Introduction to Algorithms_. Thomas H. Cormen, Charles
* E. Leiserson, and Ronald L. Rivest. MIT Press, 1990.
*
* void mm(int a[N][N], int b[N][N], int c[N][N])
* {
* int i, j, k;
*
* for (i=0; i<N; i++)
* {
* for (j=0; j<N; j++)
* {
* c[i][j] = 0;
* for (k=0; k<N; k++)
* c[i][j] = c[i][j] + (a[i][k] * b[k][j]);
* }
* }
* }
*
* Because the purpose of this example is to illustrate how to program
* in CRL, the example is intentionally kept quite simple (e.g., the
* two input matrices are hard coded; the particular work and data
* decomposition used is not particularly efficient).
*/
/* We begin by #including "crl.h", #defining some constants, and
* declaring the two input matrices.
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#if defined(CM5)
#include <cm/cmmd.h>
#elif defined(ALEWIFE)
#include <parallel.h>
#endif
#include "crl.h"
/* MASTER is the node responsible for creating the regions
* used to represent the destination matrix (c)
*/
#define MASTER (0)
/* N is the size of the (square) matrices used
*/
#define N (5)
/* IntMatrix is a typedef for an N by N matrix of int elements
*/
typedef int IntMatrix[N][N];
/* RidMatrix is a typedef for an N by N matrix of rid_t elements
* (an rid_t is a CRL region identifier)
*/
typedef rid_t RidMatrix[N][N];
/* the first input matrix (static; replicated on all processors)
*/
IntMatrix a = {{ 2, 0, 0, 0, 0},
{ 0, 2, 0, 0, 0},
{ 0, 0, 2, 0, 0},
{ 0, 0, 0, 2, 0},
{ 0, 0, 0, 0, 2}};
/* the second input matrix (static; replicated on all processors)
*/
IntMatrix b = {{ 1, 2, 3, 4, 5},
{ 6, 7, 8, 9, 10},
{11, 12, 13, 14, 15},
{16, 17, 18, 19, 20},
{21, 22, 23, 24, 25}};
/* Now we need to consider the question of how we divide the work
* involved in matrix multiplication. Basically, we want the program
* to proceed as follows:
*
* 1. The MASTER node creates the regions used to represent the
* destination matrix (c).
*
* 2. Each node does its share of the computation and stops at a
* barrier when done.
*
* 3. After nodes return from the barrier, all work is done. The
* MASTER node prints out the values found in the destination
* matrix and all nodes exit.
*
* To implement this, we create a matrix of region identifiers, each
* of which will in turn refer to a region which contains the value
* of that element of the destination matrix (c).
*/
RidMatrix c;
/* We divide work in this program so that each node is called upon to
* perform approximately the same amount of work. The scheme which is
* used is fairly simple: we color each square in the matrix with a
* node index number, traversing the n by n matrix from left to right,
* and top to bottom, counting off node indices in ascending order. For
* example, with seven nodes and a 4 by 4 matrix, assignments would
* look like:
*
* 0 1 2 3
* 4 5 6 0
* 1 2 3 4
* 5 6 0 1
*/
/* If we're using the TCP/UNIX version of CRL, the main procedure can
* be found in ../pvm.common/pvm_app.c. This procedure takes care of
* setting up a pvm group, and spawning processes. It then calls
* main2(). Please see this code for more information on how to set up
* a program to run using pvm.
*
* If we're running CRL on any other platform, then we need to include
* a main procedure that calls main2().
*/
extern int main2(int, char **);
#if defined(TCPUNIX)
extern char *GROUP;
#else
int main(int argc, char **argv)
{
#if defined(CM5)
CMMD_set_io_mode(0, CMMD_independent);
CMMD_set_io_mode(1, CMMD_independent);
CMMD_set_io_mode(2, CMMD_independent);
#endif
#if defined(ALEWIFE)
do_in_parallel(main2, argc, argv);
#else
main2(argc, argv);
#endif
return 0;
}
#endif
int main2(int argc, char **argv)
{
int index;
int row, col;
int k;
int *tmp;
/* initialize CRL
*/
#if defined(TCPUNIX)
crl_init(GROUP);
#else
crl_init();
#endif
if (crl_self_addr == MASTER)
{
/* on the MASTER node, create all the regions used to represent
* the destination matrix
*/
for (row=0; row<N; row++)
for (col=0; col<N; col++)
c[row][col] = rgn_create(sizeof(int));
/* broadcast this set of region identifiers to all other nodes
*/
rgn_bcast_send(sizeof(RidMatrix), c);
}
else
{
/* receive the set of region identifiers broadcast by the MASTER
* node
*/
rgn_bcast_recv(sizeof(RidMatrix), c);
}
/* work distribution algorithm:
*
* Each node then computes the value of all destination matrix
* elements where the index of the element in the matrix (numbering
* from left to right, top to bottom) modulo the number of nodes is
* equal to the local node index.
*
* that is, if
*
* index = (N * row) + col
*
* each node computes all destination matrix elements for which
*
* (index % crl_num_nodes) == crl_self_addr
*/
for (index=crl_self_addr; index<(N*N); index+=crl_num_nodes)
{
row = index / N;
col = index % N;
printf("node %d computing c[%d][%d]\n", crl_self_addr, row, col);
/* now, map the region representing the current destination matrix
* element and initiate a write so we can write to it
*/
tmp = rgn_map(c[row][col]);
rgn_start_write(tmp);
/* compute the value of this element of the destination matrix
*/
*tmp = 0;
for (k=0; k<N; k++)
*tmp += (a[row][k] * b[k][col]);
/* since we're done with this element of the destination matrix,
* terminate the write operation and unmap the region
*/
rgn_end_write(tmp);
rgn_unmap(tmp);
}
/* wait for all nodes to finish computing
*/
rgn_barrier();
/* MASTER node prints out the destination matrix
*/
if (crl_self_addr == MASTER)
{
printf("Final matrix:\n");
for (row=0; row<N; row++)
{
for (col=0; col<N; col++)
{
/* map the region representing the current element and
* initiate a read operation
*/
tmp = (int *) rgn_map(c[row][col]);
rgn_start_read(tmp);
printf("%6d ", *tmp);
/* terminate the read operation and unmap the region
*/
rgn_end_read(tmp);
rgn_unmap(tmp);
}
printf("\n");
}
}
#if defined(TCPUNIX)
crl_exit();
#endif
}