Actual source code: matstash.c
1: #define PETSCMAT_DLL
3: #include "src/mat/matimpl.h"
4: #include "src/mat/utils/matstashspace.h"
6: /*
7: The input to the stash is ALWAYS in MatScalar precision, and the
8: internal storage and output are also in MatScalar.
9: */
10: #define DEFAULT_STASH_SIZE 10000
12: /*
13: MatStashCreate_Private - Creates a stash, currently used by all the parallel
14: matrix implementations. The stash is where elements of a matrix destined
15: to be stored on other processors are kept until matrix assembly is done.
17: This is a simple-minded stash: it simply adds entries to the end of the stash.
19: Input Parameters:
20: comm - communicator, required for scatters.
21: bs - stash block size, used when stashing blocks of values
23: Output Parameters:
24: stash - the newly created stash
25: */
28: PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
29: {
31: PetscInt max,*opt,nopt;
32: PetscTruth flg;
35: /* Require 2 tags, get the second using PetscCommGetNewTag() */
36: stash->comm = comm;
37: PetscCommGetNewTag(stash->comm,&stash->tag1);
38: PetscCommGetNewTag(stash->comm,&stash->tag2);
39: MPI_Comm_size(stash->comm,&stash->size);
40: MPI_Comm_rank(stash->comm,&stash->rank);
42: nopt = stash->size;
43: PetscMalloc(nopt*sizeof(PetscInt),&opt);
44: PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
45: if (flg) {
46: if (nopt == 1) max = opt[0];
47: else if (nopt == stash->size) max = opt[stash->rank];
48: else if (stash->rank < nopt) max = opt[stash->rank];
49: else max = 0; /* Use default */
50: stash->umax = max;
51: } else {
52: stash->umax = 0;
53: }
54: PetscFree(opt);
55: if (bs <= 0) bs = 1;
57: stash->bs = bs;
58: stash->nmax = 0;
59: stash->oldnmax = 0;
60: stash->n = 0;
61: stash->reallocs = -1;
62: stash->space_head = 0;
63: stash->space = 0;
65: stash->send_waits = 0;
66: stash->recv_waits = 0;
67: stash->send_status = 0;
68: stash->nsends = 0;
69: stash->nrecvs = 0;
70: stash->svalues = 0;
71: stash->rvalues = 0;
72: stash->rindices = 0;
73: stash->nprocs = 0;
74: stash->nprocessed = 0;
75: return(0);
76: }
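For reference, the -matstash_initial_size option read above may supply either a single value for all ranks or one value per rank (PetscOptionsGetIntArray reads a comma-separated list). Below is a minimal sketch of that selection rule in plain C; the helper name select_umax and its argument list are illustrative only, not part of matstash.c:

    /* Sketch: pick this rank's requested stash size from the option array */
    static PetscInt select_umax(const PetscInt *opt,PetscInt nopt,PetscInt size,PetscInt rank)
    {
      if (nopt == 1)    return opt[0];    /* one value given: use it on every rank */
      if (nopt == size) return opt[rank]; /* one value per rank */
      if (rank < nopt)  return opt[rank]; /* partial list: use it where available */
      return 0;                           /* otherwise signal "use the default"   */
    }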
78: /*
79: MatStashDestroy_Private - Destroy the stash
80: */
83: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
84: {
88: if (stash->space_head){
89: PetscMatStashSpaceDestroy(stash->space_head);
90: stash->space_head = 0;
91: stash->space = 0;
92: }
93: return(0);
94: }
96: /*
97: MatStashScatterEnd_Private - This is called as the final stage of
98: the scatter. The final stages of message passing are done here, and
99: all the memory used for message passing is cleaned up. This
100: routine also resets the stash and deallocates the memory used
101: for the stash. It also keeps track of the current memory usage
102: so that the same value can be used the next time through.
103: */
106: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107: {
109: PetscInt nsends=stash->nsends,bs2,oldnmax;
110: MPI_Status *send_status;
113: /* wait on sends */
114: if (nsends) {
115: PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
116: MPI_Waitall(2*nsends,stash->send_waits,send_status);
117: PetscFree(send_status);
118: }
120: /* Now update oldnmax to be approximately 10% more than the maximum n used; this way the
121: wastage of space is reduced the next time this stash is used.
122: Only update oldnmax if it increases. */
123: if (stash->n) {
124: bs2 = stash->bs*stash->bs;
125: oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
126: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
127: }
129: stash->nmax = 0;
130: stash->n = 0;
131: stash->reallocs = -1;
132: stash->nprocessed = 0;
133: if (stash->space_head){
134: PetscMatStashSpaceDestroy(stash->space_head);
135: stash->space_head = 0;
136: stash->space = 0;
137: }
138: PetscFree(stash->send_waits);
139: stash->send_waits = 0;
140: PetscFree(stash->recv_waits);
141: stash->recv_waits = 0;
142: PetscFree(stash->svalues);
143: stash->svalues = 0;
144: PetscFree(stash->rvalues);
145: stash->rvalues = 0;
146: PetscFree(stash->rindices);
147: stash->rindices = 0;
148: PetscFree(stash->nprocs);
149: stash->nprocs = 0;
150: return(0);
151: }
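As a concrete illustration of the heuristic above: with stash->n = 1000 entries and bs = 2 (so bs2 = 4), the retained size would be ((int)(1000*1.1)+5)*4 = 4420 scalars, and stash->oldnmax is updated only if 4420 exceeds its previous value. A self-contained sketch with these hypothetical numbers:

    /* Sketch: the ~10% over-allocation rule used to record oldnmax */
    int n = 1000, bs = 2;
    int bs2     = bs*bs;                    /* 4 */
    int oldnmax = ((int)(n*1.1) + 5)*bs2;   /* (1100 + 5)*4 = 4420 */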
153: /*
154: MatStashGetInfo_Private - Gets the relevant statistics of the stash
156: Input Parameters:
157: stash - the stash
158: nstash - the size of the stash. Indicates the number of values stored.
159: reallocs - the number of additional mallocs incurred.
160:
161: */
164: PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
165: {
166: PetscInt bs2 = stash->bs*stash->bs;
169: if (nstash) *nstash = stash->n*bs2;
170: if (reallocs) {
171: if (stash->reallocs < 0) *reallocs = 0;
172: else *reallocs = stash->reallocs;
173: }
174: return(0);
175: }
177: /*
178: MatStashSetInitialSize_Private - Sets the initial size of the stash
180: Input Parameters:
181: stash - the stash
182: max - the value that is used as the max size of the stash.
183: This value is used while allocating memory.
184: */
187: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
188: {
190: stash->umax = max;
191: return(0);
192: }
194: /* MatStashExpand_Private - Expand the stash. This function is called
195: when the space in the stash is not sufficient to add the new values
196: being inserted into the stash.
197:
198: Input Parameters:
199: stash - the stash
200: incr - the minimum increase requested
201:
202: Notes:
203: This routine doubles the currently used memory.
204: */
207: static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
208: {
210: PetscInt newnmax,bs2= stash->bs*stash->bs;
213: /* allocate a larger stash */
214: if (!stash->oldnmax && !stash->nmax) { /* new stash */
215: if (stash->umax) newnmax = stash->umax/bs2;
216: else newnmax = DEFAULT_STASH_SIZE/bs2;
217: } else if (!stash->nmax) { /* reusing stash */
218: if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
219: else newnmax = stash->oldnmax/bs2;
220: } else newnmax = stash->nmax*2;
221: if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
223: /* Get a MatStashSpace and attach it to stash */
224: if (!stash->nmax) { /* new stash or reusing stash->oldnmax */
225: PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);
226: stash->space = stash->space_head;
227: } else {
228: PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
229: }
230: stash->reallocs++;
231: stash->nmax = newnmax;
232: return(0);
233: }
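To make the growth policy concrete, here is a hedged sketch of the same decision as a stand-alone function; the wrapper choose_newnmax is illustrative only, while umax, oldnmax, nmax and DEFAULT_STASH_SIZE are the quantities used above:

    /* Sketch: choose the new stash capacity (in blocks of bs2 values) */
    static PetscInt choose_newnmax(PetscInt umax,PetscInt oldnmax,PetscInt nmax,PetscInt incr,PetscInt bs2)
    {
      PetscInt newnmax;
      if (!oldnmax && !nmax)     newnmax = (umax ? umax : DEFAULT_STASH_SIZE)/bs2;  /* brand new stash */
      else if (!nmax)            newnmax = ((umax > oldnmax) ? umax : oldnmax)/bs2; /* reusing an old stash */
      else                       newnmax = nmax*2;                                  /* double the in-use stash */
      if (newnmax < nmax + incr) newnmax += 2*incr;   /* guarantee room for the requested increase */
      return newnmax;
    }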
234: /*
235: MatStashValuesRow_Private - inserts values into the stash. This function
236: expects the values to be row-oriented. Multiple columns belonging to the same row
237: can be inserted with a single call to this function.
239: Input Parameters:
240: stash - the stash
241: row - the global row corresponding to the values
242: n - the number of elements inserted. All elements belong to the above row.
243: idxn - the global column indices corresponding to each of the values.
244: values - the values inserted
245: */
248: PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
249: {
250: PetscErrorCode ierr;
251: PetscInt i,k;
252: PetscMatStashSpace space=stash->space;
255: /* Check and see if we have sufficient memory */
256: if (!space || space->local_remaining < n){
257: MatStashExpand_Private(stash,n);
258: }
259: space = stash->space;
260: k = space->local_used;
261: for (i=0; i<n; i++) {
262: space->idx[k] = row;
263: space->idy[k] = idxn[i];
264: space->val[k] = values[i];
265: k++;
266: }
267: stash->n += n;
268: space->local_used += n;
269: space->local_remaining -= n;
270: return(0);
271: }
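A typical (hedged) usage pattern: a parallel MatSetValues() implementation inserts locally owned rows directly and stashes the rest until assembly time. The sketch below is illustrative only; A->stash, rstart and rend stand in for whatever the concrete matrix type actually uses:

    /* Sketch: values for a row owned by another process go into the stash */
    if (row >= rstart && row < rend) {
      /* insert directly into the local portion of the matrix */
    } else {
      MatStashValuesRow_Private(&A->stash,row,n,cols,vals); /* shipped later by the scatter */
    }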
273: /*
274: MatStashValuesCol_Private - inserts values into the stash. This function
275: expects the values to be column-oriented. Multiple columns belonging to the same row
276: can be inserted with a single call to this function.
278: Input Parameters:
279: stash - the stash
280: row - the global row corresponding to the values
281: n - the number of elements inserted. All elements belong to the above row.
282: idxn - the global column indices corresponding to each of the values.
283: values - the values inserted
284: stepval - the consecutive values are separated by a distance of stepval;
285: this happens because the input is column-oriented.
286: */
289: PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
290: {
291: PetscErrorCode ierr;
292: PetscInt i,k;
293: PetscMatStashSpace space=stash->space;
296: /* Check and see if we have sufficient memory */
297: if (!space || space->local_remaining < n){
298: MatStashExpand_Private(stash,n);
299: }
300: space = stash->space;
301: k = space->local_used;
302: for (i=0; i<n; i++) {
303: space->idx[k] = row;
304: space->idy[k] = idxn[i];
305: space->val[k] = values[i*stepval];
306: k++;
307: }
308: stash->n += n;
309: space->local_used += n;
310: space->local_remaining -= n;
311: return(0);
312: }
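To illustrate stepval: if the caller holds an m-by-n block of values in column-major order, the entries of one row are not contiguous but sit stepval scalars apart (stepval = m in this hypothetical 3-by-2 example):

    /* Sketch: column-major 3x2 input; the entries of row 1 are v[1] and v[1+3] */
    MatScalar v[6]    = {1,2,3, 4,5,6}; /* columns {1,2,3} and {4,5,6} stored back to back */
    PetscInt  stepval = 3;              /* distance between consecutive entries of one row */
    /* passing values = v+1 with n = 2 makes the loop above pick v[1] = 2 and v[1+stepval] = 5 */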
314: /*
315: MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
316: This function expects the values to be row-oriented. Multiple columns belonging
317: to the same block-row can be inserted with a single call to this function.
318: This function extracts the sub-block of values based on the dimensions of
319: the original input block, and the row,col values corresponding to the blocks.
321: Input Parameters:
322: stash - the stash
323: row - the global block-row corresponding to the values
324: n - the number of elements inserted. All elements belong to the above row.
325: idxn - the global block-column indices corresponding to each of the blocks of
326: values. Each block is of size bs*bs.
327: values - the values inserted
328: rmax - the number of block-rows in the original block.
329: cmax - the number of block-columns in the original block.
330: idx - the index of the current block-row in the original block.
331: */
334: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
335: {
336: PetscErrorCode ierr;
337: PetscInt i,j,k,bs2,bs=stash->bs,l;
338: const MatScalar *vals;
339: MatScalar *array;
340: PetscMatStashSpace space=stash->space;
343: if (!space || space->local_remaining < n){
344: MatStashExpand_Private(stash,n);
345: }
346: space = stash->space;
347: l = space->local_used;
348: bs2 = bs*bs;
349: for (i=0; i<n; i++) {
350: space->idx[l] = row;
351: space->idy[l] = idxn[i];
352: /* Now copy over the block of values. Store the values column-oriented.
353: This enables inserting multiple blocks belonging to a row with a single
354: function call */
355: array = space->val + bs2*l;
356: vals = values + idx*bs2*n + bs*i;
357: for (j=0; j<bs; j++) {
358: for (k=0; k<bs; k++) array[k*bs] = vals[k];
359: array++;
360: vals += cmax*bs;
361: }
362: l++;
363: }
364: stash->n += n;
365: space->local_used += n;
366: space->local_remaining -= n;
367: return(0);
368: }
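The inner double loop stores each bs-by-bs block in column-major order even though the input block-row is row-oriented. A self-contained sketch of that copy for a single 2x2 block (plain C with hypothetical values; ld plays the role of the scalar row length cmax*bs above):

    /* Sketch: copy one 2x2 block out of a row-major array into column-major storage */
    int    j,k;
    int    bs = 2, ld = 4;              /* block size and scalar length of one input row */
    double in[8]  = {1,2, 3,4,          /* first scalar row: blocks (1 2) and (3 4) */
                     5,6, 7,8};         /* second scalar row: blocks (5 6) and (7 8) */
    double out[4];                      /* receives the first block, column by column */
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) out[k*bs+j] = in[j*ld+k];
    }
    /* out = {1,5,2,6}: column (1,5) followed by column (2,6) */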
370: /*
371: MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
372: This function expects the values to be column-oriented. Multiple columns belonging
373: to the same block-row can be inserted with a single call to this function.
374: This function extracts the sub-block of values based on the dimensions of
375: the original input block, and the row,col values corresponding to the blocks.
377: Input Parameters:
378: stash - the stash
379: row - the global block-row corresponding to the values
380: n - the number of elements inserted. All elements belong to the above row.
381: idxn - the global block-column indices corresponding to each of the blocks of
382: values. Each block is of size bs*bs.
383: values - the values inserted
384: rmax - the number of block-rows in the original block.
385: cmax - the number of block-columns in the original block.
386: idx - the index of the current block-row in the original block.
387: */
390: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
391: {
392: PetscErrorCode ierr;
393: PetscInt i,j,k,bs2,bs=stash->bs,l;
394: const MatScalar *vals;
395: MatScalar *array;
396: PetscMatStashSpace space=stash->space;
399: if (!space || space->local_remaining < n){
400: MatStashExpand_Private(stash,n);
401: }
402: space = stash->space;
403: l = space->local_used;
404: bs2 = bs*bs;
405: for (i=0; i<n; i++) {
406: space->idx[l] = row;
407: space->idy[l] = idxn[i];
408: /* Now copy over the block of values. Store the values column-oriented.
409: This enables inserting multiple blocks belonging to a row with a single
410: function call */
411: array = space->val + bs2*l;
412: vals = values + idx*bs2*n + bs*i;
413: for (j=0; j<bs; j++) {
414: for (k=0; k<bs; k++) {array[k] = vals[k];}
415: array += bs;
416: vals += rmax*bs;
417: }
418: l++;
419: }
420: stash->n += n;
421: space->local_used += n;
422: space->local_remaining -= n;
423: return(0);
424: }
425: /*
426: MatStashScatterBegin_Private - Initiates the transfer of values to the
427: correct owners. This function goes through the stash, checks the
428: owner of each stashed value, and sends the values off to the owner
429: processors.
431: Input Parameters:
432: stash - the stash
433: owners - an array of size 'no-of-procs' which gives the ownership range
434: for each node.
436: Notes: In the case of the blocked stash the 'owners' array has its
437: ranges specified in blocked global indices, and for the regular stash in
438: the proper global indices.
439: */
442: PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
443: {
444: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
445: PetscInt size=stash->size,nsends;
447: PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l;
448: MatScalar **rvalues,*svalues;
449: MPI_Comm comm = stash->comm;
450: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
451: PetscMPIInt *nprocs,*nlengths,nreceives;
452: PetscInt *sp_idx,*sp_idy;
453: MatScalar *sp_val;
454: PetscMatStashSpace space,space_next;
457: bs2 = stash->bs*stash->bs;
458:
459: /* first count number of contributors to each processor */
460: PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);
461: PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));
462: PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);
464: nlengths = nprocs+size;
465: i = j = 0;
466: lastidx = -1;
467: space = stash->space_head;
468: while (space != PETSC_NULL){
469: space_next = space->next;
470: sp_idx = space->idx;
471: for (l=0; l<space->local_used; l++){
472: /* if indices are NOT locally sorted, need to start search at the beginning */
473: if (lastidx > (idx = sp_idx[l])) j = 0;
474: lastidx = idx;
475: for (; j<size; j++) {
476: if (idx >= owners[j] && idx < owners[j+1]) {
477: nlengths[j]++; owner[i] = j; break;
478: }
479: }
480: i++;
481: }
482: space = space_next;
483: }
484: /* Now check what procs get messages - and compute nsends. */
485: for (i=0, nsends=0 ; i<size; i++) {
486: if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
487: }
489: { int *onodes,*olengths;
490: /* Determine the number of messages to expect, their lengths, and the from-ids */
491: PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);
492: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
493: /* since row and col indices are sent together, the lengths are multiplied by 2 */
494: for (i=0; i<nreceives; i++) olengths[i] *=2;
495: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
496: /* values have 'bs2' entries each, so scale the lengths by bs2 (and remove the earlier factor of 2) */
497: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
498: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
499: PetscFree(onodes);
500: PetscFree(olengths);
501: }
503: /* do sends:
504: 1) startv[i] gives the starting index in svalues for the values going to
505: the ith processor
506: */
507: PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);
508: sindices = (PetscInt*)(svalues + bs2*stash->n);
509: PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
510: PetscMalloc(2*size*sizeof(PetscInt),&startv);
511: starti = startv + size;
512: /* use 2 sends: the first with all_a, the next with all_i and all_j */
513: startv[0] = 0; starti[0] = 0;
514: for (i=1; i<size; i++) {
515: startv[i] = startv[i-1] + nlengths[i-1];
516: starti[i] = starti[i-1] + nlengths[i-1]*2;
517: }
518:
519: i = 0;
520: space = stash->space_head;
521: while (space != PETSC_NULL){
522: space_next = space->next;
523: sp_idx = space->idx;
524: sp_idy = space->idy;
525: sp_val = space->val;
526: for (l=0; l<space->local_used; l++){
527: j = owner[i];
528: if (bs2 == 1) {
529: svalues[startv[j]] = sp_val[l];
530: } else {
531: PetscInt k;
532: MatScalar *buf1,*buf2;
533: buf1 = svalues+bs2*startv[j];
534: buf2 = space->val + bs2*i;
535: for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
536: }
537: sindices[starti[j]] = sp_idx[l];
538: sindices[starti[j]+nlengths[j]] = sp_idy[l];
539: startv[j]++;
540: starti[j]++;
541: i++;
542: }
543: space = space_next;
544: }
545: startv[0] = 0;
546: for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
548: for (i=0,count=0; i<size; i++) {
549: if (nprocs[i]) {
550: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
551: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);
552: }
553: }
554: #if defined(PETSC_USE_INFO)
555: PetscInfo1(0,"No of messages: %d \n",nsends);
556: for (i=0; i<size; i++) {
557: if (nprocs[i]) {
558: PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));
559: }
560: }
561: #endif
562: PetscFree(owner);
563: PetscFree(startv);
564: /* This memory is reused in scatter end for a different purpose */
565: for (i=0; i<2*size; i++) nprocs[i] = -1;
566: stash->nprocs = nprocs;
567:
568: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
569: PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
571: for (i=0; i<nreceives; i++) {
572: recv_waits[2*i] = recv_waits1[i];
573: recv_waits[2*i+1] = recv_waits2[i];
574: }
575: stash->recv_waits = recv_waits;
576: PetscFree(recv_waits1);
577: PetscFree(recv_waits2);
579: stash->svalues = svalues; stash->rvalues = rvalues;
580: stash->rindices = rindices; stash->send_waits = send_waits;
581: stash->nsends = nsends; stash->nrecvs = nreceives;
582: return(0);
583: }
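For each destination rank j this routine therefore posts two sends: 2*nlengths[j] integers (all of j's row indices followed by all of its column indices) with tag1, and bs2*nlengths[j] scalars with tag2. A hedged sketch of how a receiver views one such index message (rbuf and nvals are illustrative names):

    /* Sketch: layout of one received index message of 2*nvals PetscInt */
    PetscInt *rows = rbuf;          /* first nvals entries: row indices    */
    PetscInt *cols = rbuf + nvals;  /* next  nvals entries: column indices */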
585: /*
586: MatStashScatterGetMesg_Private - This function waits on the receives posted
587: in the function MatStashScatterBegin_Private() and returns one message at
588: a time to the calling function. If no messages are left, it indicates this
589: by setting flg = 0, else it sets flg = 1.
591: Input Parameters:
592: stash - the stash
594: Output Parameters:
595: nvals - the number of entries in the current message.
596: rows - an array of row indices (or blocked indices) corresponding to the values
597: cols - an array of column indices (or blocked indices) corresponding to the values
598: vals - the values
599: flg - 0 indicates no more messages are left, and the current call has no values associated with it.
600: 1 indicates that the current call successfully received a message, and the
601: other output parameters nvals,rows,cols,vals are set appropriately.
602: */
605: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
606: {
608: PetscMPIInt i,*flg_v,i1,i2;
609: PetscInt bs2;
610: MPI_Status recv_status;
611: PetscTruth match_found = PETSC_FALSE;
615: *flg = 0; /* When a message is discovered this is reset to 1 */
616: /* Return if no more messages to process */
617: if (stash->nprocessed == stash->nrecvs) { return(0); }
619: flg_v = stash->nprocs;
620: bs2 = stash->bs*stash->bs;
621: /* If a matching pair of receives is found, process them, and return the data to
622: the calling function. Until then keep receiving messages */
623: while (!match_found) {
624: MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
625: /* Now pack the received message into a structure which is usable by others */
626: if (i % 2) {
627: MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);
628: flg_v[2*recv_status.MPI_SOURCE] = i/2;
629: *nvals = *nvals/bs2;
630: } else {
631: MPI_Get_count(&recv_status,MPIU_INT,nvals);
632: flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
633: *nvals = *nvals/2; /* This message has both row indices and col indices */
634: }
635:
636: /* Check if we have both the messages from this proc */
637: i1 = flg_v[2*recv_status.MPI_SOURCE];
638: i2 = flg_v[2*recv_status.MPI_SOURCE+1];
639: if (i1 != -1 && i2 != -1) {
640: *rows = stash->rindices[i2];
641: *cols = *rows + *nvals;
642: *vals = stash->rvalues[i1];
643: *flg = 1;
644: stash->nprocessed ++;
645: match_found = PETSC_TRUE;
646: }
647: }
648: return(0);
649: }
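A hedged sketch of how an assembly routine would typically drain the stash with this interface (error checking omitted; A->stash and the local-insertion step are illustrative, not quoted from any particular matrix type):

    /* Sketch: receive and apply stashed values until no messages remain */
    PetscMPIInt n;
    PetscInt    *row,*col,flg;
    MatScalar   *val;
    while (1) {
      MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
      if (!flg) break;                       /* all messages have been processed */
      /* ... insert the n received (row[i],col[i],val[i]) entries locally ... */
    }
    MatStashScatterEnd_Private(&A->stash);   /* wait on the sends and reset the stash */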