Actual source code: Index.c

 2:  #include petsc.h
 3:  #include petscsys.h

  8: 
 11: int main(int argc,char **argv)
 12: {

 15:   PetscInitialize(&argc,&argv,0,0);
 16: 
 17:   test1();
 18:   test2();

 20:   PetscFinalize();
 21:   return(0);
 22: }

 26: int test1(void)
 27: {
 28:   PetscLogDouble  t1,t2;
 29:   double      value;
 30:   int         i,ierr,*z,*zi,intval;
 31:   PetscScalar *x,*y;
 32:   PetscRandom r;

 34:   PetscRandomCreate(PETSC_COMM_SELF,RANDOM_DEFAULT,&r);
 35:   PetscMalloc(20000*sizeof(PetscScalar),&x);
 36:   PetscMalloc(20000*sizeof(PetscScalar),&y);

 38:   PetscMalloc(2000*sizeof(int),&z);
 39:   PetscMalloc(2000*sizeof(int),&zi);



 43:   /* Take care of paging effects */
 44:   PetscGetTime(&t1);
 45: 
 46:    /* Form the random set of integers */
 47:   for (i=0; i<2000; i++) {
 48:     PetscRandomGetValue(r,&value);
 49:     intval = (int)(value*20000.0);
 50:     z[i]   = intval;
 51:   }

 53:   for (i=0; i<2000; i++) {
 54:     PetscRandomGetValue(r,&value);
 55:     intval  = (int)(value*20000.0);
 56:     zi[i]   = intval;
 57:   }
 58:   /* fprintf(stdout,"Done setup\n"); */

 60:   BlastCache();

 62:   PetscGetTime(&t1);
 63:   for (i=0; i<2000; i++) {  x[i] = y[i]; }
 64:   PetscGetTime(&t2);
 65:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0);

 67:   BlastCache();

 69:   PetscGetTime(&t1);
 70:   for (i=0; i<500; i+=4) {
 71:     x[i]   = y[z[i]];
 72:     x[1+i] = y[z[1+i]];
 73:     x[2+i] = y[z[2+i]];
 74:     x[3+i] = y[z[3+i]];
 75:   }
 76:   PetscGetTime(&t2);
 77:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0);

 79:   BlastCache();

 81:   PetscGetTime(&t1);CHKERRQ(ierr)
 82:   for (i=0; i<2000; i++) {  x[i] = y[z[i]]; }
 83:   PetscGetTime(&t2);
 84:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0);

 86:   BlastCache();

 88:   PetscGetTime(&t1);
 89:   for (i=0; i<1000; i+=2) {  x[i] = y[z[i]];  x[1+i] = y[z[1+i]]; }
 90:   PetscGetTime(&t2);
 91:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0);

 93:   BlastCache();

 95:   PetscGetTime(&t1);
 96:   for (i=0; i<2000; i++) {  x[z[i]] = y[i]; }
 97:   PetscGetTime(&t2);
 98:   fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0);

100:   BlastCache();

102:   PetscGetTime(&t1);
103:   for (i=0; i<2000; i++) {  x[z[i]] = y[zi[i]]; }
104:   PetscGetTime(&t2);
105:   fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0);
106: 
107:   PetscMemcpy(x,y,10);
108:   PetscMemcpy(z,zi,10);
109:   PetscFree(z);
110:   PetscFree(zi);
111:   PetscFree(x);
112:   PetscFree(y);
113:   PetscRandomDestroy(r);
114:   return(0);
115: }

119: int test2(void)
120: {
121:   PetscLogDouble   t1,t2;
122:   double       value;
123:   int          i,ierr,z[20000],zi[20000],intval,tmp;
124:   PetscScalar  x[20000],y[20000];
125:   PetscRandom  r;

127:   PetscRandomCreate(PETSC_COMM_SELF,RANDOM_DEFAULT,&r);

129:   /* Take care of paging effects */
130:   PetscGetTime(&t1);
131: 
132:   for (i=0; i<20000; i++) {
133:     x[i]  = i;
134:     y[i]  = i;
135:     z[i]  = i;
136:     zi[i] = i;
137:   }

139:    /* Form the random set of integers */
140:   for (i=0; i<20000; i++) {
141:     PetscRandomGetValue(r,&value);
142:     intval = (int)(value*20000.0);
143:     tmp    = z[i];
144:     z[i]   = z[intval];
145:     z[intval] = tmp;
146:   }

148:   for (i=0; i<20000; i++) {
149:     PetscRandomGetValue(r,&value);
150:     intval = (int)(value*20000.0);
151:     tmp    = zi[i];
152:     zi[i]  = zi[intval];
153:     zi[intval] = tmp;
154:   }
155:   /* fprintf(stdout,"Done setup\n"); */

157:   /* BlastCache(); */

159:   PetscGetTime(&t1);
160:   for (i=0; i<2000; i++) {  x[i] = y[i]; }
161:   PetscGetTime(&t2);
162:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0);

164:   /* BlastCache(); */

166:   PetscGetTime(&t1);
167:   for (i=0; i<2000; i++) {  y[i] = x[z[i]]; }
168:   PetscGetTime(&t2);
169:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0);

171:   /* BlastCache(); */

173:   PetscGetTime(&t1);
174:   for (i=0; i<2000; i++) {  x[z[i]] = y[i]; }
175:   PetscGetTime(&t2);
176:   fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0);

178:   /* BlastCache(); */

180:   PetscGetTime(&t1);
181:   for (i=0; i<2000; i++) {  y[z[i]] = x[zi[i]]; }
182:   PetscGetTime(&t2);
183:   fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0);


186:   PetscRandomDestroy(r);
187:   return(0);
188: }

192: int BlastCache(void)
193: {
194:   int    i,ierr,n = 1000000;
195:   PetscScalar *x,*y,*z,*a,*b;

197:   PetscMalloc(5*n*sizeof(PetscScalar),&x);
198:   y = x + n;
199:   z = y + n;
200:   a = z + n;
201:   b = a + n;

203:   for (i=0; i<n; i++) {
204:     a[i] = (PetscScalar) i;
205:     y[i] = (PetscScalar) i;
206:     z[i] = (PetscScalar) i;
207:     b[i] = (PetscScalar) i;
208:     x[i] = (PetscScalar) i;
209:   }

211:   for (i=0; i<n; i++) {
212:     a[i] = 3.0*x[i] + 2.0*y[i] + 3.3*z[i] - 25.*b[i];
213:   }
214:   for (i=0; i<n; i++) {
215:     b[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i];
216:   }
217:   for (i=0; i<n; i++) {
218:     z[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i];
219:   }
220:   PetscFree(x);
221:   return(0);
222: }