CSC/ECE 506 Spring 2010/ch 2 maf: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
|||
Line 132: | Line 132: | ||
SUM = LOCAL_SUM[0] + LOCAL_SUM[1] | SUM = LOCAL_SUM[0] + LOCAL_SUM[1] | ||
WRITE(*,*) SUM | WRITE(*,*) SUM | ||
// C++ with OpenMP
#include <omp.h>
#include <iostream>

// Data parallel version of the textbook loop: a[i] = b[i] + c[i], followed by
// the sum of the positive entries of a, accumulated per thread in localSum.
int main(void)
{
    // localSum has exactly one slot per thread, so every parallel region is
    // capped at this team size; otherwise omp_get_thread_num() could index
    // past the end of the array on machines with more than two cores.
    const int kNumThreads = 2;
    const int kSize = 8;

    // Sample input so the program never reads uninitialized values.
    // With this data a = {2,-1,4,-3,6,-5,8,-7} and the program prints 20.
    double b[kSize] = {1, -2, 3, -4, 5, -6, 7, -8};
    double c[kSize] = {1, 1, 1, 1, 1, 1, 1, 1};
    double a[kSize];
    double localSum[kNumThreads] = {0, 0};

    // The loop index is declared in the for statement, which makes it private
    // to each thread without an explicit private() clause.
    #pragma omp parallel for num_threads(kNumThreads)
    for (int i = 0; i < kSize; i++)
    {
        a[i] = b[i] + c[i];
    }

    #pragma omp parallel for num_threads(kNumThreads)
    for (int i = 0; i < kSize; i++)
    {
        // Declared inside the loop body, so each thread has its own copy.
        const int id = omp_get_thread_num();
        if (a[i] > 0)
            localSum[id] = localSum[id] + a[i];
    }

    // Combine the per-thread partial sums sequentially.
    double sum = localSum[0] + localSum[1];
    std::cout << sum << std::endl;
}
===Hardware Examples=== | ===Hardware Examples=== |
Revision as of 17:47, 28 January 2010
Supplement to Chapter 2: The Data Parallel Programming Model
This chapter is a supplement to Chapter 2 of the Solihin textbook. The textbook covers the shared memory and message passing parallel programming models. However, it does not address the data parallel model.
Overview
Aspects | Shared Memory | Message Passing | Data Parallel |
---|---|---|---|
Communication | implicit (via loads/stores) | explicit messages | implicit |
Synchronization | explicit | implicit (via messages) | implicit for SIMD; explicit for SPMD |
Hardware support | typically required | none | |
Development effort | lower | higher | higher |
Tuning effort | higher | lower |
A Code Example
// Simple sequential code from Solihin 2008, page 25. for (i = 0; i < 8; i++) a[i] = b[i] + c[i]; sum = 0; for (i = 0; i < 8; i++) if (a[i] > 0) sum = sum + a[i]; Print sum;
// Data parallel implementation in C++ with OpenMP.
#include <iostream>

// Splits the 8-element problem into two chunks of 4. Each iteration of the
// outer loop (one per worker, identified by id) computes its slice of
// a = b + c and the partial sum of the positive entries of that slice.
int main(void)
{
    // Sample input so the program never reads uninitialized values.
    // With this data a = {2,-1,4,-3,6,-5,8,-7} and the program prints 20.
    double b[8] = {1, -2, 3, -4, 5, -6, 7, -8};
    double c[8] = {1, 1, 1, 1, 1, 1, 1, 1};
    double a[8], localSum[2];

    #pragma omp parallel for
    for (int id = 0; id < 2; id++)
    {
        // Half-open range [start_iter, end_iter) owned by this worker.
        int local_iter = 4;
        int start_iter = id * local_iter;
        int end_iter = start_iter + local_iter;

        for (int i = start_iter; i < end_iter; i++)
            a[i] = b[i] + c[i];

        // Each worker writes only its own slot, so no synchronization is
        // needed on localSum.
        localSum[id] = 0;
        for (int i = start_iter; i < end_iter; i++)
            if (a[i] > 0)
                localSum[id] = localSum[id] + a[i];
    }

    // Combine the two partial sums after the parallel loop has finished.
    double sum = localSum[0] + localSum[1];
    std::cout << sum;
}
// Data parallel implementation in C for CUDA.
#include <cuda_runtime.h>
#include <iostream>

// Each thread handles a contiguous chunk of 4 elements: it computes its slice
// of a = b + c and stores the sum of the positive entries of that slice in
// localSum[threadIdx.x]. All four pointers must refer to device memory.
__global__ void kernel(double* a, double* b, double* c, double* localSum)
{
    int id = threadIdx.x;
    // Half-open range [start_iter, end_iter) owned by this thread.
    int local_iter = 4;
    int start_iter = id * local_iter;
    int end_iter = start_iter + local_iter;

    for (int i = start_iter; i < end_iter; i++)
        a[i] = b[i] + c[i];

    localSum[id] = 0;
    for (int i = start_iter; i < end_iter; i++)
        if (a[i] > 0)
            localSum[id] = localSum[id] + a[i];
}

int main()
{
    const int kSize = 8;
    const int kNumThreads = 2;

    // Sample host input. With this data a = {2,-1,4,-3,6,-5,8,-7} and the
    // program prints 20.
    double b[kSize] = {1, -2, 3, -4, 5, -6, 7, -8};
    double c[kSize] = {1, 1, 1, 1, 1, 1, 1, 1};
    double localSum[kNumThreads];

    // The kernel cannot dereference host stack arrays, so allocate device
    // buffers and copy the inputs over before the launch.
    double *dA, *dB, *dC, *dLocalSum;
    cudaMalloc(&dA, kSize * sizeof(double));
    cudaMalloc(&dB, kSize * sizeof(double));
    cudaMalloc(&dC, kSize * sizeof(double));
    cudaMalloc(&dLocalSum, kNumThreads * sizeof(double));
    cudaMemcpy(dB, b, kSize * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(dC, c, kSize * sizeof(double), cudaMemcpyHostToDevice);

    // One block of two threads, one thread per chunk.
    kernel<<<1, kNumThreads>>>(dA, dB, dC, dLocalSum);

    // Blocking copy on the default stream: it waits for the kernel to finish
    // before transferring the partial sums back to the host.
    cudaMemcpy(localSum, dLocalSum, kNumThreads * sizeof(double),
               cudaMemcpyDeviceToHost);

    cudaFree(dA);
    cudaFree(dB);
    cudaFree(dC);
    cudaFree(dLocalSum);

    double sum = localSum[0] + localSum[1];
    std::cout << sum;
}
C DATA PARALLEL IMPLEMENTATION IN FORTRAN
C Each value of ID owns a chunk of 4 elements: it computes its slice of
C A = B + C and the partial sum of the positive entries of that slice.
C NOTE: a standard FORALL body admits only assignment statements, so the
C loop nest inside FORALL is illustrative pseudo-code for the data
C parallel style, not strictly conforming Fortran.
      REAL A(8), B(8), C(8), LOCAL_SUM(2), SUM
C Loop bounds must be integers; START_ITER and END_ITER would be REAL
C under implicit typing.
      INTEGER ID, I, LOCAL_ITER, START_ITER, END_ITER
C Sample input; with this data the program prints 20.
      DATA B /1., -2., 3., -4., 5., -6., 7., -8./
      DATA C /8*1./
      FORALL (ID = 1:2)
        LOCAL_ITER = 4
C Arrays are 1-based: ID = 1 covers 1..4 and ID = 2 covers 5..8.
        START_ITER = (ID - 1) * LOCAL_ITER + 1
        END_ITER = START_ITER + LOCAL_ITER - 1
        DO I = START_ITER, END_ITER
          A(I) = B(I) + C(I)
        END DO
        LOCAL_SUM(ID) = 0
        DO I = START_ITER, END_ITER
          IF (A(I) > 0) THEN
            LOCAL_SUM(ID) = LOCAL_SUM(ID) + A(I)
          END IF
        END DO
      END FORALL
C LOCAL_SUM is declared with bounds 1..2, so combine elements 1 and 2.
      SUM = LOCAL_SUM(1) + LOCAL_SUM(2)
      WRITE(*,*) SUM
      END
// C++ with OpenMP
#include <omp.h>
#include <iostream>

// Data parallel version of the textbook loop: a[i] = b[i] + c[i], followed by
// the sum of the positive entries of a, accumulated per thread in localSum.
int main(void)
{
    // localSum has exactly one slot per thread, so every parallel region is
    // capped at this team size; otherwise omp_get_thread_num() could index
    // past the end of the array on machines with more than two cores.
    const int kNumThreads = 2;
    const int kSize = 8;

    // Sample input so the program never reads uninitialized values.
    // With this data a = {2,-1,4,-3,6,-5,8,-7} and the program prints 20.
    double b[kSize] = {1, -2, 3, -4, 5, -6, 7, -8};
    double c[kSize] = {1, 1, 1, 1, 1, 1, 1, 1};
    double a[kSize];
    double localSum[kNumThreads] = {0, 0};

    // The loop index is declared in the for statement, which makes it private
    // to each thread without an explicit private() clause.
    #pragma omp parallel for num_threads(kNumThreads)
    for (int i = 0; i < kSize; i++)
    {
        a[i] = b[i] + c[i];
    }

    #pragma omp parallel for num_threads(kNumThreads)
    for (int i = 0; i < kSize; i++)
    {
        // Declared inside the loop body, so each thread has its own copy.
        const int id = omp_get_thread_num();
        if (a[i] > 0)
            localSum[id] = localSum[id] + a[i];
    }

    // Combine the per-thread partial sums sequentially.
    double sum = localSum[0] + localSum[1];
    std::cout << sum << std::endl;
}
Hardware Examples
References
- David E. Culler, Jaswinder Pal Singh, and Anoop Gupta, Parallel Computer Architecture: A Hardware/Software Approach, Morgan Kaufmann, 1999.
- Ian Foster, Designing and Building Parallel Programs, Addison-Wesley, 1995.
- Magne Haveraaen, "Machine and collection abstractions for user-implemented data-parallel programming," Scientific Programming, 8(4):231-246, 2000.
- W. Daniel Hillis and Guy L. Steele, Jr., "Data parallel algorithms," Communications of the ACM, 29(12):1170-1183, December 1986.
- Alexander C. Klaiber and Henry M. Levy, "A comparison of message passing and shared memory architectures for data parallel programs," in Proceedings of the 21st Annual International Symposium on Computer Architecture, April 1994, pp. 94-105.
- Yan Solihin, Fundamentals of Parallel Computer Architecture: Multichip and Multicore Systems, Solihin Books, 2008.