CSC/ECE 506 Spring 2010/ch 2 maf: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
=The Data Parallel Programming Model= | ==Supplement to Chapter 2: The Data Parallel Programming Model== | ||
==Overview== | ===Overview=== | ||
==Comparing the Data Parallel Model with the Shared Memory and Message Passing Models== | ===Comparing the Data Parallel Model with the Shared Memory and Message Passing Models=== | ||
<!--ref>Solihin, Y.: "Fundamentals", page 22. Solihin Books, 2008.</ref--> | <!--ref>Solihin, Y.: "Fundamentals", page 22. Solihin Books, 2008.</ref--> | ||
Line 23: | Line 23: | ||
| explicit | | explicit | ||
| implicit (via messages) | | implicit (via messages) | ||
| implicit | | implicit for SIMD; explicit for SPMD | ||
|- | |- | ||
| Hardware support | | Hardware support | ||
Line 41: | Line 41: | ||
|} | |} | ||
==A Code Example== | ===A Code Example=== | ||
// ''Simple sequential code from Solihin 2008, page 25.'' | // ''Simple sequential code from Solihin 2008, page 25.'' | ||
Line 53: | Line 53: | ||
Print sum; | Print sum; | ||
==Hardware Examples== | shared double a[], b[], c[], local_sum; | ||
id = getmyid(); | |||
local_iter = 4; | |||
start_iter = id * local_iter; | |||
end_iter = start_iter + local_iter; | |||
'''for''' (i = start_iter; i < end_iter; i++) | |||
a[i] = b[i] + c[i]; | |||
local_sum[id] = 0; | |||
'''for''' (i = start_iter; i < end_iter; i++) | |||
local_sum[id] = local_sum[id] + a[i]; | |||
'''if''' (id == 0) { | |||
sum = local_sum[0] + local_sum[1]; | |||
Print sum; | |||
} | |||
__global__ void kernel( | |||
double* a, | |||
double* b, | |||
double* c, | |||
double* local_sum) | |||
{ | |||
id = threadIdx.x; | |||
local_iter = 4; | |||
start_iter = id * local_iter; | |||
end_iter = start_iter + local_iter; | |||
for (i = start_iter; i < end_iter; i++) | |||
a[i] = b[i] + c[i]; | |||
local_sum[id] = 0; | |||
for (i = start_iter; i < end_iter; i++) | |||
local_sum[id] = local_sum[id] + a[i]; | |||
} | |||
int main() | |||
{ | |||
double a[8], b[8], c[8], local_sum[2]; | |||
kernel<<<1, 2>>>(a, b, c, local_sum); | |||
sum = local_sum[0] + local_sum[1]; | |||
cout << sum; | |||
} | |||
===Hardware Examples=== |
Revision as of 06:10, 27 January 2010
Supplement to Chapter 2: The Data Parallel Programming Model
Overview
Aspects | Shared Memory | Message Passing | Data Parallel |
---|---|---|---|
Communication | implicit (via loads/stores) | explicit messages | implicit |
Synchronization | explicit | implicit (via messages) | implicit for SIMD; explicit for SPMD |
Hardware support | typically required | none | |
Development effort | lower | higher | higher |
Tuning effort | higher | lower |
A Code Example
// Simple sequential code from Solihin 2008, page 25.
// Step 1: element-wise addition over all 8 elements, a[i] = b[i] + c[i].
for (i = 0; i < 8; i++)
    a[i] = b[i] + c[i];
// Step 2: reduce a into sum, adding only the strictly positive elements.
sum = 0;
for (i = 0; i < 8; i++)
    if (a[i] > 0)
        sum = sum + a[i];
Print sum;
// Shared-memory version of the example for two threads (id 0 and id 1).
// Each thread handles local_iter = 4 consecutive elements: it computes its
// slice of a, then accumulates that slice into its own local_sum[id] slot.
// NOTE: unlike the sequential listing, this version adds every element of a
// (there is no a[i] > 0 filter).
shared double a[], b[], c[], local_sum[];  // local_sum is indexed by thread id, so it is an array
id = getmyid();
local_iter = 4;
start_iter = id * local_iter;
end_iter = start_iter + local_iter;
for (i = start_iter; i < end_iter; i++)
    a[i] = b[i] + c[i];
local_sum[id] = 0;
for (i = start_iter; i < end_iter; i++)
    local_sum[id] = local_sum[id] + a[i];
// Synchronization is explicit in the shared-memory model: thread 0 must not
// read local_sum[1] until thread 1 has finished writing it.
barrier;
if (id == 0) {
    sum = local_sum[0] + local_sum[1];
    Print sum;
}
// Data-parallel (CUDA) version of the example.
//
// Launch layout: one block of T threads (the example uses T = 2). Thread `id`
// processes the 4 consecutive elements [4*id, 4*id + 4): it writes
// a[i] = b[i] + c[i] for that slice and stores the slice total in
// local_sum[id].
//
// Preconditions:
//   - a, b, c and local_sum are DEVICE pointers (e.g. from cudaMalloc).
//   - a, b and c hold at least 4 * blockDim.x elements.
//   - local_sum holds at least blockDim.x elements.
//   - Double precision requires a device of compute capability 1.3 or later.
// No shared memory is used and no inter-thread synchronization is needed,
// because each thread touches only its own slice and its own local_sum slot.
__global__ void kernel(
    double* a,
    double* b,
    double* c,
    double* local_sum)
{
    int id = threadIdx.x;
    int local_iter = 4;
    int start_iter = id * local_iter;
    int end_iter = start_iter + local_iter;

    for (int i = start_iter; i < end_iter; i++)
        a[i] = b[i] + c[i];

    local_sum[id] = 0;
    for (int i = start_iter; i < end_iter; i++)
        local_sum[id] = local_sum[id] + a[i];
}

// Host driver: copies the inputs to the device, runs the kernel with one
// block of two threads, copies the two partial sums back and prints their
// total. Returns 0 on success and 1 if any CUDA call fails.
int main()
{
    const int n = 8;            // total number of elements
    const int num_threads = 2;  // each thread handles n / num_threads = 4 elements

    // Placeholder inputs (all zeros); fill b and c with real data here.
    double b[n] = {}, c[n] = {}, local_sum[num_threads] = {};

    // A kernel cannot dereference host stack arrays, so every array it
    // touches needs its own device allocation.
    double *d_a = 0, *d_b = 0, *d_c = 0, *d_local_sum = 0;

    cudaError_t err = cudaMalloc(&d_a, n * sizeof(double));
    if (err == cudaSuccess) err = cudaMalloc(&d_b, n * sizeof(double));
    if (err == cudaSuccess) err = cudaMalloc(&d_c, n * sizeof(double));
    if (err == cudaSuccess) err = cudaMalloc(&d_local_sum, num_threads * sizeof(double));

    if (err == cudaSuccess)
        err = cudaMemcpy(d_b, b, n * sizeof(double), cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
        err = cudaMemcpy(d_c, c, n * sizeof(double), cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        kernel<<<1, num_threads>>>(d_a, d_b, d_c, d_local_sum);
        // A launch does not return a status; bad configurations surface here.
        err = cudaGetLastError();
    }

    // This blocking copy also waits for the kernel to finish, so the partial
    // sums are complete before the host reads them.
    if (err == cudaSuccess)
        err = cudaMemcpy(local_sum, d_local_sum, num_threads * sizeof(double),
                         cudaMemcpyDeviceToHost);

    // cudaFree accepts a null pointer, so this is safe after a partial failure.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    cudaFree(d_local_sum);

    if (err != cudaSuccess) {
        cout << "CUDA error: " << cudaGetErrorString(err) << "\n";
        return 1;
    }

    double sum = local_sum[0] + local_sum[1];
    cout << sum;
    return 0;
}