CSC/ECE 506 Spring 2010/ch 2 maf: Difference between revisions

From Expertiza_Wiki
Jump to navigation Jump to search
Line 52: Line 52:
         sum = sum + a[i];
         sum = sum + a[i];
  Print sum;
  Print sum;
// Data parallel implementation in C++ with OpenMP.
int main(void)
{
    double a[8], b[8], c[8], localSum[2];
    #pragma omp parallel for
    for (int id = 0; id < 2; id++)
    {
        int local_iter = 4;
        int start_iter = id * local_iter;
        int end_iter = start_iter + local_iter;
        for (int i = start_iter; i < end_iter; i++)
            a[i] = b[i] + c[i];
   
        local_sum[id] = 0;
        for (int i = start_iter; i < end_iter; i++)
            if (a[i] > 0)
                localSum[id] = localSum[id] + a[i];
    }
    double sum = localSum[0] + localSum[1];
    cout << sum;
}


  // Data parallel implementation in C for CUDA.
  // Data parallel implementation in C for CUDA.
Line 59: Line 85:
     double* b,
     double* b,
     double* c,
     double* c,
     double* local_sum)
     double* localSum)
  {
  {
     int id = threadIdx.x;
     int id = threadIdx.x;
Line 71: Line 97:
     local_sum[id] = 0;
     local_sum[id] = 0;
     for (int i = start_iter; i < end_iter; i++)
     for (int i = start_iter; i < end_iter; i++)
         local_sum[id] = local_sum[id] + a[i];
         if (a[i] > 0)
            localSum[id] = localSum[id] + a[i];
  }
  }
    
    
  int main()
  int main()
  {
  {
     double a[8], b[8], c[8], local_sum[2];
     double a[8], b[8], c[8], localSum[2];
     kernel<<<1, 2>>>(a, b, c, local_sum);
     kernel<<<1, 2>>>(a, b, c, localSum);
     double sum = local_sum[0] + local_sum[1];
     double sum = localSum[0] + localSum[1];
     cout << sum;
     cout << sum;
  }
  }
Line 97: Line 124:
     LOCAL_SUM[ID] = 0;
     LOCAL_SUM[ID] = 0;
     DO I = START_ITER:END_ITER
     DO I = START_ITER:END_ITER
         LOCAL_SUM[ID] = LOCAL_SUM[ID] + A[I]
         IF A[I] > 0 THEN
            LOCAL_SUM[ID] = LOCAL_SUM[ID] + A[I]
        END IF
     END DO
     END DO
  END FORALL
  END FORALL

Revision as of 17:38, 27 January 2010

Supplement to Chapter 2: The Data Parallel Programming Model

Overview

Comparing the Data Parallel Model with the Shared Memory and Message Passing Models

Comparison between shared memory, message passing, and data parallel programming models (adapted from Solihin 2008, page 22).
Aspects Shared Memory Message Passing Data Parallel
Communication implicit (via loads/stores) explicit messages implicit
Synchronization explicit implicit (via messages) implicit for SIMD; explicit for SPMD
Hardware support typically required none
Development effort lower higher higher
Tuning effort higher lower

A Code Example

// Simple sequential code from Solihin 2008, page 25.

// Step 1: element-wise add of b and c into a, for the 8 elements at indices 0..7.
for (i = 0; i < 8; i++)
    a[i] = b[i] + c[i];
// Step 2: accumulate only the strictly positive entries of a into sum.
sum = 0;
for (i = 0; i < 8; i++)
    if (a[i] > 0)
        sum = sum + a[i];
// Pseudocode-style output statement for the final result.
Print sum;
// Data parallel implementation in C++ with OpenMP.

// Data-parallel sum of the positive entries of a = b + c, split across two
// loop iterations (id = 0, 1) that OpenMP may run on different threads.
// Each iteration owns 4 consecutive elements and its own localSum slot, so
// the iterations never write to the same location.
int main(void)
{
    // NOTE(review): b and c are read below but never initialized in this
    // snippet; the input data is presumably filled in elsewhere.
    double a[8], b[8], c[8], localSum[2];

    #pragma omp parallel for
    for (int id = 0; id < 2; id++)
    {
        int local_iter = 4;                       // elements per iteration
        int start_iter = id * local_iter;         // first element owned by id
        int end_iter = start_iter + local_iter;   // one past the last owned element

        for (int i = start_iter; i < end_iter; i++)
            a[i] = b[i] + c[i];

        // Fixed: the array is declared as localSum, not local_sum.
        localSum[id] = 0;
        for (int i = start_iter; i < end_iter; i++)
            if (a[i] > 0)
                localSum[id] = localSum[id] + a[i];
    }

    // Combine the two partial sums after the parallel loop has finished.
    double sum = localSum[0] + localSum[1];
    cout << sum;
}
// Data parallel implementation in C for CUDA.

/**
 * Computes a = b + c element-wise, then stores in localSum[threadIdx.x] the
 * sum of the strictly positive results produced by that thread.
 *
 * Work split: each thread handles 4 consecutive elements starting at
 * threadIdx.x * 4. Only threadIdx.x is used for indexing (blockIdx is
 * ignored), so the kernel is meant to be launched with a single block.
 *
 * Preconditions: a, b and c must hold at least 4 * blockDim.x elements and
 * localSum at least blockDim.x elements; there is no bounds check.
 *
 * @param a         output array receiving b[i] + c[i]
 * @param b         first input array
 * @param c         second input array
 * @param localSum  per-thread partial sums, one slot per thread
 */
__global__ void kernel(
    double* a,
    double* b,
    double* c,
    double* localSum)
{
    int id = threadIdx.x;
    int local_iter = 4;                       // elements per thread
    int start_iter = id * local_iter;         // first element owned by this thread
    int end_iter = start_iter + local_iter;   // one past the last owned element

    for (int i = start_iter; i < end_iter; i++)
        a[i] = b[i] + c[i];

    // Fixed: the original zeroed an undeclared `local_sum`; the parameter is
    // `localSum`. Accumulate in a local and write the slot once at the end.
    double partial = 0;
    for (int i = start_iter; i < end_iter; i++)
        if (a[i] > 0)
            partial = partial + a[i];
    localSum[id] = partial;
}
 
// Host driver: copies the inputs to the device, launches `kernel` with one
// block of two threads, copies the two partial sums back and prints their
// total. Returns 1 (after printing the CUDA error string) if any CUDA call
// fails, 0 otherwise.
int main()
{
    // NOTE(review): b and c are never initialized in this snippet; the input
    // data is presumably filled in before the copies below.
    double a[8], b[8], c[8], localSum[2];

    // Fixed: the original passed these host stack arrays straight to the
    // kernel. A kernel must be given device memory, so allocate device
    // buffers and copy the inputs over.
    double *d_a = 0, *d_b = 0, *d_c = 0, *d_localSum = 0;
    cudaError_t err = cudaMalloc(&d_a, sizeof(a));
    if (err == cudaSuccess) err = cudaMalloc(&d_b, sizeof(b));
    if (err == cudaSuccess) err = cudaMalloc(&d_c, sizeof(c));
    if (err == cudaSuccess) err = cudaMalloc(&d_localSum, sizeof(localSum));
    if (err == cudaSuccess) err = cudaMemcpy(d_b, b, sizeof(b), cudaMemcpyHostToDevice);
    if (err == cudaSuccess) err = cudaMemcpy(d_c, c, sizeof(c), cudaMemcpyHostToDevice);

    if (err == cudaSuccess)
    {
        kernel<<<1, 2>>>(d_a, d_b, d_c, d_localSum);
        err = cudaGetLastError();   // launches do not return errors directly
    }

    // The blocking device-to-host copy also waits for the kernel to finish,
    // so localSum is valid on the host once it returns successfully.
    if (err == cudaSuccess)
        err = cudaMemcpy(localSum, d_localSum, sizeof(localSum), cudaMemcpyDeviceToHost);

    // Freeing a null pointer is a no-op, so this is safe on every path.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    cudaFree(d_localSum);

    if (err != cudaSuccess)
    {
        cout << cudaGetErrorString(err);
        return 1;
    }

    double sum = localSum[0] + localSum[1];
    cout << sum;
    return 0;
}
C DATA PARALLEL IMPLEMENTATION IN FORTRAN

REAL A(8), B(8), C(8), LOCAL_SUM(2), SUM

C EACH OF THE TWO FORALL INSTANCES (ID = 1, 2) OWNS 4 CONSECUTIVE
C ELEMENTS: ID = 1 COVERS 1..4 AND ID = 2 COVERS 5..8.
FORALL ID = 1:2
    LOCAL_ITER = 4
    START_ITER = (ID - 1) * LOCAL_ITER + 1
    END_ITER = START_ITER + LOCAL_ITER - 1

    DO I = START_ITER:END_ITER
        A[I] = B[I] + C[I]
    END DO
 
C ACCUMULATE ONLY THE STRICTLY POSITIVE ENTRIES INTO THIS INSTANCE'S SLOT.
    LOCAL_SUM[ID] = 0
    DO I = START_ITER:END_ITER
        IF A[I] > 0 THEN
            LOCAL_SUM[ID] = LOCAL_SUM[ID] + A[I]
        END IF
    END DO
END FORALL

C FIXED: LOCAL_SUM HAS 2 ELEMENTS FILLED AT INDICES 1 AND 2 (ID = 1:2),
C SO THE PARTIAL SUMS ARE AT 1 AND 2, NOT 0 AND 1.
SUM = LOCAL_SUM[1] + LOCAL_SUM[2]
WRITE(*,*) SUM

Hardware Examples