Graphics Reference
In-Depth Information
/*
 * PipelinedBatchingKernel -- simplified pipelined local batching.
 *
 * Each work-item ("lane") lIdx of a work-group builds batch number lIdx.
 * Lane 0 reads one new pair per loop iteration from gPairs, starting at
 * gOffsets[group id].  Every other lane reads from the queue written by
 * the lane before it (ldsBuf[lIdx - 1] / nPairsInQueue(lIdx - 1)).
 *
 * A pair whose two elements (iPair.x, iPair.y) are both unlocked in the
 * lane's batch is added to that batch: both elements are locked and the
 * lane index is written to gBatchOut at the pair's original index
 * (iPair.z).  Otherwise the pair is forwarded to the next lane through
 * ldsBuf[lIdx].
 *
 * iPair.x == -1 is the "no pair fetched this iteration" sentinel.
 *
 * NOTE(review): this is a simplified listing.  It shows no work-group
 * barriers or memory fences around the accesses to ldsNRemainings,
 * ldsBuf, the queue counters, or the locks; presumably the full kernel
 * synchronizes these -- confirm before reusing this code as-is.
 * NOTE(review): gOffsets, gPairs, gBatchOut, ldsBuf, ldsNRemainings,
 * nPairsInQueue(), locked(), lock(), clearLocks() and
 * countRemainingPairs() are declared outside this listing.
 */
__kernel
void PipelinedBatchingKernel()
{
    int start = gOffsets[get_group_id(0)];
    int i = start;
    int lIdx = get_local_id(0);

    // 0. initialize
    nPairsInQueue(lIdx) = 0;
    clearLocks();
    if (lIdx == 0)
    {
        ldsNRemainings = countRemainingPairs();
    }

    while (ldsNRemainings != 0)
    {
        // 1. fetch one pair from buffer
        // x == -1 marks "nothing fetched"; it is overwritten below when a
        // pair is available for this lane.
        int4 iPair = make_int4(-1, 0, 0, 0);
        if (lIdx == 0)
        {
            // SIMD lane 0 fetches from global memory; z carries the pair's
            // index so the batch id can be written back in step 3.
            iPair = make_int4(gPairs[i].x, gPairs[i].y, i, 0);
        }
        else
        {
            // other lanes fetch from queues (the one filled by lane lIdx - 1)
            if (nPairsInQueue(lIdx - 1) != 0)
            {
                iPair = ldsBuf[lIdx - 1];
                nPairsInQueue(lIdx - 1)--;
            }
        }

        // 2. check dependency of iPair to the batch
        // NOTE(review): this is evaluated even when iPair.x == -1, so
        // locked() is called with the sentinel -- confirm locked()
        // tolerates that index.
        bool notLocked = !locked(iPair.x) && !locked(iPair.y);

        // 3. process iPair
        if (iPair.x != -1)
        {
            if (notLocked)
            {
                // iPair was independent. add to the batch
                lock(iPair.x);
                lock(iPair.y);
                gBatchOut[iPair.z] = lIdx;
            }
            else
            {
                // forward iPair to next lane
                ldsBuf[lIdx] = iPair;
                nPairsInQueue(lIdx)++;
            }
        }

        i++;
        if (lIdx == 0)
            ldsNRemainings = countRemainingPairs();
    }
}
Listing 4.1. Simplified kernel code of pipelined local batching.
Search WWH ::




Custom Search