Move grf size to HwInfo

Change-Id: I65ee879644573586d63092b487f8b5ea0cedf1e3
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2019-12-17 08:55:09 +01:00
committed by sys_ocldev
parent 2b0db66c52
commit 8803b4cd4e
31 changed files with 181 additions and 102 deletions

View File

@@ -88,8 +88,9 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(
DEBUG_BREAK_IF(nullptr == threadPayload);
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
uint32_t grfSize = sizeof(typename GfxFamily::GRF);
return alignUp((kernel.getCrossThreadDataSize() +
getPerThreadDataSizeTotal(kernel.getKernelInfo().getMaxSimdSize(), numChannels, localWorkSize)),
getPerThreadDataSizeTotal(kernel.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, localWorkSize)),
WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
}
@@ -415,11 +416,11 @@ void HardwareCommandsHelper<GfxFamily>::updatePerThreadDataTotal(
uint32_t &numChannels,
size_t &sizePerThreadDataTotal,
size_t &localWorkItems) {
uint32_t grfSize = sizeof(typename GfxFamily::GRF);
sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfSize, numChannels);
sizePerThreadData = getPerThreadSizeLocalIDs(simd, numChannels);
auto localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, numChannels);
localIdSizePerThread = std::max(localIdSizePerThread, sizeof(GRF));
uint32_t localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, grfSize, numChannels);
localIdSizePerThread = std::max(localIdSizePerThread, grfSize);
sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkItems) * localIdSizePerThread;
DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group

View File

@@ -27,14 +27,14 @@ void HardwareCommandsHelper<GfxFamily>::setAdditionalInfo(
const size_t &sizeCrossThreadData,
const size_t &sizePerThreadData,
const uint32_t threadsPerThreadGroup) {
DEBUG_BREAK_IF((sizeCrossThreadData % sizeof(GRF)) != 0);
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / sizeof(GRF));
auto grfSize = sizeof(typename GfxFamily::GRF);
DEBUG_BREAK_IF((sizeCrossThreadData % grfSize) != 0);
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / grfSize);
DEBUG_BREAK_IF(numGrfCrossThreadData == 0);
pInterfaceDescriptor->setCrossThreadConstantDataReadLength(numGrfCrossThreadData);
DEBUG_BREAK_IF((sizePerThreadData % sizeof(GRF)) != 0);
auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / sizeof(GRF));
DEBUG_BREAK_IF((sizePerThreadData % grfSize) != 0);
auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
// at least 1 GRF of perThreadData for each thread in a thread group when sizeCrossThreadData != 0
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
@@ -121,9 +121,12 @@ void HardwareCommandsHelper<GfxFamily>::programPerThreadData(
size_t &sizePerThreadDataTotal,
size_t &localWorkItems) {
uint32_t grfSize = sizeof(typename GfxFamily::GRF);
sendPerThreadData(
ioh,
simd,
grfSize,
numChannels,
localWorkSize,
kernel.getKernelInfo().workgroupDimensionsOrder,

View File

@@ -17,6 +17,7 @@ namespace NEO {
size_t PerThreadDataHelper::sendPerThreadData(
LinearStream &indirectHeap,
uint32_t simd,
uint32_t grfSize,
uint32_t numChannels,
const size_t localWorkSizes[3],
const std::array<uint8_t, 3> &workgroupWalkOrder,
@@ -24,7 +25,7 @@ size_t PerThreadDataHelper::sendPerThreadData(
auto offsetPerThreadData = indirectHeap.getUsed();
if (numChannels) {
auto localWorkSize = localWorkSizes[0] * localWorkSizes[1] * localWorkSizes[2];
auto sizePerThreadDataTotal = getPerThreadDataSizeTotal(simd, numChannels, localWorkSize);
auto sizePerThreadDataTotal = getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize);
auto pDest = indirectHeap.getSpace(sizePerThreadDataTotal);
// Generate local IDs
@@ -34,18 +35,18 @@ size_t PerThreadDataHelper::sendPerThreadData(
static_cast<uint16_t>(localWorkSizes[1]),
static_cast<uint16_t>(localWorkSizes[2])}},
std::array<uint8_t, 3>{{workgroupWalkOrder[0], workgroupWalkOrder[1], workgroupWalkOrder[2]}},
hasKernelOnlyImages);
hasKernelOnlyImages, grfSize);
}
return offsetPerThreadData;
}
// Computes the thread payload size by summing GRF-sized pieces:
//   - one block of (GRFs per thread for `simd`) GRFs for every local-ID channel
//     reported by getNumLocalIdChannels(threadPayload),
//   - one GRF when threadPayload.HeaderPresent is set,
//   - (GRFs per thread) GRFs when threadPayload.LocalIDFlattenedPresent is set,
//   - one GRF when threadPayload.UnusedPerThreadConstantPresent is set.
// The result is expressed in the same unit as the GRF size used in the sums.
//
// NOTE(review): this listing appears to interleave the pre-change lines (which use
// sizeof(GRF) and the two-parameter signature) with the post-change lines (which use
// the `grfSize` parameter and the three-parameter signature), without +/- markers.
// Only one of each pair exists in the real file — confirm against the repository.
uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd) {
uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) {
// Number of GRFs one thread needs per channel for this SIMD width.
uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd));
uint32_t threadPayloadSize = 0;
// Variant with the GRF size taken from the compile-time GRF type.
threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * sizeof(GRF);
threadPayloadSize += (threadPayload.HeaderPresent) ? sizeof(GRF) : 0;
threadPayloadSize += (threadPayload.LocalIDFlattenedPresent) ? (sizeof(GRF) * multiplier) : 0;
threadPayloadSize += (threadPayload.UnusedPerThreadConstantPresent) ? (sizeof(GRF)) : 0;
// Variant with the GRF size supplied by the caller through `grfSize`.
threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize;
threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0;
threadPayloadSize += (threadPayload.LocalIDFlattenedPresent) ? (grfSize * multiplier) : 0;
threadPayloadSize += (threadPayload.UnusedPerThreadConstantPresent) ? grfSize : 0;
return threadPayloadSize;
}
} // namespace NEO

View File

@@ -18,22 +18,25 @@ namespace NEO {
class LinearStream;
struct PerThreadDataHelper {
// Returns the local-ID size for a single thread by forwarding directly to
// getPerThreadSizeLocalIDs; no additional computation happens here.
//
// NOTE(review): this listing appears to show both the pre-change form (size_t return,
// no `grfSize` forwarded) and the post-change form (uint32_t return, `grfSize`
// forwarded) without +/- markers — confirm which one is in the actual header.
static inline size_t getLocalIdSizePerThread(
static inline uint32_t getLocalIdSizePerThread(
uint32_t simd,
uint32_t grfSize,
uint32_t numChannels) {
return getPerThreadSizeLocalIDs(simd, numChannels);
return getPerThreadSizeLocalIDs(simd, grfSize, numChannels);
}
// Returns the total per-thread data size for a work group: the thread count
// from getThreadsPerWG(simd, localWorkSize) multiplied by the per-thread
// local-ID size from getLocalIdSizePerThread.
//
// NOTE(review): the two return statements below appear to be the pre-change call
// (without `grfSize`) and the post-change call (with `grfSize`) shown together
// without +/- markers — only one is expected in the real header; confirm.
static inline size_t getPerThreadDataSizeTotal(
uint32_t simd,
uint32_t grfSize,
uint32_t numChannels,
size_t localWorkSize) {
return getThreadsPerWG(simd, localWorkSize) * getLocalIdSizePerThread(simd, numChannels);
return getThreadsPerWG(simd, localWorkSize) * getLocalIdSizePerThread(simd, grfSize, numChannels);
}
static size_t sendPerThreadData(
LinearStream &indirectHeap,
uint32_t simd,
uint32_t grfSize,
uint32_t numChannels,
const size_t localWorkSizes[3],
const std::array<uint8_t, 3> &workgroupWalkOrder,
@@ -45,6 +48,6 @@ struct PerThreadDataHelper {
threadPayload.LocalIDZPresent;
}
static uint32_t getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd);
static uint32_t getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize);
};
} // namespace NEO