mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
Move grf size to HwInfo
Change-Id: I65ee879644573586d63092b487f8b5ea0cedf1e3
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
2b0db66c52
commit
8803b4cd4e
@@ -88,8 +88,9 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(
|
||||
DEBUG_BREAK_IF(nullptr == threadPayload);
|
||||
|
||||
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
|
||||
uint32_t grfSize = sizeof(typename GfxFamily::GRF);
|
||||
return alignUp((kernel.getCrossThreadDataSize() +
|
||||
getPerThreadDataSizeTotal(kernel.getKernelInfo().getMaxSimdSize(), numChannels, localWorkSize)),
|
||||
getPerThreadDataSizeTotal(kernel.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, localWorkSize)),
|
||||
WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
}
|
||||
|
||||
@@ -415,11 +416,11 @@ void HardwareCommandsHelper<GfxFamily>::updatePerThreadDataTotal(
|
||||
uint32_t &numChannels,
|
||||
size_t &sizePerThreadDataTotal,
|
||||
size_t &localWorkItems) {
|
||||
uint32_t grfSize = sizeof(typename GfxFamily::GRF);
|
||||
sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfSize, numChannels);
|
||||
|
||||
sizePerThreadData = getPerThreadSizeLocalIDs(simd, numChannels);
|
||||
|
||||
auto localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, numChannels);
|
||||
localIdSizePerThread = std::max(localIdSizePerThread, sizeof(GRF));
|
||||
uint32_t localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, grfSize, numChannels);
|
||||
localIdSizePerThread = std::max(localIdSizePerThread, grfSize);
|
||||
|
||||
sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkItems) * localIdSizePerThread;
|
||||
DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group
|
||||
|
||||
@@ -27,14 +27,14 @@ void HardwareCommandsHelper<GfxFamily>::setAdditionalInfo(
|
||||
const size_t &sizeCrossThreadData,
|
||||
const size_t &sizePerThreadData,
|
||||
const uint32_t threadsPerThreadGroup) {
|
||||
|
||||
DEBUG_BREAK_IF((sizeCrossThreadData % sizeof(GRF)) != 0);
|
||||
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / sizeof(GRF));
|
||||
auto grfSize = sizeof(typename GfxFamily::GRF);
|
||||
DEBUG_BREAK_IF((sizeCrossThreadData % grfSize) != 0);
|
||||
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / grfSize);
|
||||
DEBUG_BREAK_IF(numGrfCrossThreadData == 0);
|
||||
pInterfaceDescriptor->setCrossThreadConstantDataReadLength(numGrfCrossThreadData);
|
||||
|
||||
DEBUG_BREAK_IF((sizePerThreadData % sizeof(GRF)) != 0);
|
||||
auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / sizeof(GRF));
|
||||
DEBUG_BREAK_IF((sizePerThreadData % grfSize) != 0);
|
||||
auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
|
||||
|
||||
// at least 1 GRF of perThreadData for each thread in a thread group when sizeCrossThreadData != 0
|
||||
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
|
||||
@@ -121,9 +121,12 @@ void HardwareCommandsHelper<GfxFamily>::programPerThreadData(
|
||||
size_t &sizePerThreadDataTotal,
|
||||
size_t &localWorkItems) {
|
||||
|
||||
uint32_t grfSize = sizeof(typename GfxFamily::GRF);
|
||||
|
||||
sendPerThreadData(
|
||||
ioh,
|
||||
simd,
|
||||
grfSize,
|
||||
numChannels,
|
||||
localWorkSize,
|
||||
kernel.getKernelInfo().workgroupDimensionsOrder,
|
||||
|
||||
@@ -17,6 +17,7 @@ namespace NEO {
|
||||
size_t PerThreadDataHelper::sendPerThreadData(
|
||||
LinearStream &indirectHeap,
|
||||
uint32_t simd,
|
||||
uint32_t grfSize,
|
||||
uint32_t numChannels,
|
||||
const size_t localWorkSizes[3],
|
||||
const std::array<uint8_t, 3> &workgroupWalkOrder,
|
||||
@@ -24,7 +25,7 @@ size_t PerThreadDataHelper::sendPerThreadData(
|
||||
auto offsetPerThreadData = indirectHeap.getUsed();
|
||||
if (numChannels) {
|
||||
auto localWorkSize = localWorkSizes[0] * localWorkSizes[1] * localWorkSizes[2];
|
||||
auto sizePerThreadDataTotal = getPerThreadDataSizeTotal(simd, numChannels, localWorkSize);
|
||||
auto sizePerThreadDataTotal = getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize);
|
||||
auto pDest = indirectHeap.getSpace(sizePerThreadDataTotal);
|
||||
|
||||
// Generate local IDs
|
||||
@@ -34,18 +35,18 @@ size_t PerThreadDataHelper::sendPerThreadData(
|
||||
static_cast<uint16_t>(localWorkSizes[1]),
|
||||
static_cast<uint16_t>(localWorkSizes[2])}},
|
||||
std::array<uint8_t, 3>{{workgroupWalkOrder[0], workgroupWalkOrder[1], workgroupWalkOrder[2]}},
|
||||
hasKernelOnlyImages);
|
||||
hasKernelOnlyImages, grfSize);
|
||||
}
|
||||
return offsetPerThreadData;
|
||||
}
|
||||
|
||||
uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd) {
|
||||
uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) {
|
||||
uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd));
|
||||
uint32_t threadPayloadSize = 0;
|
||||
threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * sizeof(GRF);
|
||||
threadPayloadSize += (threadPayload.HeaderPresent) ? sizeof(GRF) : 0;
|
||||
threadPayloadSize += (threadPayload.LocalIDFlattenedPresent) ? (sizeof(GRF) * multiplier) : 0;
|
||||
threadPayloadSize += (threadPayload.UnusedPerThreadConstantPresent) ? (sizeof(GRF)) : 0;
|
||||
threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize;
|
||||
threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0;
|
||||
threadPayloadSize += (threadPayload.LocalIDFlattenedPresent) ? (grfSize * multiplier) : 0;
|
||||
threadPayloadSize += (threadPayload.UnusedPerThreadConstantPresent) ? grfSize : 0;
|
||||
return threadPayloadSize;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -18,22 +18,25 @@ namespace NEO {
|
||||
class LinearStream;
|
||||
|
||||
struct PerThreadDataHelper {
|
||||
static inline size_t getLocalIdSizePerThread(
|
||||
static inline uint32_t getLocalIdSizePerThread(
|
||||
uint32_t simd,
|
||||
uint32_t grfSize,
|
||||
uint32_t numChannels) {
|
||||
return getPerThreadSizeLocalIDs(simd, numChannels);
|
||||
return getPerThreadSizeLocalIDs(simd, grfSize, numChannels);
|
||||
}
|
||||
|
||||
static inline size_t getPerThreadDataSizeTotal(
|
||||
uint32_t simd,
|
||||
uint32_t grfSize,
|
||||
uint32_t numChannels,
|
||||
size_t localWorkSize) {
|
||||
return getThreadsPerWG(simd, localWorkSize) * getLocalIdSizePerThread(simd, numChannels);
|
||||
return getThreadsPerWG(simd, localWorkSize) * getLocalIdSizePerThread(simd, grfSize, numChannels);
|
||||
}
|
||||
|
||||
static size_t sendPerThreadData(
|
||||
LinearStream &indirectHeap,
|
||||
uint32_t simd,
|
||||
uint32_t grfSize,
|
||||
uint32_t numChannels,
|
||||
const size_t localWorkSizes[3],
|
||||
const std::array<uint8_t, 3> &workgroupWalkOrder,
|
||||
@@ -45,6 +48,6 @@ struct PerThreadDataHelper {
|
||||
threadPayload.LocalIDZPresent;
|
||||
}
|
||||
|
||||
static uint32_t getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd);
|
||||
static uint32_t getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize);
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user