Add helpers and debug prints for scratch/private allocations
Replace a loop with separate helpers to explicitly show differences between per-HW-thread allocation types. Related-To: NEO-7398 Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
parent
da3a5046fc
commit
d579a63730
|
@ -3105,8 +3105,55 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
|
|||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, res);
|
||||
|
||||
std::string output = testing::internal::GetCapturedStderr();
|
||||
std::string expectedOutput = "Size of SLM (" + std::to_string(slmInlineSizeCopy) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
|
||||
EXPECT_EQ(expectedOutput, output);
|
||||
const std::string expectedPart = "Size of SLM (" + std::to_string(slmInlineSizeCopy) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
|
||||
EXPECT_TRUE(output.find(expectedPart));
|
||||
|
||||
Kernel::fromHandle(kernelHandle)->destroy();
|
||||
}
|
||||
|
||||
// Verifies that, with the PrintDebugMessages flag enabled, creating a kernel from a
// linked module writes the per-HW-thread and total private/scratch sizes to stderr.
// The kernel built here sets no private or scratch sizes, so every size is expected to be 0.
TEST_F(ModuleTests, givenFullyLinkedModuleWhenCreatingKernelThenDebugMsgOnPrivateAndScratchUsageIsPrinted) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.PrintDebugMessages.set(true);

    auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
    pModule->maxGroupSize = 32;

    // Minimal kernel: a zero-filled 64-byte heap is enough for initialization.
    char data[64]{};
    std::unique_ptr<KernelInfo> kernelInfo = std::make_unique<KernelInfo>();
    kernelInfo->heapInfo.KernelHeapSize = 64;
    kernelInfo->heapInfo.pKernelHeap = data;

    auto kernelImmData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(this->device);
    kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);

    pModule->kernelImmDatas.push_back(std::move(kernelImmData));
    // The raw pointer is handed over to programInfo.kernelInfos.
    pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release());

    auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
    linkerInput->traits.requiresPatchingOfInstructionSegments = true;
    linkerInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}});
    pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput);

    auto status = pModule->linkBinary();
    EXPECT_TRUE(status);

    // Capture only what createKernel prints.
    ::testing::internal::CaptureStderr();

    // Initialized so the handle is never read while indeterminate.
    ze_kernel_handle_t kernelHandle = nullptr;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = pModule->translationUnit->programInfo.kernelInfos[0]->kernelDescriptor.kernelMetadata.kernelName.c_str();

    const ze_result_t res = pModule->createKernel(&kernelDesc, &kernelHandle);

    // End the capture before the fatal assertion so stderr is restored on every path.
    const std::string output = testing::internal::GetCapturedStderr();

    // Fatal on failure: the handle must not be dereferenced below if creation failed.
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);

    std::ostringstream expectedOutput;
    expectedOutput << "computeUnits for each thread: " << std::to_string(this->device->getDeviceInfo().computeUnitsUsedForScratch) << "\n"
                   << "perHwThreadPrivateMemorySize: 0\t totalPrivateMemorySize: 0\n"
                   << "perHwThreadScratchSize: 0\t totalScratchSize: 0\n"
                   << "perHwThreadPrivateScratchSize: 0\t totalPrivateScratchSize: 0\n";
    EXPECT_STREQ(output.c_str(), expectedOutput.str().c_str());

    Kernel::fromHandle(kernelHandle)->destroy();
}
|
||||
|
|
|
@ -55,13 +55,31 @@ KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(Ker
|
|||
return KernelHelper::ErrorCode::INVALID_KERNEL;
|
||||
}
|
||||
auto globalMemorySize = device->getDeviceInfo().globalMemSize;
|
||||
uint32_t sizes[] = {attributes.perHwThreadPrivateMemorySize,
|
||||
attributes.perThreadScratchSize[0],
|
||||
attributes.perThreadScratchSize[1]};
|
||||
for (auto &size : sizes) {
|
||||
if (size != 0 && static_cast<uint64_t>(device->getDeviceInfo().computeUnitsUsedForScratch) * static_cast<uint64_t>(size) > globalMemorySize) {
|
||||
return KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
auto computeUnitsForScratch = device->getDeviceInfo().computeUnitsUsedForScratch;
|
||||
auto totalPrivateMemorySize = KernelHelper::getPrivateSurfaceSize(attributes.perHwThreadPrivateMemorySize, computeUnitsForScratch);
|
||||
auto totalScratchSize = KernelHelper::getScratchSize(attributes.perThreadScratchSize[0], computeUnitsForScratch);
|
||||
auto totalPrivateScratchSize = KernelHelper::getPrivateScratchSize(attributes.perThreadScratchSize[1], computeUnitsForScratch);
|
||||
|
||||
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr,
|
||||
"computeUnits for each thread: %u\n", computeUnitsForScratch);
|
||||
|
||||
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr,
|
||||
"perHwThreadPrivateMemorySize: %u\t totalPrivateMemorySize: %lu\n",
|
||||
attributes.perHwThreadPrivateMemorySize, totalPrivateMemorySize);
|
||||
|
||||
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr,
|
||||
"perHwThreadScratchSize: %u\t totalScratchSize: %lu\n",
|
||||
attributes.perThreadScratchSize[0], totalScratchSize);
|
||||
|
||||
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr,
|
||||
"perHwThreadPrivateScratchSize: %u\t totalPrivateScratchSize: %lu\n",
|
||||
attributes.perThreadScratchSize[1], totalPrivateScratchSize);
|
||||
|
||||
if (totalPrivateMemorySize > globalMemorySize ||
|
||||
totalScratchSize > globalMemorySize ||
|
||||
totalPrivateScratchSize > globalMemorySize) {
|
||||
|
||||
return KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
return KernelHelper::ErrorCode::SUCCESS;
|
||||
}
|
||||
|
|
|
@ -24,10 +24,15 @@ struct KernelHelper {
|
|||
static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
|
||||
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
|
||||
const size_t *localWorkSize);
|
||||
|
||||
// Returns the total private-surface size: the per-HW-thread private memory size
// multiplied by the number of compute units used for scratch.
// The first operand is 64-bit, so the product is computed in 64 bits.
static inline uint64_t getPrivateSurfaceSize(uint64_t perHwThreadPrivateMemorySize, uint32_t computeUnitsUsedForScratch) {
    return perHwThreadPrivateMemorySize * computeUnitsUsedForScratch;
}
|
||||
// Returns the total scratch size: the per-HW-thread scratch size multiplied by
// the number of compute units used for scratch.
// The first operand is 64-bit, so the product is computed in 64 bits.
static inline uint64_t getScratchSize(uint64_t perHwThreadScratchSize, uint32_t computeUnitsUsedForScratch) {
    return perHwThreadScratchSize * computeUnitsUsedForScratch;
}
|
||||
// Returns the total private-scratch size: the per-HW-thread private scratch size
// multiplied by the number of compute units used for scratch.
// The first operand is 64-bit, so the product is computed in 64 bits.
static inline uint64_t getPrivateScratchSize(uint64_t perHwThreadPrivateScratchSize, uint32_t computeUnitsUsedForScratch) {
    return perHwThreadPrivateScratchSize * computeUnitsUsedForScratch;
}
|
||||
// Checks whether the device's global memory can hold the kernel's private, scratch and
// private-scratch allocations. Each total is the per-HW-thread size from `attributes`
// multiplied by the device's computeUnitsUsedForScratch.
// Returns OUT_OF_DEVICE_MEMORY if any of the three totals exceeds globalMemSize and
// SUCCESS otherwise; an earlier validation step in the definition can return INVALID_KERNEL.
// When the PrintDebugMessages flag is set, the per-thread and total sizes are printed to stderr.
static ErrorCode checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device);
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue