Add helpers and debug prints for scratch/private allocations

Replace a loop with separate helpers to explicitly show differences
between per-HW-thread allocation types.

Related-To: NEO-7398
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski 2022-11-29 14:37:11 +00:00 committed by Compute-Runtime-Automation
parent da3a5046fc
commit d579a63730
3 changed files with 81 additions and 11 deletions

View File

@ -3105,8 +3105,55 @@ TEST_F(ModuleTests, givenFullyLinkedModuleAndSlmSizeExceedingLocalMemorySizeWhen
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, res);
std::string output = testing::internal::GetCapturedStderr();
std::string expectedOutput = "Size of SLM (" + std::to_string(slmInlineSizeCopy) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
EXPECT_EQ(expectedOutput, output);
const std::string expectedPart = "Size of SLM (" + std::to_string(slmInlineSizeCopy) + ") larger than available (" + std::to_string(localMemSize) + ")\n";
EXPECT_TRUE(output.find(expectedPart));
Kernel::fromHandle(kernelHandle)->destroy();
}
// Creates a kernel from a linked builtin module with PrintDebugMessages enabled and
// checks that stderr contains exactly the four debug lines describing per-HW-thread
// and total private/scratch sizes (all zero for this default-constructed KernelInfo).
TEST_F(ModuleTests, givenFullyLinkedModuleWhenCreatingKernelThenDebugMsgOnPrivateAndScratchUsageIsPrinted) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.PrintDebugMessages.set(true);

    auto pModule = std::make_unique<WhiteBox<Module>>(device, nullptr, ModuleType::Builtin);
    pModule->maxGroupSize = 32;

    // Zero-filled buffer used as the kernel heap.
    char data[64]{};
    std::unique_ptr<KernelInfo> kernelInfo = std::make_unique<KernelInfo>();
    kernelInfo->heapInfo.KernelHeapSize = 64;
    kernelInfo->heapInfo.pKernelHeap = data;

    auto kernelImmData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(this->device);
    kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, true);
    pModule->kernelImmDatas.push_back(std::move(kernelImmData));
    // The raw pointer is handed over to programInfo.kernelInfos via release().
    pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release());

    auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
    linkerInput->traits.requiresPatchingOfInstructionSegments = true;
    linkerInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}});
    pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput);

    auto status = pModule->linkBinary();
    EXPECT_TRUE(status);

    // Capture only what is printed while the kernel is being created.
    ::testing::internal::CaptureStderr();
    ze_kernel_handle_t kernelHandle = nullptr;
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = pModule->translationUnit->programInfo.kernelInfos[0]->kernelDescriptor.kernelMetadata.kernelName.c_str();
    ze_result_t res = pModule->createKernel(&kernelDesc, &kernelHandle);
    // Stop the capture before any fatal assertion so it is never left active.
    std::string output = testing::internal::GetCapturedStderr();
    // Fatal on failure: the handle is used for destroy() below and must be valid.
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);

    std::ostringstream expectedOutput;
    expectedOutput << "computeUnits for each thread: " << std::to_string(this->device->getDeviceInfo().computeUnitsUsedForScratch) << "\n"
                   << "perHwThreadPrivateMemorySize: 0\t totalPrivateMemorySize: 0\n"
                   << "perHwThreadScratchSize: 0\t totalScratchSize: 0\n"
                   << "perHwThreadPrivateScratchSize: 0\t totalPrivateScratchSize: 0\n";
    EXPECT_STREQ(output.c_str(), expectedOutput.str().c_str());

    Kernel::fromHandle(kernelHandle)->destroy();
}

View File

@ -55,13 +55,31 @@ KernelHelper::ErrorCode KernelHelper::checkIfThereIsSpaceForScratchOrPrivate(Ker
return KernelHelper::ErrorCode::INVALID_KERNEL;
}
auto globalMemorySize = device->getDeviceInfo().globalMemSize;
uint32_t sizes[] = {attributes.perHwThreadPrivateMemorySize,
attributes.perThreadScratchSize[0],
attributes.perThreadScratchSize[1]};
for (auto &size : sizes) {
if (size != 0 && static_cast<uint64_t>(device->getDeviceInfo().computeUnitsUsedForScratch) * static_cast<uint64_t>(size) > globalMemorySize) {
return KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY;
}
auto computeUnitsForScratch = device->getDeviceInfo().computeUnitsUsedForScratch;
auto totalPrivateMemorySize = KernelHelper::getPrivateSurfaceSize(attributes.perHwThreadPrivateMemorySize, computeUnitsForScratch);
auto totalScratchSize = KernelHelper::getScratchSize(attributes.perThreadScratchSize[0], computeUnitsForScratch);
auto totalPrivateScratchSize = KernelHelper::getPrivateScratchSize(attributes.perThreadScratchSize[1], computeUnitsForScratch);
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr,
"computeUnits for each thread: %u\n", computeUnitsForScratch);
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr,
"perHwThreadPrivateMemorySize: %u\t totalPrivateMemorySize: %lu\n",
attributes.perHwThreadPrivateMemorySize, totalPrivateMemorySize);
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr,
"perHwThreadScratchSize: %u\t totalScratchSize: %lu\n",
attributes.perThreadScratchSize[0], totalScratchSize);
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr,
"perHwThreadPrivateScratchSize: %u\t totalPrivateScratchSize: %lu\n",
attributes.perThreadScratchSize[1], totalPrivateScratchSize);
if (totalPrivateMemorySize > globalMemorySize ||
totalScratchSize > globalMemorySize ||
totalPrivateScratchSize > globalMemorySize) {
return KernelHelper::ErrorCode::OUT_OF_DEVICE_MEMORY;
}
return KernelHelper::ErrorCode::SUCCESS;
}

View File

@ -24,10 +24,15 @@ struct KernelHelper {
static uint32_t getMaxWorkGroupCount(uint32_t simd, uint32_t availableThreadCount, uint32_t dssCount, uint32_t availableSlmSize,
uint32_t usedSlmSize, uint32_t maxBarrierCount, uint32_t numberOfBarriers, uint32_t workDim,
const size_t *localWorkSize);
static uint64_t getPrivateSurfaceSize(uint64_t perHwThreadPrivateMemorySize, uint32_t computeUnitsUsedForScratch) {
static inline uint64_t getPrivateSurfaceSize(uint64_t perHwThreadPrivateMemorySize, uint32_t computeUnitsUsedForScratch) {
    // Total size = perHwThreadPrivateMemorySize * computeUnitsUsedForScratch,
    // with the 32-bit unit count widened so the product is computed in 64 bits.
    const uint64_t unitCount = computeUnitsUsedForScratch;
    return unitCount * perHwThreadPrivateMemorySize;
}
// Returns perHwThreadScratchSize multiplied by computeUnitsUsedForScratch.
// The 32-bit unit count is widened first, so the product is computed in 64 bits.
static inline uint64_t getScratchSize(uint64_t perHwThreadScratchSize, uint32_t computeUnitsUsedForScratch) {
    const uint64_t unitCount = computeUnitsUsedForScratch;
    return unitCount * perHwThreadScratchSize;
}
// Returns perHwThreadPrivateScratchSize multiplied by computeUnitsUsedForScratch.
// The 32-bit unit count is widened first, so the product is computed in 64 bits.
static inline uint64_t getPrivateScratchSize(uint64_t perHwThreadPrivateScratchSize, uint32_t computeUnitsUsedForScratch) {
    const uint64_t unitCount = computeUnitsUsedForScratch;
    return unitCount * perHwThreadPrivateScratchSize;
}
static ErrorCode checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device);
};