feature: gpu assert implementation

- allocate assert buffer when kernel has assert
- track assert kernels in cmdlists and cmdqueues
- check and print assert at sync calls: cmdqueue synchronize(), fence
synchronize(), event hostSynchronize(), synchronous imm cmdlists
append()

Related-To: NEO-5753

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2023-03-13 14:14:35 +00:00
committed by Compute-Runtime-Automation
parent f57ff2913c
commit 0204761add
27 changed files with 665 additions and 13 deletions

View File

@@ -337,6 +337,10 @@ struct CommandList : _ze_command_list_handle_t {
this->csr = newCsr;
}
// Returns the kernelWithAssertAppended flag, i.e. whether a kernel using assert
// has been recorded on this command list.
bool hasKernelWithAssert() {
return kernelWithAssertAppended;
}
protected:
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
@@ -411,6 +415,7 @@ struct CommandList : _ze_command_list_handle_t {
bool pipeControlMultiKernelEventSync = false;
bool compactL3FlushEventPacket = false;
bool dynamicHeapRequired = false;
bool kernelWithAssertAppended = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -120,6 +120,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
containsCooperativeKernelsFlag = false;
clearCommandsToPatch();
commandListSLMEnabled = false;
kernelWithAssertAppended = false;
if (!isCopyOnly()) {
postInitComputeSetup();

View File

@@ -169,6 +169,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
protected:
void printKernelsPrintfOutput(bool hangDetected);
MOCKABLE_VIRTUAL void checkAssert();
std::atomic<bool> dependenciesPresent{false};
};

View File

@@ -7,6 +7,7 @@
#pragma once
#include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/scratch_space_controller.h"
@@ -274,14 +275,17 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, completionStamp.taskCount);
if (waitStatus == NEO::WaitStatus::GpuHang) {
this->printKernelsPrintfOutput(true);
this->checkAssert();
return ZE_RESULT_ERROR_DEVICE_LOST;
}
csr->getInternalAllocationStorage()->cleanAllocationList(completionStamp.taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
this->printKernelsPrintfOutput(false);
this->checkAssert();
}
this->cmdListCurrentStartOffset = commandStream->getUsed();
this->containsAnyKernel = false;
this->kernelWithAssertAppended = false;
this->handlePostSubmissionState();
if (NEO::DebugManager.flags.PauseOnEnqueue.get() != -1) {
@@ -999,4 +1003,12 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::printKernelsPrintfOutput(boo
this->printfKernelContainer.clear();
}
// Invokes the root device environment's assert handler, but only when this immediate
// command list reports that a kernel with assert was appended.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
    if (!this->hasKernelWithAssert()) {
        return;
    }

    // A kernel with assert was appended, so a missing handler is treated as unrecoverable.
    auto &assertHandler = this->device->getNEODevice()->getRootDeviceEnvironment().assertHandler;
    UNRECOVERABLE_IF(assertHandler.get() == nullptr);
    assertHandler->printAssertAndAbort();
}
} // namespace L0

View File

@@ -214,6 +214,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
storePrintfKernel(kernel);
}
if (kernelDescriptor.kernelAttributes.flags.usesAssert) {
kernelWithAssertAppended = true;
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart});
additionalCommands.pop_front();

View File

@@ -360,6 +360,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
}
if (kernelDescriptor.kernelAttributes.flags.usesAssert) {
kernelWithAssertAppended = true;
}
if (kernelImp->usesRayTracing()) {
NEO::GraphicsAllocation *memoryBackedBuffer = device->getNEODevice()->getRTMemoryBackedBuffer();
if (memoryBackedBuffer == nullptr) {

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/command_container/cmdcontainer.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/csr_definitions.h"
@@ -188,8 +189,19 @@ void CommandQueueImp::printKernelsPrintfOutput(bool hangDetected) {
this->printfKernelContainer.clear();
}
// Consumes the cmdListWithAssertExecuted flag and, when it was set, calls the
// root device environment's assert handler.
void CommandQueueImp::checkAssert() {
    // Atomically flip the flag from true to false; nothing to do when it was not set.
    bool expectedFlag = true;
    if (!cmdListWithAssertExecuted.compare_exchange_strong(expectedFlag, false)) {
        return;
    }

    // The flag was set, so a missing handler is treated as unrecoverable.
    auto &assertHandler = device->getNEODevice()->getRootDeviceEnvironment().assertHandler;
    UNRECOVERABLE_IF(assertHandler.get() == nullptr);
    assertHandler->printAssertAndAbort();
}
void CommandQueueImp::postSyncOperations(bool hangDetected) {
printKernelsPrintfOutput(hangDetected);
checkAssert();
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());

View File

@@ -181,6 +181,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->mergeOneCmdListPipelinedState(commandList);
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
if (commandList->hasKernelWithAssert()) {
cmdListWithAssertExecuted.exchange(true);
}
}
this->updateBaseAddressState(CommandList::fromHandle(phCommandLists[numCommandLists - 1]));

View File

@@ -88,6 +88,7 @@ struct CommandQueueImp : public CommandQueue {
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) override;
void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) override;
void printKernelsPrintfOutput(bool hangDetected);
void checkAssert();
protected:
MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,
@@ -101,6 +102,7 @@ struct CommandQueueImp : public CommandQueue {
NEO::HeapContainer heapContainer;
ze_command_queue_desc_t desc;
std::vector<Kernel *> printfKernelContainer;
std::atomic<bool> cmdListWithAssertExecuted = false;
Device *device = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;

View File

@@ -7,6 +7,7 @@
#include "level_zero/core/source/device/device_imp.h"
#include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/built_ins/sip.h"
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver.h"
@@ -1246,6 +1247,9 @@ void DeviceImp::releaseResources() {
!neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger->isLegacy()) {
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.reset(nullptr);
}
if (neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->assertHandler.get()) {
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->assertHandler.reset(nullptr);
}
// close connection and async threads in debug session before releasing device resources
if (debugSession.get()) {

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/sub_device.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
@@ -309,6 +310,9 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
static_cast<Kernel *>(this->getKernelForPrintf())->printPrintfOutput(true);
this->setKernelForPrintf(nullptr);
}
if (device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
device->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
}
return ret;
}
@@ -318,6 +322,9 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
if (elapsedTimeSinceGpuHangCheck.count() >= this->gpuHangCheckPeriod.count()) {
lastHangCheckTime = currentTime;
if (this->csr->isGpuHangDetected()) {
if (device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
device->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
}
return ZE_RESULT_ERROR_DEVICE_LOST;
}
}
@@ -332,6 +339,9 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
} while (timeDiff < timeout);
if (device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
device->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
}
return ret;
}

View File

@@ -64,12 +64,14 @@ ze_result_t Fence::hostSynchronize(uint64_t timeout) {
ret = queryStatus();
if (ret == ZE_RESULT_SUCCESS) {
cmdQueue->printKernelsPrintfOutput(false);
cmdQueue->checkAssert();
return ZE_RESULT_SUCCESS;
}
currentTime = std::chrono::high_resolution_clock::now();
if (csr->checkGpuHangDetected(currentTime, lastHangCheckTime)) {
cmdQueue->printKernelsPrintfOutput(true);
cmdQueue->checkAssert();
return ZE_RESULT_ERROR_DEVICE_LOST;
}

View File

@@ -7,6 +7,7 @@
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/debugger/debugger_l0.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
@@ -217,6 +218,12 @@ KernelImp::~KernelImp() {
PrintfHandler::printOutput(kernelImmData, this->printfBuffer, module->getDevice(), false);
module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(printfBuffer);
}
if (kernelImmData && kernelImmData->getDescriptor().kernelAttributes.flags.usesAssert && module &&
module->getDevice()->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
module->getDevice()->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
}
slmArgSizes.clear();
crossThreadData.reset();
surfaceStateHeapData.reset();
@@ -921,6 +928,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
this->setInlineSamplers();
this->setAssertBuffer();
residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(),
kernelImmData->getResidencyContainer().end());
@@ -1106,4 +1115,17 @@ ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint
}
return ZE_RESULT_SUCCESS;
}
// Patches the assert buffer GPU address into the kernel's cross-thread data and adds
// the buffer to the kernel's residency container. Kernels whose descriptor does not
// set usesAssert are left untouched.
void KernelImp::setAssertBuffer() {
    if (!getKernelDescriptor().kernelAttributes.flags.usesAssert) {
        return;
    }

    auto neoDevice = this->module->getDevice()->getNEODevice();
    auto assertHandler = neoDevice->getRootDeviceEnvironmentRef().getAssertHandler(neoDevice);
    auto assertBuffer = assertHandler->getAssertBuffer();

    // Write the buffer address at the location described by the assertBufferAddress implicit arg.
    const auto &assertBufferArg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.assertBufferAddress;
    NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
                      assertBufferArg,
                      static_cast<uintptr_t>(assertBuffer->getGpuAddressToPatch()));

    this->residencyContainer.push_back(assertBuffer);
}
} // namespace L0

View File

@@ -181,6 +181,7 @@ struct KernelImp : Kernel {
void createPrintfBuffer();
void setDebugSurface();
void setAssertBuffer();
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);

View File

@@ -228,9 +228,9 @@ struct CommandHandler {
return result;
}
ze_result_t appendKernel(ze_kernel_handle_t kernel, const ze_group_count_t &dispatchTraits) {
ze_result_t appendKernel(ze_kernel_handle_t kernel, const ze_group_count_t &dispatchTraits, ze_event_handle_t event = nullptr) {
return zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
nullptr, 0, nullptr);
event, 0, nullptr);
}
ze_result_t execute() {

View File

@@ -177,6 +177,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
template <GFXCORE_FAMILY gfxCoreFamily>
struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreFamily> {
using BaseClass = CommandListCoreFamilyImmediate<gfxCoreFamily>;
using BaseClass::checkAssert;
using BaseClass::cmdQImmediate;
using BaseClass::commandContainer;
using BaseClass::compactL3FlushEventPacket;
@@ -191,6 +192,7 @@ struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreF
using BaseClass::isTbxMode;
using BaseClass::pipeControlMultiKernelEventSync;
using BaseClass::requiredStreamState;
using CommandList::kernelWithAssertAppended;
};
template <>
@@ -225,6 +227,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::signalAllEventPackets;
using BaseClass::stateBaseAddressTracking;
using BaseClass::stateComputeModeTracking;
using CommandList::kernelWithAssertAppended;
WhiteBox();
~WhiteBox() override;
@@ -563,14 +566,27 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
// Counts the call, then returns the canned result unless callBaseExecute asks for
// the real implementation.
ze_result_t executeCommandListImmediate(bool performMigration) override {
    ++executeCommandListImmediateCalledCount;
    if (!callBaseExecute) {
        return executeCommandListImmediateReturnValue;
    }
    return BaseClass::executeCommandListImmediate(performMigration);
}
// Counts the call, then returns the canned result unless callBaseExecute asks for
// the real flush-task implementation.
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) override {
    ++executeCommandListImmediateWithFlushTaskCalledCount;
    if (!callBaseExecute) {
        return executeCommandListImmediateWithFlushTaskReturnValue;
    }
    return BaseClass::executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
}
// Test override: only records that the assert check was requested.
void checkAssert() override {
    checkAssertCalled += 1;
}
uint32_t checkAssertCalled = 0;
bool callBaseExecute = false;
ze_result_t executeCommandListImmediateReturnValue = ZE_RESULT_SUCCESS;
uint32_t executeCommandListImmediateCalledCount = 0;

View File

@@ -24,6 +24,7 @@ template <>
struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass = ::L0::CommandQueueImp;
using BaseClass::buffers;
using BaseClass::cmdListWithAssertExecuted;
using BaseClass::commandStream;
using BaseClass::csr;
using BaseClass::device;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,10 +7,10 @@
#pragma once
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/module/module.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
namespace L0 {
namespace ult {
@@ -30,6 +30,7 @@ class MockDeviceForSpv : public Mock<DeviceImp> {
ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module,
ze_module_build_log_handle_t *buildLog, ModuleType type) override;
// Explicitly drops the builtins object when the mock device is destroyed.
~MockDeviceForSpv() override {
    builtins.reset();
}
};

View File

@@ -62,6 +62,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
using ::L0::KernelImp::printfBuffer;
using ::L0::KernelImp::requiredWorkgroupOrder;
using ::L0::KernelImp::residencyContainer;
using ::L0::KernelImp::setAssertBuffer;
using ::L0::KernelImp::surfaceStateHeapData;
using ::L0::KernelImp::surfaceStateHeapDataSize;
using ::L0::KernelImp::unifiedMemoryControls;

View File

@@ -0,0 +1,10 @@
#
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_assert.cpp
)

View File

@@ -0,0 +1,517 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/hw_info.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_assert_handler.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_logical_state_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/event_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include <chrono>
namespace L0 {
namespace ult {
using CommandListImmediateWithAssert = Test<DeviceFixture>;
// Destroying a kernel whose descriptor sets usesAssert must call the assert handler exactly once.
TEST(KernelAssert, GivenKernelWithAssertWhenDestroyedThenAssertIsChecked) {
    NEO::Device *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get(), 0);
    Mock<L0::DeviceImp> l0Device(neoDevice, neoDevice->getExecutionEnvironment());

    auto mockAssertHandler = new MockAssertHandler(neoDevice);
    neoDevice->getRootDeviceEnvironmentRef().assertHandler.reset(mockAssertHandler);

    Mock<Module> module(&l0Device, nullptr, ModuleType::User);
    {
        // Scoped so the kernel destructor runs before the expectation below.
        Mock<Kernel> kernel;
        kernel.module = &module;
        kernel.descriptor.kernelAttributes.flags.usesAssert = true;
    }

    EXPECT_EQ(1u, mockAssertHandler->printAssertAndAbortCalled);
}
// Destroying a kernel that uses assert must complete normally when the test installs no assert handler.
TEST(KernelAssert, GivenKernelWithAssertWhenNoAssertHandlerOnDestroyThenDestructorDoesNotCrash) {
    NEO::Device *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get(), 0);
    Mock<L0::DeviceImp> l0Device(neoDevice, neoDevice->getExecutionEnvironment());
    Mock<Module> module(&l0Device, nullptr, ModuleType::User);

    {
        // No expectation follows: the test passes when the destructor at the end of this scope returns.
        Mock<Kernel> kernel;
        kernel.module = &module;
        kernel.descriptor.kernelAttributes.flags.usesAssert = true;
    }
}
// setAssertBuffer() must write the assert buffer address at offset 0 of the cross-thread data
// and add the assert buffer to the kernel's residency container.
TEST(KernelAssert, GivenKernelWithAssertWhenSettingAssertBufferThenAssertBufferIsAddedToResidencyAndCrossThreadDataPatched) {
    NEO::Device *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get(), 0);
    Mock<L0::DeviceImp> l0Device(neoDevice, neoDevice->getExecutionEnvironment());

    auto mockAssertHandler = new MockAssertHandler(neoDevice);
    neoDevice->getRootDeviceEnvironmentRef().assertHandler.reset(mockAssertHandler);

    Mock<Module> module(&l0Device, nullptr, ModuleType::User);
    Mock<Kernel> kernel;
    kernel.module = &module;

    // Describe an assert-buffer pointer argument located at the start of cross-thread data.
    kernel.descriptor.kernelAttributes.flags.usesAssert = true;
    auto &assertBufferArg = kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress;
    assertBufferArg.stateless = 0;
    assertBufferArg.pointerSize = sizeof(uintptr_t);

    kernel.crossThreadData = std::make_unique<uint8_t[]>(16);
    kernel.crossThreadDataSize = sizeof(uint8_t[16]);

    kernel.setAssertBuffer();

    auto expectedAddress = mockAssertHandler->getAssertBuffer()->getGpuAddressToPatch();
    EXPECT_TRUE(memcmp(kernel.crossThreadData.get(), &expectedAddress, sizeof(expectedAddress)) == 0);

    const auto &residency = kernel.getResidencyContainer();
    EXPECT_TRUE(std::find(residency.begin(), residency.end(), mockAssertHandler->getAssertBuffer()) != residency.end());
}
// When the test installs no assert handler, setAssertBuffer() on a kernel that uses assert
// must leave a non-null handler in the root device environment.
TEST(KernelAssert, GivenNoAssertHandlerWhenKernelWithAssertSetsAssertBufferThenAssertHandlerIsCreated) {
    NEO::Device *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get(), 0);
    Mock<L0::DeviceImp> l0Device(neoDevice, neoDevice->getExecutionEnvironment());
    Mock<Module> module(&l0Device, nullptr, ModuleType::User);

    Mock<Kernel> kernel;
    kernel.module = &module;

    // Describe an assert-buffer pointer argument located at the start of cross-thread data.
    kernel.descriptor.kernelAttributes.flags.usesAssert = true;
    auto &assertBufferArg = kernel.descriptor.payloadMappings.implicitArgs.assertBufferAddress;
    assertBufferArg.stateless = 0;
    assertBufferArg.pointerSize = sizeof(uintptr_t);

    kernel.crossThreadData = std::make_unique<uint8_t[]>(16);
    kernel.crossThreadDataSize = sizeof(uint8_t[16]);

    kernel.setAssertBuffer();

    EXPECT_NE(nullptr, neoDevice->getRootDeviceEnvironmentRef().assertHandler.get());
}
// Appending a kernel whose descriptor sets usesAssert must make the command list
// report hasKernelWithAssert().
TEST(CommandListAssertTest, GivenCmdListWhenKernelWithAssertAppendedThenHasKernelWithAssertIsSetTrue) {
    NEO::Device *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get(), 0);
    Mock<L0::DeviceImp> l0Device(neoDevice, neoDevice->getExecutionEnvironment());
    ze_result_t returnValue;

    Mock<Kernel> kernel;
    kernel.descriptor.kernelAttributes.flags.usesAssert = true;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(NEO::defaultHwInfo->platform.eProductFamily, &l0Device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    auto appendResult = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);
    EXPECT_TRUE(commandList->hasKernelWithAssert());
}
// reset() must clear the kernelWithAssertAppended flag of a command list.
TEST(CommandListAssertTest, GivenCmdListWithAppendedAssertKernelWhenResetThenKernelWithAssertAppendedIsFalse) {
    NEO::Device *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get(), 0);
    Mock<L0::DeviceImp> l0Device(neoDevice, neoDevice->getExecutionEnvironment());
    ze_result_t returnValue;

    auto createdCmdList = CommandList::create(NEO::defaultHwInfo->platform.eProductFamily,
                                              &l0Device, NEO::EngineGroupType::RenderCompute, 0u, returnValue);
    std::unique_ptr<ult::WhiteBox<L0::CommandList>> commandList(ult::whiteboxCast(createdCmdList));

    // Set the flag directly through the white-box accessor instead of appending a kernel.
    commandList->kernelWithAssertAppended = true;
    EXPECT_TRUE(commandList->hasKernelWithAssert());

    commandList->reset();

    EXPECT_FALSE(commandList->kernelWithAssertAppended);
    EXPECT_FALSE(commandList->hasKernelWithAssert());
}
// On a synchronous immediate command list, appending a kernel that uses assert must call
// the assert handler once and leave hasKernelWithAssert() false afterwards.
TEST_F(CommandListImmediateWithAssert, GivenImmediateCmdListWhenKernelWithAssertAppendedThenHasKernelWithAssertIsSetFalseAfterFlush) {
    auto mockAssertHandler = new MockAssertHandler(device->getNEODevice());
    device->getNEODevice()->getRootDeviceEnvironmentRef().assertHandler.reset(mockAssertHandler);

    ze_result_t result;
    Mock<Kernel> kernel;
    kernel.descriptor.kernelAttributes.flags.usesAssert = true;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
    queueDesc.pNext = 0;
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(NEO::defaultHwInfo->platform.eProductFamily, device, &queueDesc, false,
                                                                              NEO::EngineGroupType::RenderCompute, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(1u, mockAssertHandler->printAssertAndAbortCalled);
    EXPECT_FALSE(commandList->hasKernelWithAssert());
}
// checkAssert() on an immediate command list with the assert flag set must call the
// installed assert handler once.
HWTEST2_F(CommandListImmediateWithAssert, GivenImmediateCmdListWhenCheckingAssertThenPrintMessageAndAbortOnAssertHandlerIsCalled, IsAtLeastSkl) {
    ze_result_t result;
    ze_command_queue_desc_t queueDesc = {};
    queueDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
    queueDesc.pNext = 0;

    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(NEO::defaultHwInfo->platform.eProductFamily, device, &queueDesc, false,
                                                                              NEO::EngineGroupType::RenderCompute, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto mockAssertHandler = new MockAssertHandler(device->getNEODevice());
    device->getNEODevice()->getRootDeviceEnvironmentRef().assertHandler.reset(mockAssertHandler);

    // View the command list through the mock type to reach the protected flag and method.
    auto immediateCmdList = static_cast<MockCommandListImmediate<gfxCoreFamily> *>(commandList.get());
    immediateCmdList->kernelWithAssertAppended = true;
    immediateCmdList->checkAssert();

    EXPECT_EQ(1u, mockAssertHandler->printAssertAndAbortCalled);
}
// checkAssert() with the assert flag set but no handler installed by the test must throw.
HWTEST2_F(CommandListImmediateWithAssert, GivenImmediateCmdListAndNoAssertHandlerWhenCheckingAssertThenUnrecoverableIsCalled, IsAtLeastSkl) {
    ze_result_t result;
    ze_command_queue_desc_t queueDesc = {};
    queueDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
    queueDesc.pNext = 0;

    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(NEO::defaultHwInfo->platform.eProductFamily, device, &queueDesc, false,
                                                                              NEO::EngineGroupType::RenderCompute, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // View the command list through the mock type to reach the protected flag and method.
    auto immediateCmdList = static_cast<MockCommandListImmediate<gfxCoreFamily> *>(commandList.get());
    immediateCmdList->kernelWithAssertAppended = true;

    EXPECT_THROW(immediateCmdList->checkAssert(), std::exception);
}
// Appends a kernel that uses assert to an immediate command list configured as asynchronous
// (isSyncModeQueue = false) and expects the mocked checkAssert() not to be called.
HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendedToAsynchronousImmCommandListThenAssertIsNotChecked, IsAtLeastSkl) {
ze_result_t result;
Mock<Kernel> kernel;
ze_command_queue_desc_t desc = {};
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
desc.pNext = 0;
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
// Mock command list that forwards execution to the base class and counts checkAssert() calls.
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.isFlushTaskSubmissionEnabled = true;
cmdList.callBaseExecute = true;
cmdList.cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
cmdList.isSyncModeQueue = false;
result = cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Wire the command list and its container to the device's ULT command stream receiver.
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
cmdList.setCsr(&csr);
cmdList.getCmdContainer().setImmediateCmdListCsr(&csr);
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, result);
cmdList.cmdQImmediate = commandQueue;
auto mockCsrLogicalStateHelper = new NEO::LogicalStateHelperMock<FamilyType>();
csr.logicalStateHelper.reset(mockCsrLogicalStateHelper);
ze_group_count_t groupCount{1, 1, 1};
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
CmdListKernelLaunchParams launchParams = {};
result = cmdList.appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, cmdList.checkAssertCalled);
}
// Appends a kernel that uses assert to an immediate command list configured as synchronous
// (isSyncModeQueue = true) and expects the mocked checkAssert() to be called exactly once.
HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendedToSynchronousImmCommandListThenAssertIsChecked, IsAtLeastSkl) {
ze_result_t result;
Mock<Kernel> kernel;
// Mock command list that forwards execution to the base class and counts checkAssert() calls.
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.isFlushTaskSubmissionEnabled = true;
cmdList.callBaseExecute = true;
cmdList.cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
cmdList.isSyncModeQueue = true;
result = cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Wire the command list and its container to the device's ULT command stream receiver.
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
cmdList.setCsr(&csr);
cmdList.getCmdContainer().setImmediateCmdListCsr(&csr);
ze_command_queue_desc_t desc = {};
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
desc.pNext = 0;
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, result);
cmdList.cmdQImmediate = commandQueue;
auto mockCsrLogicalStateHelper = new NEO::LogicalStateHelperMock<FamilyType>();
csr.logicalStateHelper.reset(mockCsrLogicalStateHelper);
ze_group_count_t groupCount{1, 1, 1};
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
CmdListKernelLaunchParams launchParams = {};
result = cmdList.appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, cmdList.checkAssertCalled);
}
// Configures the CSR to report WaitStatus::GpuHang from waitForCompletionWithTimeout, appends a kernel
// that uses assert to a synchronous immediate command list, and expects ZE_RESULT_ERROR_DEVICE_LOST
// together with exactly one call to the mocked checkAssert().
HWTEST2_F(CommandListImmediateWithAssert, givenKernelWithAssertWhenAppendToSynchronousImmCommandListHangsThenAssertIsChecked, IsAtLeastSkl) {
ze_result_t result;
Mock<Kernel> kernel;
TaskCountType currentTaskCount = 33u;
// Bypass the base wait implementation and return a GPU hang instead.
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
csr.callBaseWaitForCompletionWithTimeout = false;
csr.returnWaitForCompletionWithTimeout = WaitStatus::GpuHang;
// Mock command list that forwards execution to the base class and counts checkAssert() calls.
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.isFlushTaskSubmissionEnabled = true;
cmdList.callBaseExecute = true;
cmdList.cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
cmdList.isSyncModeQueue = true;
result = cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
cmdList.setCsr(&csr);
cmdList.getCmdContainer().setImmediateCmdListCsr(&csr);
ze_command_queue_desc_t desc = {};
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
desc.pNext = 0;
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
auto commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, cmdList.isCopyOnly(), false, result);
cmdList.cmdQImmediate = commandQueue;
ze_group_count_t groupCount{1, 1, 1};
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
CmdListKernelLaunchParams launchParams = {};
result = cmdList.appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
EXPECT_EQ(1u, cmdList.checkAssertCalled);
}
using CommandQueueWithAssert = Test<DeviceFixture>;
// Executing a command list that contains a kernel with assert must set the queue's
// cmdListWithAssertExecuted flag.
TEST_F(CommandQueueWithAssert, GivenCmdListWithAssertWhenExecutingThenCommandQueuesPropertyIsSet) {
    ze_command_queue_desc_t desc = {};
    auto mockAssertHandler = new MockAssertHandler(device->getNEODevice());
    device->getNEODevice()->getRootDeviceEnvironmentRef().assertHandler.reset(mockAssertHandler);

    ze_result_t returnValue;
    auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, defaultCsr, &desc, false, false, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    Mock<Kernel> kernel;
    kernel.descriptor.kernelAttributes.flags.usesAssert = true;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(NEO::defaultHwInfo->platform.eProductFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    auto appendResult = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);
    commandList->close();

    ze_command_list_handle_t cmdListHandle = commandList->toHandle();
    returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_TRUE(commandQueue->cmdListWithAssertExecuted);

    commandQueue->destroy();
}
// With the flag set but no handler installed by the test, checkAssert() must throw
// and still leave the flag cleared.
TEST_F(CommandQueueWithAssert, GivenAssertKernelExecutedAndNoAssertHandlerWhenCheckingAssertThenUnrecoverableIsCalled) {
    ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;

    auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, defaultCsr, &desc, false, false, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    commandQueue->cmdListWithAssertExecuted = true;

    EXPECT_THROW(commandQueue->checkAssert(), std::exception);
    EXPECT_FALSE(commandQueue->cmdListWithAssertExecuted);

    commandQueue->destroy();
}
// synchronizeByPollingForTaskCount() must call the assert handler once and clear the
// queue's cmdListWithAssertExecuted flag.
TEST_F(CommandQueueWithAssert, GivenCmdListWithAssertExecutedWhenSynchronizeByPollingTaskCountCalledThenAssertIsChecked) {
    ze_command_queue_desc_t desc = {};
    auto mockAssertHandler = new MockAssertHandler(device->getNEODevice());
    device->getNEODevice()->getRootDeviceEnvironmentRef().assertHandler.reset(mockAssertHandler);

    ze_result_t returnValue;
    auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, defaultCsr, &desc, false, false, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Set the flag directly instead of executing a command list.
    commandQueue->cmdListWithAssertExecuted = true;

    returnValue = commandQueue->synchronizeByPollingForTaskCount(0u);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    EXPECT_EQ(1u, mockAssertHandler->printAssertAndAbortCalled);
    EXPECT_FALSE(commandQueue->cmdListWithAssertExecuted);

    commandQueue->destroy();
}
// When the CSR reports a GPU hang during synchronizeByPollingForTaskCount(), the queue must still
// call the assert handler once, clear cmdListWithAssertExecuted, and return ZE_RESULT_ERROR_DEVICE_LOST.
HWTEST_F(CommandQueueWithAssert, GivenCmdListWithAssertExecutedAndDetectedHangWhenSynchronizingByPollingThenAssertIsChecked) {
    const ze_command_queue_desc_t desc{};
    auto assertHandler = new MockAssertHandler(device->getNEODevice());
    device->getNEODevice()->getRootDeviceEnvironmentRef().assertHandler.reset(assertHandler);

    ze_result_t returnValue;
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
                                                          device,
                                                          neoDevice->getDefaultEngine().commandStreamReceiver,
                                                          &desc,
                                                          false,
                                                          false,
                                                          returnValue));
    // Check queue creation as the sibling tests do, so a creation failure is reported at its source.
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Bypass the base wait implementation and return a GPU hang instead.
    TaskCountType currentTaskCount = 33u;
    auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.callBaseWaitForCompletionWithTimeout = false;
    csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
    csr.returnWaitForCompletionWithTimeout = WaitStatus::GpuHang;

    // Set the flag directly instead of executing a command list.
    commandQueue->cmdListWithAssertExecuted = true;

    returnValue = commandQueue->synchronizeByPollingForTaskCount(0u);
    EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue);

    EXPECT_EQ(1u, assertHandler->printAssertAndAbortCalled);
    EXPECT_FALSE(commandQueue->cmdListWithAssertExecuted);

    commandQueue->destroy();
}
// Host-synchronizing a fence created on a queue flagged as having executed an assert
// kernel is expected to call the assert handler once and clear the queue's flag.
TEST_F(CommandQueueWithAssert, GivenAssertKernelExecutedWhenSynchronizingFenceThenAssertIsChecked) {
    ze_command_queue_desc_t queueDesc = {};

    // The raw pointer is kept so the call counter can be inspected after reset()
    // hands the mock over to the root device environment.
    auto mockHandler = new MockAssertHandler(device->getNEODevice());
    device->getNEODevice()->getRootDeviceEnvironmentRef().assertHandler.reset(mockHandler);

    ze_result_t createStatus;
    auto engineCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
    auto queue = whiteboxCast(CommandQueue::create(productFamily, device, engineCsr, &queueDesc, false, false, createStatus));
    EXPECT_EQ(ZE_RESULT_SUCCESS, createStatus);

    queue->cmdListWithAssertExecuted = true;

    ze_fence_desc_t fenceDescriptor = {ZE_STRUCTURE_TYPE_FENCE_DESC, nullptr, 0};
    auto queueFence = whiteboxCast(Fence::create(queue, &fenceDescriptor));
    ASSERT_NE(queueFence, nullptr);
    queueFence->taskCount = 0;

    const ze_result_t syncStatus = queueFence->hostSynchronize(1);
    EXPECT_EQ(ZE_RESULT_SUCCESS, syncStatus);

    delete queueFence;

    EXPECT_EQ(1u, mockHandler->printAssertAndAbortCalled);
    EXPECT_FALSE(queue->cmdListWithAssertExecuted);

    queue->destroy();
}
// Fixture alias used by the event hostSynchronize() assert-check tests.
// NOTE(review): the meaning of the <1, 0> template arguments is defined by EventFixture — confirm there.
using EventAssertTest = Test<EventFixture<1, 0>>;
// When the CSR reports a GPU hang, event hostSynchronize() is expected to return
// DEVICE_LOST and to have called the assert handler once.
TEST_F(EventAssertTest, GivenGpuHangWhenHostSynchronizeIsCalledThenAssertIsChecked) {
    const auto hangingCsr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    hangingCsr->isGpuHangDetectedReturnValue = true;

    event->csr = hangingCsr.get();
    // Zero check period, presumably so the hang check is not delayed.
    event->gpuHangCheckPeriod = std::chrono::microseconds::zero();

    // The raw pointer is kept so the call counter can be inspected after reset().
    auto mockHandler = new MockAssertHandler(device->getNEODevice());
    neoDevice->getRootDeviceEnvironmentRef().assertHandler.reset(mockHandler);

    constexpr uint64_t maxTimeout = std::numeric_limits<std::uint64_t>::max();
    const auto syncStatus = event->hostSynchronize(maxTimeout);

    EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, syncStatus);
    EXPECT_EQ(1u, mockHandler->printAssertAndAbortCalled);
}
// With no hang reported and a 1 ns timeout, event hostSynchronize() is expected to
// return NOT_READY and still to have called the assert handler once.
TEST_F(EventAssertTest, GivenNoGpuHangAndOneNanosecondTimeoutWhenHostSynchronizeIsCalledThenAssertIsChecked) {
    const auto healthyCsr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    healthyCsr->isGpuHangDetectedReturnValue = false;

    event->csr = healthyCsr.get();
    event->gpuHangCheckPeriod = std::chrono::microseconds::zero();

    // The raw pointer is kept so the call counter can be inspected after reset().
    auto mockHandler = new MockAssertHandler(device->getNEODevice());
    neoDevice->getRootDeviceEnvironmentRef().assertHandler.reset(mockHandler);

    constexpr uint64_t oneNanosecond = 1;
    const auto syncStatus = event->hostSynchronize(oneNanosecond);

    EXPECT_EQ(ZE_RESULT_NOT_READY, syncStatus);
    EXPECT_EQ(1u, mockHandler->printAssertAndAbortCalled);
}
// When the event's host-visible state is already STATE_SIGNALED, hostSynchronize()
// is expected to return SUCCESS and to have called the assert handler once.
TEST_F(EventAssertTest, GivenEventSignalledWhenHostSynchronizeIsCalledThenAssertIsChecked) {
    const auto mockCsr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());

    // Write the signaled state directly into the event's host address.
    auto eventState = static_cast<uint32_t *>(event->getHostAddress());
    *eventState = Event::STATE_SIGNALED;
    event->setUsingContextEndOffset(false);
    event->csr = mockCsr.get();

    // The raw pointer is kept so the call counter can be inspected after reset().
    auto mockHandler = new MockAssertHandler(device->getNEODevice());
    neoDevice->getRootDeviceEnvironmentRef().assertHandler.reset(mockHandler);

    constexpr uint64_t oneNanosecond = 1;
    const auto syncStatus = event->hostSynchronize(oneNanosecond);

    EXPECT_EQ(ZE_RESULT_SUCCESS, syncStatus);
    EXPECT_EQ(1u, mockHandler->printAssertAndAbortCalled);
}
} // namespace ult
} // namespace L0

View File

@@ -457,7 +457,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
{
VariableBackup<std::array<bool, 3>> usesSyncBuffer{&kernelAttributes.flags.packed};
VariableBackup<std::array<bool, 4>> usesSyncBuffer{&kernelAttributes.flags.packed};
usesSyncBuffer = {};
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

View File

@@ -36,7 +36,7 @@ class AssertHandler {
}
bool checkAssert() const;
void printAssertAndAbort();
MOCKABLE_VIRTUAL void printAssertAndAbort();
protected:
static constexpr size_t assertBufferSize = MemoryConstants::pageSize64k;

View File

@@ -110,8 +110,11 @@ struct KernelDescriptor {
bool hasRTCalls : 1;
bool isInvalid : 1;
bool hasSample : 1;
// 3
bool usesAssert : 1;
bool reserved : 7;
};
std::array<bool, 3> packed;
std::array<bool, 4> packed;
} flags = {};
static_assert(sizeof(KernelAttributes::flags) == sizeof(KernelAttributes::flags.packed), "");
@@ -164,6 +167,7 @@ struct KernelDescriptor {
ArgDescPointer systemThreadSurfaceAddress;
ArgDescPointer syncBufferAddress;
ArgDescPointer rtDispatchGlobals;
ArgDescPointer assertBufferAddress;
CrossThreadDataOffset privateMemorySize = undefined<CrossThreadDataOffset>;
CrossThreadDataOffset maxWorkGroupSize = undefined<CrossThreadDataOffset>;
CrossThreadDataOffset simdSize = undefined<CrossThreadDataOffset>;

View File

@@ -17,6 +17,7 @@ set(NEO_CORE_tests_mocks
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_create.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_allocation_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_assert_handler.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_center.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_csr.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_file_stream.h

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/assert_handler/assert_handler.h"
// Test double for NEO::AssertHandler: counts calls to printAssertAndAbort() and then
// forwards to the base implementation. It also re-exports assertBufferSize and the
// base constructors for use in tests.
struct MockAssertHandler : NEO::AssertHandler {
    using NEO::AssertHandler::AssertHandler;
    using NEO::AssertHandler::assertBufferSize;

    // Number of times printAssertAndAbort() has been invoked on this mock.
    uint32_t printAssertAndAbortCalled = 0;

    void printAssertAndAbort() override {
        // Record the call first, then run the real handler logic.
        ++printAssertAndAbortCalled;
        NEO::AssertHandler::printAssertAndAbort();
    }
};

View File

@@ -7,18 +7,13 @@
#include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/test/common/mocks/mock_assert_handler.h"
#include "shared/test/common/mocks/mock_device.h"
#include "gtest/gtest.h"
using namespace NEO;
struct MockAssertHandler : AssertHandler {
using AssertHandler::assertBufferSize;
using AssertHandler::AssertHandler;
};
TEST(AssertHandlerTests, WhenAssertHandlerIsCreatedThenAssertBufferIsAllocated) {
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));