Detect GPU hangs in flushBcsTask()

This change introduces detection of GPU hangs in flushBcsTask()
function. The new code has been covered with ULTs.

Related-To: NEO-6681
Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
Patryk Wrobel
2022-05-05 16:52:25 +00:00
committed by Compute-Runtime-Automation
parent b4b1fb97bd
commit 9b2ad0c5df
31 changed files with 1291 additions and 794 deletions

View File

@@ -1140,7 +1140,9 @@ WaitStatus CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *pri
const auto waitStatus = waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps);
if (printfHandler) {
printfHandler->printEnqueueOutput();
if (!printfHandler->printEnqueueOutput()) {
return WaitStatus::GpuHang;
}
}
return waitStatus;

View File

@@ -337,6 +337,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
this->latestSentEnqueueType = enqueueProperties.operation;
}
if (completionStamp.taskCount == CompletionStamp::gpuHang) {
return CL_OUT_OF_RESOURCES;
}
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
if (blockQueue) {
@@ -819,7 +824,14 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
if (enqueueProperties.blitPropertiesContainer->size() > 0) {
auto bcsCsr = getBcsForAuxTranslation();
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
if (!newTaskCount) {
CompletionStamp completionStamp{};
completionStamp.taskCount = CompletionStamp::gpuHang;
return completionStamp;
}
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
dispatchFlags.implicitFlush = true;
}
@@ -1050,7 +1062,14 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer);
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
if (!newTaskCount) {
CompletionStamp completionStamp{};
completionStamp.taskCount = CompletionStamp::gpuHang;
return completionStamp;
}
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
}
return completionStamp;
@@ -1157,6 +1176,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
enqueueProperties, timestampPacketDependencies, eventsRequest,
eventBuilder, taskLevel, csrDeps, &bcsCsr);
if (completionStamp.taskCount == CompletionStamp::gpuHang) {
return CL_OUT_OF_RESOURCES;
}
if (gpgpuSubmission) {
commandStreamReceiverOwnership.unlock();
}

View File

@@ -261,9 +261,15 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
commandQueue.clearLastBcsPackets();
}
bool isGpuHangDetected{false};
if (kernelOperation->blitPropertiesContainer.size() > 0) {
const auto newTaskCount = bcsCsrForAuxTranslation->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount);
if (newTaskCount) {
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), *newTaskCount);
} else {
isGpuHangDetected = true;
}
}
commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::GpuKernel);
@@ -272,8 +278,14 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
if (printfHandler) {
commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
printfHandler->printEnqueueOutput();
const auto waitStatus = commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
if (waitStatus == WaitStatus::GpuHang) {
isGpuHangDetected = true;
}
if (!printfHandler->printEnqueueOutput()) {
isGpuHangDetected = true;
}
}
for (auto surface : surfaces) {
@@ -281,10 +293,14 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
surfaces.clear();
if (isGpuHangDetected) {
completionStamp.taskCount = CompletionStamp::gpuHang;
}
return completionStamp;
}
void CommandWithoutKernel::dispatchBlitOperation() {
bool CommandWithoutKernel::dispatchBlitOperation() {
auto bcsCsr = kernelOperation->bcsCsr;
UNRECOVERABLE_IF(bcsCsr == nullptr);
@@ -301,8 +317,14 @@ void CommandWithoutKernel::dispatchBlitOperation() {
}
const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
if (!newTaskCount) {
return false;
}
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
commandQueue.setLastBcsPacket(bcsCsr->getOsContext().getEngineType());
return true;
}
CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) {
@@ -401,7 +423,9 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
}
if (kernelOperation->blitEnqueue) {
dispatchBlitOperation();
if (!dispatchBlitOperation()) {
completionStamp.taskCount = CompletionStamp::gpuHang;
}
}
commandQueue.updateLatestSentEnqueueType(enqueueOperationType);

View File

@@ -157,6 +157,6 @@ class CommandWithoutKernel : public Command {
public:
using Command::Command;
CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
void dispatchBlitOperation();
bool dispatchBlitOperation();
};
} // namespace NEO

View File

@@ -85,7 +85,7 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
commandStreamReceiver.makeResident(*printfSurface);
}
void PrintfHandler::printEnqueueOutput() {
bool PrintfHandler::printEnqueueOutput() {
auto &hwInfo = device.getHardwareInfo();
auto usesStringMap = kernel->getDescriptor().kernelAttributes.usesStringMap();
@@ -108,11 +108,18 @@ void PrintfHandler::printEnqueueOutput() {
printfOutputDecompressed.get(),
printfSurface->getGpuAddress(),
0, 0, 0, Vec3<size_t>(printfOutputSize, 0, 0), 0, 0, 0, 0));
bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, device.getDevice());
const auto newTaskCount = bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, device.getDevice());
if (!newTaskCount) {
return false;
}
}
PrintFormatter printFormatter(printfOutputBuffer, printfOutputSize, kernel->is32Bit(),
usesStringMap ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr);
printFormatter.printKernelOutput();
return true;
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -20,11 +20,11 @@ class PrintfHandler {
public:
static PrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &deviceArg);
~PrintfHandler();
MOCKABLE_VIRTUAL ~PrintfHandler();
void prepareDispatch(const MultiDispatchInfo &multiDispatchInfo);
void makeResident(CommandStreamReceiver &commandStreamReceiver);
void printEnqueueOutput();
MOCKABLE_VIRTUAL bool printEnqueueOutput();
GraphicsAllocation *getSurface() {
return printfSurface;

View File

@@ -6,7 +6,9 @@
set(IGDRCL_SRCS_tests_command_queue
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_1_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_2_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/command_enqueue_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_1_tests.cpp

View File

@@ -22,6 +22,7 @@
#include "opencl/source/event/user_event.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/command_queue/blit_enqueue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
@@ -33,237 +34,6 @@ namespace NEO {
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
template <int timestampPacketEnabled>
struct BlitEnqueueTests : public ::testing::Test {
class BcsMockContext : public MockContext {
public:
BcsMockContext(ClDevice *device) : MockContext(device) {
bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})));
bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
bcsCsr->setupContext(*bcsOsContext);
bcsCsr->initializeTagAllocation();
auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
Vec3<size_t> size) -> BlitOperationResult {
if (!device.getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported) {
return BlitOperationResult::Unsupported;
}
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
*bcsCsr, memory, nullptr,
hostPtr,
memory->getGpuAddress(), 0,
0, 0, size, 0, 0, 0, 0);
BlitPropertiesContainer container;
container.push_back(blitProperties);
bcsCsr->flushBcsTask(container, true, false, const_cast<Device &>(device));
return BlitOperationResult::Success;
};
blitMemoryToAllocationFuncBackup = mockBlitMemoryToAllocation;
}
std::unique_ptr<OsContext> bcsOsContext;
std::unique_ptr<CommandStreamReceiver> bcsCsr;
VariableBackup<BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup{
&BlitHelperFunctions::blitMemoryToAllocation};
};
template <typename FamilyType>
void setUpT() {
if (is32bit) {
GTEST_SKIP();
}
REQUIRE_AUX_RESOLVES();
DebugManager.flags.EnableTimestampPacket.set(timestampPacketEnabled);
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
DebugManager.flags.CsrDispatchMode.set(static_cast<int32_t>(DispatchMode::ImmediateDispatch));
DebugManager.flags.EnableLocalMemory.set(1);
device = std::make_unique<MockClDevice>(MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
capabilityTable.blitterOperationsSupported = true;
if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isBlitterFullySupported(device->getHardwareInfo())) {
GTEST_SKIP();
}
if (createBcsEngine) {
auto &engine = device->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority);
bcsOsContext.reset(OsContext::create(nullptr, 1,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getDeviceBitfield())));
engine.osContext = bcsOsContext.get();
engine.commandStreamReceiver->setupContext(*bcsOsContext);
}
bcsMockContext = std::make_unique<BcsMockContext>(device.get());
auto mockCmdQueue = new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr);
commandQueue.reset(mockCmdQueue);
mockKernel = std::make_unique<MockKernelWithInternals>(*device, bcsMockContext.get());
auto mockProgram = mockKernel->mockProgram;
mockProgram->setAllowNonUniform(true);
gpgpuCsr = &mockCmdQueue->getGpgpuCommandStreamReceiver();
bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver;
}
template <typename FamilyType>
void tearDownT() {}
template <size_t N>
void setMockKernelArgs(std::array<Buffer *, N> buffers) {
for (uint32_t i = 0; i < buffers.size(); i++) {
mockKernel->kernelInfo.addArgBuffer(i, 0);
}
mockKernel->mockKernel->initialize();
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
for (uint32_t i = 0; i < buffers.size(); i++) {
cl_mem clMem = buffers[i];
mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem);
}
}
template <size_t N>
void setMockKernelArgs(std::array<GraphicsAllocation *, N> allocs) {
for (uint32_t i = 0; i < allocs.size(); i++) {
mockKernel->kernelInfo.addArgBuffer(i, 0);
}
mockKernel->mockKernel->initialize();
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
for (uint32_t i = 0; i < allocs.size(); i++) {
auto alloc = allocs[i];
auto ptr = reinterpret_cast<void *>(alloc->getGpuAddressToPatch());
mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc, 0u);
}
}
ReleaseableObjectPtr<Buffer> createBuffer(size_t size, bool compressed) {
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal));
auto graphicsAllocation = buffer->getGraphicsAllocation(device->getRootDeviceIndex());
setAllocationType(graphicsAllocation, compressed);
return buffer;
}
MockGraphicsAllocation *createGfxAllocation(size_t size, bool compressed) {
auto alloc = new MockGraphicsAllocation(nullptr, size);
setAllocationType(alloc, compressed);
return alloc;
}
void setAllocationType(GraphicsAllocation *graphicsAllocation, bool compressed) {
graphicsAllocation->setAllocationType(AllocationType::BUFFER);
if (compressed && !graphicsAllocation->getDefaultGmm()) {
auto gmmHelper = device->getRootDeviceEnvironment().getGmmHelper();
graphicsAllocation->setDefaultGmm(new Gmm(gmmHelper, nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
}
if (graphicsAllocation->getDefaultGmm()) {
graphicsAllocation->getDefaultGmm()->isCompressionEnabled = compressed;
}
}
template <typename Family>
GenCmdList getCmdList(LinearStream &linearStream, size_t offset) {
HardwareParse hwParser;
hwParser.parseCommands<Family>(linearStream, offset);
return hwParser.cmdList;
}
template <typename Family>
GenCmdList::iterator expectPipeControl(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
using PIPE_CONTROL = typename Family::PIPE_CONTROL;
PIPE_CONTROL *pipeControlCmd = nullptr;
GenCmdList::iterator commandItor = itorStart;
bool stallingWrite = false;
do {
commandItor = find<PIPE_CONTROL *>(commandItor, itorEnd);
if (itorEnd == commandItor) {
EXPECT_TRUE(false);
return itorEnd;
}
pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*commandItor);
stallingWrite = pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA &&
pipeControlCmd->getCommandStreamerStallEnable();
++commandItor;
} while (!stallingWrite);
return --commandItor;
}
template <typename Family>
GenCmdList::iterator expectMiFlush(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
Family *miFlushCmd = nullptr;
GenCmdList::iterator commandItor = itorStart;
bool miFlushWithMemoryWrite = false;
do {
commandItor = find<Family *>(commandItor, itorEnd);
if (itorEnd == commandItor) {
EXPECT_TRUE(false);
return itorEnd;
}
miFlushCmd = genCmdCast<Family *>(*commandItor);
miFlushWithMemoryWrite = miFlushCmd->getDestinationAddress() != 0;
++commandItor;
} while (!miFlushWithMemoryWrite);
return --commandItor;
}
template <typename Command>
GenCmdList::iterator expectCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
auto commandItor = find<Command *>(itorStart, itorEnd);
EXPECT_TRUE(commandItor != itorEnd);
return commandItor;
}
template <typename Command>
void expectNoCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
auto commandItor = find<Command *>(itorStart, itorEnd);
EXPECT_TRUE(commandItor == itorEnd);
}
template <typename Family>
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
EXPECT_EQ(expectedAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
}
DebugManagerStateRestore restore;
std::unique_ptr<OsContext> bcsOsContext;
std::unique_ptr<MockClDevice> device;
std::unique_ptr<BcsMockContext> bcsMockContext;
std::unique_ptr<CommandQueue> commandQueue;
std::unique_ptr<MockKernelWithInternals> mockKernel;
CommandStreamReceiver *bcsCsr = nullptr;
CommandStreamReceiver *gpgpuCsr = nullptr;
size_t gws[3] = {63, 0, 0};
size_t lws[3] = {16, 0, 0};
uint32_t hostPtr = 0;
cl_int retVal = CL_SUCCESS;
};
using BlitAuxTranslationTests = BlitEnqueueTests<1>;
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingCommandBufferThenEnsureCorrectOrder) {
@@ -335,6 +105,22 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstruct
}
}
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenGpuHangOnFlushBcsAndBlitAuxTranslationWhenConstructingCommandBufferThenOutOfResourcesIsReturned) {
auto buffer0 = createBuffer(1, true);
auto buffer1 = createBuffer(1, false);
auto buffer2 = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 3>{{buffer0.get(), buffer1.get(), buffer2.get()}});
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
ultBcsCsr->callBaseFlushBcsTask = false;
ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
const auto result = mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);
EXPECT_EQ(CL_OUT_OF_RESOURCES, result);
}
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingBlockedCommandBufferThenEnsureCorrectOrder) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
@@ -873,6 +659,28 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructing
EXPECT_FALSE(commandQueue->isQueueBlocked());
}
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenGpuHangOnFlushBcsTaskAndBlitTranslationWhenConstructingBlockedCommandBufferAndRunningItThenEventExecutionIsAborted) {
auto buffer0 = createBuffer(1, true);
auto buffer1 = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 2>{{buffer0.get(), buffer1.get()}});
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
ultBcsCsr->callBaseFlushBcsTask = false;
ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;
UserEvent userEvent;
cl_event waitlist[] = {&userEvent};
cl_event kernelEvent{};
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, &kernelEvent);
userEvent.setStatus(CL_COMPLETE);
auto abortedEvent = castToObjectOrAbort<Event>(kernelEvent);
EXPECT_EQ(Event::executionAbortedDueToGpuHang, abortedEvent->peekExecutionStatus());
abortedEvent->release();
}
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenEnqueueIsCalledThenDoImplicitFlushOnGpgpuCsr) {
auto buffer = createBuffer(1, true);
setMockKernelArgs(std::array<Buffer *, 1>{{buffer.get()}});
@@ -1132,6 +940,20 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenDebugFlagSetWhenDis
EXPECT_EQ(1u, miFlushAfterCopyFound);
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenGpuHangOnFlushBcsTaskAndDebugFlagSetWhenDispatchingBlitEnqueueThenOutOfResourcesIsReturned) {
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
ultBcsCsr->callBaseFlushBcsTask = false;
ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;
buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
DebugManager.flags.PauseOnBlitCopy.set(1);
const auto result = commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(CL_OUT_OF_RESOURCES, result);
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenDebugFlagSetToMinusTwoWhenDispatchingBlitEnqueueThenAddPausingCommandsForEachEnqueue) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
@@ -1359,6 +1181,34 @@ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenBlockedQueueWhenBlitEnqueuedThenF
EXPECT_FALSE(commandQueue->isQueueBlocked());
}
HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenGpuHangOnFlushBcsTaskAndBlockedQueueWhenBlitEnqueuedThenEventIsAborted) {
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
uint32_t flushCounter = 0;
auto myUltGpgpuCsr = static_cast<MyUltCsr<FamilyType> *>(gpgpuCsr);
myUltGpgpuCsr->flushCounter = &flushCounter;
auto myUltBcsCsr = static_cast<MyUltCsr<FamilyType> *>(bcsCsr);
myUltBcsCsr->flushCounter = &flushCounter;
myUltBcsCsr->callBaseFlushBcsTask = false;
myUltBcsCsr->flushBcsTaskReturnValue = std::nullopt;
UserEvent userEvent;
cl_event waitlist[] = {&userEvent};
cl_event writeBufferEvent{};
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, waitlist, &writeBufferEvent);
userEvent.setStatus(CL_COMPLETE);
auto abortedEvent = castToObjectOrAbort<Event>(writeBufferEvent);
EXPECT_EQ(Event::executionAbortedDueToGpuHang, abortedEvent->peekExecutionStatus());
abortedEvent->release();
}
HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenDebugFlagSetWhenCheckingBcsCacheFlushRequirementThenReturnCorrectValue) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
@@ -1664,511 +1514,4 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperatio
clReleaseEvent(outEvent2);
}
using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>;
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenProfilingEnabledWhenSubmittingWithoutFlushToGpgpuThenSetSubmitTime) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
mockCommandQueue->setProfilingEnabled();
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
cl_event clEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
auto event = castToObject<Event>(clEvent);
uint64_t submitTime = 0;
event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(submitTime), &submitTime, nullptr);
EXPECT_NE(0u, submitTime);
clReleaseEvent(clEvent);
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenOutEventWhenEnqueuingBcsSubmissionThenSetupBcsCsrInEvent) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
{
DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);
cl_event clEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, bcsCsr->peekTaskCount());
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
auto event = castToObject<Event>(clEvent);
EXPECT_EQ(0u, event->peekBcsTaskCountFromCommandQueue());
clReleaseEvent(clEvent);
}
{
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
cl_event clEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, bcsCsr->peekTaskCount());
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
auto event = castToObject<Event>(clEvent);
EXPECT_EQ(1u, event->peekBcsTaskCountFromCommandQueue());
clReleaseEvent(clEvent);
}
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyThenDontSubmitToGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
mockCommandQueue->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCommandQueue->getGpgpuCommandStreamReceiver().postInitFlagsSetup();
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenImmediateDispatchCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
mockCommandQueue->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
mockCommandQueue->getGpgpuCommandStreamReceiver().postInitFlagsSetup();
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyAfterBarrierThenSubmitToGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueBarrierWithWaitList(0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
UserEvent userEvent;
cl_event waitlist = &userEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
EXPECT_FALSE(commandQueue->isQueueBlocked());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
UserEvent userEvent;
cl_event waitlist = &userEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
EXPECT_FALSE(commandQueue->isQueueBlocked());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThatRequiresCacheFlushThenSubmitToGpgpu) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
// enqueue kernel to force gpgpu submission on write buffer
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
auto offset = mockCommandQueue->getCS(0).getUsed();
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
auto cmdListBcs = getCmdList<FamilyType>(bcsCsr->getCS(0), 0);
auto cmdListQueue = getCmdList<FamilyType>(mockCommandQueue->getCS(0), offset);
uint64_t cacheFlushWriteAddress = 0;
{
auto cmdFound = expectPipeControl<FamilyType>(cmdListQueue.begin(), cmdListQueue.end());
auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*cmdFound);
EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
cacheFlushWriteAddress = NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControlCmd);
EXPECT_NE(0u, cacheFlushWriteAddress);
}
{
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdListBcs.begin(), cmdListBcs.end());
verifySemaphore<FamilyType>(cmdFound, cacheFlushWriteAddress);
cmdFound = expectCommand<XY_COPY_BLT>(cmdListBcs.begin(), cmdListBcs.end());
EXPECT_NE(cmdListBcs.end(), cmdFound);
}
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenClearDependencies) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
const bool clearDependencies = true;
{
TimestampPacketContainer previousNodes;
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *gpgpuCsr); // init
EXPECT_EQ(0u, previousNodes.peekNodes().size());
}
{
TimestampPacketContainer previousNodes;
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);
EXPECT_EQ(0u, previousNodes.peekNodes().size());
}
}
using BlitCopyTests = BlitEnqueueTests<1>;
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.KernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
kernelInfo.createKernelAllocation(device->getDevice(), false);
if (kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()) {
EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
} else {
EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
}
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.KernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
kernelInfo.createKernelAllocation(device->getDevice(), false);
EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.KernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
kernelInfo.createKernelAllocation(device->getDevice(), false);
EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenLocalMemoryAccessNotAllowedWhenGlobalConstantsAreExportedThenUseBlitter) {
DebugManager.flags.EnableLocalMemory.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
char constantData[128] = {};
ProgramInfo programInfo;
programInfo.globalConstants.initData = constantData;
programInfo.globalConstants.size = sizeof(constantData);
auto mockLinkerInput = std::make_unique<WhiteBox<LinkerInput>>();
mockLinkerInput->traits.exportsGlobalConstants = true;
programInfo.linkerInput = std::move(mockLinkerInput);
MockProgram program(bcsMockContext.get(), false, toClDeviceVector(*device));
EXPECT_EQ(0u, bcsMockContext->bcsCsr->peekTaskCount());
program.processProgramInfo(programInfo, *device);
EXPECT_EQ(1u, bcsMockContext->bcsCsr->peekTaskCount());
auto rootDeviceIndex = device->getRootDeviceIndex();
ASSERT_NE(nullptr, program.getConstantSurface(rootDeviceIndex));
auto gpuAddress = reinterpret_cast<const void *>(program.getConstantSurface(rootDeviceIndex)->getGpuAddress());
EXPECT_NE(nullptr, bcsMockContext->getSVMAllocsManager()->getSVMAlloc(gpuAddress));
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenSubstituteKernelHeapIsCalledThenUseBcsForTransfer) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
MockKernelWithInternals kernel(*device);
const size_t initialHeapSize = 0x40;
kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize;
kernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
ASSERT_NE(nullptr, kernel.kernelInfo.kernelAllocation);
EXPECT_TRUE(kernel.kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool());
const size_t newHeapSize = initialHeapSize;
char newHeap[newHeapSize];
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
device->getMemoryManager()->freeGraphicsMemory(kernel.kernelInfo.kernelAllocation);
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenLinkerRequiresPatchingOfInstructionSegmentsThenUseBcsForTransfer) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
auto linkerInput = std::make_unique<WhiteBox<LinkerInput>>();
linkerInput->traits.requiresPatchingOfInstructionSegments = true;
KernelInfo kernelInfo = {};
std::vector<char> kernelHeap;
kernelHeap.resize(32, 7);
kernelInfo.heapInfo.pKernelHeap = kernelHeap.data();
kernelInfo.heapInfo.KernelHeapSize = static_cast<uint32_t>(kernelHeap.size());
kernelInfo.createKernelAllocation(device->getDevice(), false);
ASSERT_NE(nullptr, kernelInfo.kernelAllocation);
EXPECT_TRUE(kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool());
std::vector<NEO::ExternalFunctionInfo> externalFunctions;
MockProgram program{nullptr, false, toClDeviceVector(*device)};
program.getKernelInfoArray(device->getRootDeviceIndex()).push_back(&kernelInfo);
program.setLinkerInput(device->getRootDeviceIndex(), std::move(linkerInput));
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, externalFunctions);
EXPECT_EQ(CL_SUCCESS, ret);
EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
program.getKernelInfoArray(device->getRootDeviceIndex()).clear();
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
} // namespace NEO
} // namespace NEO

View File

@@ -0,0 +1,542 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/local_memory_access_modes.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/vec.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/compiler_interface/linker_mock.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/event/user_event.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/command_queue/blit_enqueue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
namespace NEO {
using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>;
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenProfilingEnabledWhenSubmittingWithoutFlushToGpgpuThenSetSubmitTime) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
mockCommandQueue->setProfilingEnabled();
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
cl_event clEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
auto event = castToObject<Event>(clEvent);
uint64_t submitTime = 0;
event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(submitTime), &submitTime, nullptr);
EXPECT_NE(0u, submitTime);
clReleaseEvent(clEvent);
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenOutEventWhenEnqueuingBcsSubmissionThenSetupBcsCsrInEvent) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
{
DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);
cl_event clEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, bcsCsr->peekTaskCount());
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
auto event = castToObject<Event>(clEvent);
EXPECT_EQ(0u, event->peekBcsTaskCountFromCommandQueue());
clReleaseEvent(clEvent);
}
{
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
cl_event clEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, bcsCsr->peekTaskCount());
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
auto event = castToObject<Event>(clEvent);
EXPECT_EQ(1u, event->peekBcsTaskCountFromCommandQueue());
clReleaseEvent(clEvent);
}
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyThenDontSubmitToGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
mockCommandQueue->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCommandQueue->getGpgpuCommandStreamReceiver().postInitFlagsSetup();
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenImmediateDispatchCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
mockCommandQueue->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
mockCommandQueue->getGpgpuCommandStreamReceiver().postInitFlagsSetup();
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyAfterBarrierThenSubmitToGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueBarrierWithWaitList(0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
UserEvent userEvent;
cl_event waitlist = &userEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
EXPECT_FALSE(commandQueue->isQueueBlocked());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
UserEvent userEvent;
cl_event waitlist = &userEvent;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
EXPECT_FALSE(commandQueue->isQueueBlocked());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThatRequiresCacheFlushThenSubmitToGpgpu) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
// enqueue kernel to force gpgpu submission on write buffer
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
auto offset = mockCommandQueue->getCS(0).getUsed();
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
auto cmdListBcs = getCmdList<FamilyType>(bcsCsr->getCS(0), 0);
auto cmdListQueue = getCmdList<FamilyType>(mockCommandQueue->getCS(0), offset);
uint64_t cacheFlushWriteAddress = 0;
{
auto cmdFound = expectPipeControl<FamilyType>(cmdListQueue.begin(), cmdListQueue.end());
auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*cmdFound);
EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
cacheFlushWriteAddress = NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControlCmd);
EXPECT_NE(0u, cacheFlushWriteAddress);
}
{
auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdListBcs.begin(), cmdListBcs.end());
verifySemaphore<FamilyType>(cmdFound, cacheFlushWriteAddress);
cmdFound = expectCommand<XY_COPY_BLT>(cmdListBcs.begin(), cmdListBcs.end());
EXPECT_NE(cmdListBcs.end(), cmdFound);
}
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenClearDependencies) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
const bool clearDependencies = true;
{
TimestampPacketContainer previousNodes;
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *gpgpuCsr); // init
EXPECT_EQ(0u, previousNodes.peekNodes().size());
}
{
TimestampPacketContainer previousNodes;
mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr);
EXPECT_EQ(0u, previousNodes.peekNodes().size());
}
}
using BlitCopyTests = BlitEnqueueTests<1>;
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.KernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
kernelInfo.createKernelAllocation(device->getDevice(), false);
if (kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()) {
EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
} else {
EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
}
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.KernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
kernelInfo.createKernelAllocation(device->getDevice(), false);
EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
uint32_t kernelHeap = 0;
KernelInfo kernelInfo;
kernelInfo.heapInfo.KernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
kernelInfo.createKernelAllocation(device->getDevice(), false);
EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenLocalMemoryAccessNotAllowedWhenGlobalConstantsAreExportedThenUseBlitter) {
DebugManager.flags.EnableLocalMemory.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
char constantData[128] = {};
ProgramInfo programInfo;
programInfo.globalConstants.initData = constantData;
programInfo.globalConstants.size = sizeof(constantData);
auto mockLinkerInput = std::make_unique<WhiteBox<LinkerInput>>();
mockLinkerInput->traits.exportsGlobalConstants = true;
programInfo.linkerInput = std::move(mockLinkerInput);
MockProgram program(bcsMockContext.get(), false, toClDeviceVector(*device));
EXPECT_EQ(0u, bcsMockContext->bcsCsr->peekTaskCount());
program.processProgramInfo(programInfo, *device);
EXPECT_EQ(1u, bcsMockContext->bcsCsr->peekTaskCount());
auto rootDeviceIndex = device->getRootDeviceIndex();
ASSERT_NE(nullptr, program.getConstantSurface(rootDeviceIndex));
auto gpuAddress = reinterpret_cast<const void *>(program.getConstantSurface(rootDeviceIndex)->getGpuAddress());
EXPECT_NE(nullptr, bcsMockContext->getSVMAllocsManager()->getSVMAlloc(gpuAddress));
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenSubstituteKernelHeapIsCalledThenUseBcsForTransfer) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
MockKernelWithInternals kernel(*device);
const size_t initialHeapSize = 0x40;
kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize;
kernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
ASSERT_NE(nullptr, kernel.kernelInfo.kernelAllocation);
EXPECT_TRUE(kernel.kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool());
const size_t newHeapSize = initialHeapSize;
char newHeap[newHeapSize];
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
device->getMemoryManager()->freeGraphicsMemory(kernel.kernelInfo.kernelAllocation);
}
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenLinkerRequiresPatchingOfInstructionSegmentsThenUseBcsForTransfer) {
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));
device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
auto linkerInput = std::make_unique<WhiteBox<LinkerInput>>();
linkerInput->traits.requiresPatchingOfInstructionSegments = true;
KernelInfo kernelInfo = {};
std::vector<char> kernelHeap;
kernelHeap.resize(32, 7);
kernelInfo.heapInfo.pKernelHeap = kernelHeap.data();
kernelInfo.heapInfo.KernelHeapSize = static_cast<uint32_t>(kernelHeap.size());
kernelInfo.createKernelAllocation(device->getDevice(), false);
ASSERT_NE(nullptr, kernelInfo.kernelAllocation);
EXPECT_TRUE(kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool());
std::vector<NEO::ExternalFunctionInfo> externalFunctions;
MockProgram program{nullptr, false, toClDeviceVector(*device)};
program.getKernelInfoArray(device->getRootDeviceIndex()).push_back(&kernelInfo);
program.setLinkerInput(device->getRootDeviceIndex(), std::move(linkerInput));
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, externalFunctions);
EXPECT_EQ(CL_SUCCESS, ret);
EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
program.getKernelInfoArray(device->getRootDeviceIndex()).clear();
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
} // namespace NEO

View File

@@ -0,0 +1,267 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/local_memory_access_modes.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/vec.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/compiler_interface/linker_mock.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/event/user_event.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
namespace NEO {
template <int timestampPacketEnabled>
struct BlitEnqueueTests : public ::testing::Test {
class BcsMockContext : public MockContext {
public:
BcsMockContext(ClDevice *device) : MockContext(device) {
bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})));
bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
bcsCsr->setupContext(*bcsOsContext);
bcsCsr->initializeTagAllocation();
auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
Vec3<size_t> size) -> BlitOperationResult {
if (!device.getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported) {
return BlitOperationResult::Unsupported;
}
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
*bcsCsr, memory, nullptr,
hostPtr,
memory->getGpuAddress(), 0,
0, 0, size, 0, 0, 0, 0);
BlitPropertiesContainer container;
container.push_back(blitProperties);
bcsCsr->flushBcsTask(container, true, false, const_cast<Device &>(device));
return BlitOperationResult::Success;
};
blitMemoryToAllocationFuncBackup = mockBlitMemoryToAllocation;
}
std::unique_ptr<OsContext> bcsOsContext;
std::unique_ptr<CommandStreamReceiver> bcsCsr;
VariableBackup<BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup{
&BlitHelperFunctions::blitMemoryToAllocation};
};
template <typename FamilyType>
void setUpT() {
if (is32bit) {
GTEST_SKIP();
}
REQUIRE_AUX_RESOLVES();
DebugManager.flags.EnableTimestampPacket.set(timestampPacketEnabled);
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
DebugManager.flags.CsrDispatchMode.set(static_cast<int32_t>(DispatchMode::ImmediateDispatch));
DebugManager.flags.EnableLocalMemory.set(1);
device = std::make_unique<MockClDevice>(MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
capabilityTable.blitterOperationsSupported = true;
if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isBlitterFullySupported(device->getHardwareInfo())) {
GTEST_SKIP();
}
if (createBcsEngine) {
auto &engine = device->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority);
bcsOsContext.reset(OsContext::create(nullptr, 1,
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getDeviceBitfield())));
engine.osContext = bcsOsContext.get();
engine.commandStreamReceiver->setupContext(*bcsOsContext);
}
bcsMockContext = std::make_unique<BcsMockContext>(device.get());
auto mockCmdQueue = new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr);
commandQueue.reset(mockCmdQueue);
mockKernel = std::make_unique<MockKernelWithInternals>(*device, bcsMockContext.get());
auto mockProgram = mockKernel->mockProgram;
mockProgram->setAllowNonUniform(true);
gpgpuCsr = &mockCmdQueue->getGpgpuCommandStreamReceiver();
bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver;
}
template <typename FamilyType>
void tearDownT() {}
template <size_t N>
void setMockKernelArgs(std::array<Buffer *, N> buffers) {
for (uint32_t i = 0; i < buffers.size(); i++) {
mockKernel->kernelInfo.addArgBuffer(i, 0);
}
mockKernel->mockKernel->initialize();
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
for (uint32_t i = 0; i < buffers.size(); i++) {
cl_mem clMem = buffers[i];
mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem);
}
}
template <size_t N>
void setMockKernelArgs(std::array<GraphicsAllocation *, N> allocs) {
for (uint32_t i = 0; i < allocs.size(); i++) {
mockKernel->kernelInfo.addArgBuffer(i, 0);
}
mockKernel->mockKernel->initialize();
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
for (uint32_t i = 0; i < allocs.size(); i++) {
auto alloc = allocs[i];
auto ptr = reinterpret_cast<void *>(alloc->getGpuAddressToPatch());
mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc, 0u);
}
}
ReleaseableObjectPtr<Buffer> createBuffer(size_t size, bool compressed) {
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal));
auto graphicsAllocation = buffer->getGraphicsAllocation(device->getRootDeviceIndex());
setAllocationType(graphicsAllocation, compressed);
return buffer;
}
MockGraphicsAllocation *createGfxAllocation(size_t size, bool compressed) {
auto alloc = new MockGraphicsAllocation(nullptr, size);
setAllocationType(alloc, compressed);
return alloc;
}
void setAllocationType(GraphicsAllocation *graphicsAllocation, bool compressed) {
graphicsAllocation->setAllocationType(AllocationType::BUFFER);
if (compressed && !graphicsAllocation->getDefaultGmm()) {
auto gmmHelper = device->getRootDeviceEnvironment().getGmmHelper();
graphicsAllocation->setDefaultGmm(new Gmm(gmmHelper, nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
}
if (graphicsAllocation->getDefaultGmm()) {
graphicsAllocation->getDefaultGmm()->isCompressionEnabled = compressed;
}
}
template <typename Family>
GenCmdList getCmdList(LinearStream &linearStream, size_t offset) {
HardwareParse hwParser;
hwParser.parseCommands<Family>(linearStream, offset);
return hwParser.cmdList;
}
template <typename Family>
GenCmdList::iterator expectPipeControl(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
using PIPE_CONTROL = typename Family::PIPE_CONTROL;
PIPE_CONTROL *pipeControlCmd = nullptr;
GenCmdList::iterator commandItor = itorStart;
bool stallingWrite = false;
do {
commandItor = find<PIPE_CONTROL *>(commandItor, itorEnd);
if (itorEnd == commandItor) {
EXPECT_TRUE(false);
return itorEnd;
}
pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*commandItor);
stallingWrite = pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA &&
pipeControlCmd->getCommandStreamerStallEnable();
++commandItor;
} while (!stallingWrite);
return --commandItor;
}
template <typename Family>
GenCmdList::iterator expectMiFlush(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
Family *miFlushCmd = nullptr;
GenCmdList::iterator commandItor = itorStart;
bool miFlushWithMemoryWrite = false;
do {
commandItor = find<Family *>(commandItor, itorEnd);
if (itorEnd == commandItor) {
EXPECT_TRUE(false);
return itorEnd;
}
miFlushCmd = genCmdCast<Family *>(*commandItor);
miFlushWithMemoryWrite = miFlushCmd->getDestinationAddress() != 0;
++commandItor;
} while (!miFlushWithMemoryWrite);
return --commandItor;
}
template <typename Command>
GenCmdList::iterator expectCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
auto commandItor = find<Command *>(itorStart, itorEnd);
EXPECT_TRUE(commandItor != itorEnd);
return commandItor;
}
template <typename Command>
void expectNoCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
auto commandItor = find<Command *>(itorStart, itorEnd);
EXPECT_TRUE(commandItor == itorEnd);
}
template <typename Family>
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
EXPECT_EQ(expectedAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
}
DebugManagerStateRestore restore;
std::unique_ptr<OsContext> bcsOsContext;
std::unique_ptr<MockClDevice> device;
std::unique_ptr<BcsMockContext> bcsMockContext;
std::unique_ptr<CommandQueue> commandQueue;
std::unique_ptr<MockKernelWithInternals> mockKernel;
CommandStreamReceiver *bcsCsr = nullptr;
CommandStreamReceiver *gpgpuCsr = nullptr;
size_t gws[3] = {63, 0, 0};
size_t lws[3] = {16, 0, 0};
uint32_t hostPtr = 0;
cl_int retVal = CL_SUCCESS;
};
} // namespace NEO

View File

@@ -29,6 +29,7 @@
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "opencl/test/unit_test/mocks/mock_printf_handler.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
using namespace NEO;
@@ -1121,6 +1122,18 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe
EXPECT_EQ(mockCsr->flushCalledCount, 1);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenGpuHangOnPrintEnqueueOutputWhenWaitingForEnginesThenGpuHangIsReported) {
MockCommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, nullptr);
commandQueue.waitUntilCompleteReturnValue = WaitStatus::Ready;
const auto blockedQueue{false};
const auto cleanTemporaryAllocationsList{false};
MockPrintfHandler printfHandler(*pClDevice);
const auto waitStatus = commandQueue.waitForAllEngines(blockedQueue, &printfHandler, cleanTemporaryAllocationsList);
EXPECT_EQ(WaitStatus::GpuHang, waitStatus);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdateTaskCountFromWaitSetWhenFlushTaskThenPipeControlAndBBSIsFlushed) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(1);

View File

@@ -606,29 +606,31 @@ HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) {
EXPECT_EQ(newTaskCount, csr.latestWaitForCompletionWithTimeoutTaskCount.load());
}
HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
public:
using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
template <typename FamilyType>
class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
public:
using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, QueueThrottle throttle) override {
waitForTaskCountWithKmdNotifyFallbackCalled++;
taskCountToWaitPassed = taskCountToWait;
flushStampToWaitPassed = flushStampToWait;
useQuickKmdSleepPassed = useQuickKmdSleep;
throttlePassed = throttle;
return WaitStatus::Ready;
}
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, QueueThrottle throttle) override {
waitForTaskCountWithKmdNotifyFallbackCalled++;
taskCountToWaitPassed = taskCountToWait;
flushStampToWaitPassed = flushStampToWait;
useQuickKmdSleepPassed = useQuickKmdSleep;
throttlePassed = throttle;
return waitForTaskCountWithKmdNotifyFallbackReturnValue;
}
FlushStamp flushStampToWaitPassed = 0;
uint32_t taskCountToWaitPassed = 0;
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
bool useQuickKmdSleepPassed = false;
QueueThrottle throttlePassed = QueueThrottle::MEDIUM;
};
FlushStamp flushStampToWaitPassed = 0;
uint32_t taskCountToWaitPassed = 0;
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
bool useQuickKmdSleepPassed = false;
QueueThrottle throttlePassed = QueueThrottle::MEDIUM;
WaitStatus waitForTaskCountWithKmdNotifyFallbackReturnValue{WaitStatus::Ready};
};
auto myMockCsr = std::make_unique<MyMockCsr>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
HWTEST_F(BcsTests, GivenNoneGpuHangWhenBlitFromHostPtrCalledThenCallWaitWithKmdFallbackAndNewTaskCountIsReturned) {
auto myMockCsr = std::make_unique<MyMockCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
auto &bcsOsContext = pDevice->getUltCommandStreamReceiver<FamilyType>().getOsContext();
myMockCsr->initializeTagAllocation();
myMockCsr->setupContext(bcsOsContext);
@@ -648,11 +650,52 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
graphicsAllocation->getGpuAddress(), 0,
0, 0, {1, 1, 1}, 0, 0, 0, 0);
flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice);
const auto taskCount1 = flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice);
EXPECT_TRUE(taskCount1.has_value());
EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice);
const auto taskCount2 = flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice);
EXPECT_TRUE(taskCount2.has_value());
EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);
EXPECT_EQ(myMockCsr->flushStamp->peekStamp(), myMockCsr->flushStampToWaitPassed);
EXPECT_FALSE(myMockCsr->useQuickKmdSleepPassed);
EXPECT_EQ(myMockCsr->throttlePassed, QueueThrottle::MEDIUM);
EXPECT_EQ(1u, myMockCsr->activePartitions);
}
HWTEST_F(BcsTests, GivenGpuHangWhenBlitFromHostPtrCalledThenCallWaitWithKmdFallbackAndDoNotReturnNewTaskCount) {
auto myMockCsr = std::make_unique<MyMockCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
auto &bcsOsContext = pDevice->getUltCommandStreamReceiver<FamilyType>().getOsContext();
myMockCsr->initializeTagAllocation();
myMockCsr->setupContext(bcsOsContext);
cl_int retVal = CL_SUCCESS;
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
constexpr size_t hostAllocationSize = MemoryConstants::pageSize;
auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize);
void *hostPtr = reinterpret_cast<void *>(hostAllocationPtr.get());
auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
*myMockCsr, graphicsAllocation, nullptr,
hostPtr,
graphicsAllocation->getGpuAddress(), 0,
0, 0, {1, 1, 1}, 0, 0, 0, 0);
const auto taskCount1 = flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice);
EXPECT_TRUE(taskCount1.has_value());
EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
myMockCsr->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;
const auto taskCount2 = flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice);
EXPECT_FALSE(taskCount2.has_value());
EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);

View File

@@ -22,7 +22,7 @@ struct BcsTests : public Test<ClDeviceFixture> {
Test<ClDeviceFixture>::TearDown();
}
uint32_t flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) {
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) {
BlitPropertiesContainer container;
container.push_back(blitProperties);

View File

@@ -517,6 +517,55 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenLocalMemoryAllocationWhenB
}
}
HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenGpuHangOnFlushBcsTaskAndLocalMemoryAllocationWhenBlitMemoryToAllocationIsCalledThenGpuHangIsReturned) {
if (is32bit) {
GTEST_SKIP();
}
DebugManagerStateRestore restore;
DebugManager.flags.EnableLocalMemory.set(true);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
VariableBackup<HardwareInfo> backupHwInfo(defaultHwInfo.get());
defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
UltClDeviceFactory deviceFactory{1, 2};
auto testedDevice = deviceFactory.rootDevices[0];
MockContext context(testedDevice);
cl_int retVal;
auto buffer = std::unique_ptr<Buffer>(Buffer::create(&context, {}, 1, nullptr, retVal));
auto memory = buffer->getGraphicsAllocation(testedDevice->getRootDeviceIndex());
uint8_t hostMemory[1];
auto executionEnv = testedDevice->getExecutionEnvironment();
executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
EXPECT_EQ(BlitOperationResult::Unsupported, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1}));
executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
const auto rootDevice = testedDevice->getDevice().getRootDevice();
const auto blitDevice = rootDevice->getNearestGenericSubDevice(0);
auto &selectorCopyEngine = blitDevice->getSelectorCopyEngine();
auto deviceBitfield = blitDevice->getDeviceBitfield();
const auto &hwInfo = testedDevice->getDevice().getHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto internalUsage = true;
auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, internalUsage);
auto bcsEngineUsage = hwHelper.preferInternalBcsEngine() ? EngineUsage::Internal : EngineUsage::Regular;
auto bcsEngine = blitDevice->tryGetEngine(bcsEngineType, bcsEngineUsage);
ASSERT_NE(nullptr, bcsEngine);
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine->commandStreamReceiver);
ultBcsCsr->callBaseFlushBcsTask = false;
ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;
EXPECT_EQ(BlitOperationResult::GpuHang, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1}));
}
struct AllocationReuseContextTest : ContextTest {
void addMappedPtr(Buffer &buffer, void *ptr, size_t ptrLength) {
auto &handler = context->getMapOperationsStorage().getHandler(&buffer);

View File

@@ -31,6 +31,7 @@
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_mdi.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "opencl/test/unit_test/mocks/mock_printf_handler.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/os_interface/mock_performance_counters.h"
@@ -587,6 +588,105 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
EXPECT_FALSE(surface->isResident(pDevice->getDefaultEngine().osContext->getContextId()));
}
TEST_F(InternalsEventTest, givenGpuHangOnCmdQueueWaitFunctionAndBlockedKernelWithPrintfWhenSubmittedThenEventIsAbortedAndHangIsReported) {
MockCommandQueue mockCmdQueue(mockContext, pClDevice, nullptr, false);
mockCmdQueue.waitUntilCompleteReturnValue = WaitStatus::GpuHang;
testing::internal::CaptureStdout();
MockEvent<Event> event(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()}));
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh);
mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh);
mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh);
auto blockedCommandsData = std::make_unique<KernelOperation>(cmdStream, *mockCmdQueue.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandsData->setHeaps(dsh, ioh, ssh);
std::string testString = "test";
MockKernelWithInternals mockKernelWithInternals(*pClDevice);
auto pKernel = mockKernelWithInternals.mockKernel;
auto &kernelInfo = mockKernelWithInternals.kernelInfo;
kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
kernelInfo.setPrintfSurface(sizeof(uintptr_t), 0);
kernelInfo.addToPrintfStringsMap(0, testString);
uint64_t crossThread[10];
pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8);
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
std::unique_ptr<PrintfHandler> printfHandler(PrintfHandler::create(multiDispatchInfo, *pClDevice));
printfHandler.get()->prepareDispatch(multiDispatchInfo);
auto surface = printfHandler.get()->getSurface();
auto printfSurface = reinterpret_cast<uint32_t *>(surface->getUnderlyingBuffer());
printfSurface[0] = 8;
printfSurface[1] = 0;
std::vector<Surface *> v;
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1);
event.setCommand(std::unique_ptr<Command>(cmd));
event.submitCommand(false);
EXPECT_EQ(Event::executionAbortedDueToGpuHang, event.peekExecutionStatus());
std::string output = testing::internal::GetCapturedStdout();
EXPECT_STREQ("test", output.c_str());
}
TEST_F(InternalsEventTest, givenGpuHangOnPrintingEnqueueOutputAndBlockedKernelWithPrintfWhenSubmittedThenEventIsAbortedAndHangIsReported) {
MockCommandQueue mockCmdQueue(mockContext, pClDevice, nullptr, false);
testing::internal::CaptureStdout();
MockEvent<Event> event(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()}));
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh);
mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh);
mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh);
auto blockedCommandsData = std::make_unique<KernelOperation>(cmdStream, *mockCmdQueue.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandsData->setHeaps(dsh, ioh, ssh);
std::string testString = "test";
MockKernelWithInternals mockKernelWithInternals(*pClDevice);
auto pKernel = mockKernelWithInternals.mockKernel;
auto &kernelInfo = mockKernelWithInternals.kernelInfo;
kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
kernelInfo.setPrintfSurface(sizeof(uintptr_t), 0);
kernelInfo.addToPrintfStringsMap(0, testString);
uint64_t crossThread[10];
pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8);
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
std::unique_ptr<MockPrintfHandler> printfHandler(new MockPrintfHandler(*pClDevice));
printfHandler.get()->prepareDispatch(multiDispatchInfo);
auto surface = printfHandler.get()->getSurface();
auto printfSurface = reinterpret_cast<uint32_t *>(surface->getUnderlyingBuffer());
printfSurface[0] = 8;
printfSurface[1] = 0;
std::vector<Surface *> v;
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1);
event.setCommand(std::unique_ptr<Command>(cmd));
event.submitCommand(false);
EXPECT_EQ(Event::executionAbortedDueToGpuHang, event.peekExecutionStatus());
std::string output = testing::internal::GetCapturedStdout();
EXPECT_TRUE(output.empty());
}
TEST_F(InternalsEventTest, GivenMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) {
auto pCmdQ = make_releaseable<MockCommandQueue>(mockContext, pClDevice, nullptr, false);
MockEvent<Event> event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0);

View File

@@ -542,7 +542,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
return WaitStatus::Ready;
}
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
CompletionStamp flushTask(
LinearStream &commandStream,

View File

@@ -22,6 +22,7 @@ set(IGDRCL_SRCS_tests_mocks
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_platform.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_platform.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_printf_handler.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_program.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_program.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h

View File

@@ -348,6 +348,10 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
latestTaskCountWaited = gpgpuTaskCountToWait;
if (waitUntilCompleteReturnValue.has_value()) {
return *waitUntilCompleteReturnValue;
}
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
}
@@ -395,6 +399,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
bool flushCalled = false;
std::optional<WaitStatus> waitForAllEnginesReturnValue{};
std::optional<WaitStatus> waitUntilCompleteReturnValue{};
int waitForAllEnginesCalledCount{0};
LinearStream *peekCommandStream() {

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "opencl/source/program/printf_handler.h"
using namespace NEO;
class MockPrintfHandler : public PrintfHandler {
public:
MockPrintfHandler(ClDevice &device) : PrintfHandler{device} {}
~MockPrintfHandler() override = default;
bool printEnqueueOutput() override {
return false;
}
};

View File

@@ -147,7 +147,7 @@ HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOut
printfHandler->prepareDispatch(multiDispatchInfo);
EXPECT_NE(nullptr, printfHandler->getSurface());
printfHandler->printEnqueueOutput();
EXPECT_TRUE(printfHandler->printEnqueueOutput());
auto &bcsEngine = device->getEngine(EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), device->getSelectorCopyEngine(), true), EngineUsage::Regular);
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine.commandStreamReceiver);
@@ -161,6 +161,41 @@ HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOut
}
}
HWTEST_F(PrintfHandlerTests, givenGpuHangOnFlushBcsStreamAndEnabledStatelessCompressionWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedToDecompressPrintfOutputAndFalseIsReturned) {
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.capabilityTable.blitterOperationsSupported = true;
REQUIRE_BLITTER_OR_SKIP(&hwInfo);
DebugManagerStateRestore restore;
DebugManager.flags.EnableStatelessCompression.set(1);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
MockContext context(device.get());
auto kernelInfo = std::make_unique<MockKernelInfo>();
kernelInfo->setPrintfSurface(sizeof(uintptr_t), 0);
auto program = std::make_unique<MockProgram>(&context, false, toClDeviceVector(*device));
uint64_t crossThread[10];
auto kernel = std::make_unique<MockKernel>(program.get(), *kernelInfo, *device);
kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8);
MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.get());
std::unique_ptr<PrintfHandler> printfHandler(PrintfHandler::create(multiDispatchInfo, *device));
printfHandler->prepareDispatch(multiDispatchInfo);
EXPECT_NE(nullptr, printfHandler->getSurface());
auto &bcsEngine = device->getEngine(EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), device->getSelectorCopyEngine(), true), EngineUsage::Regular);
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine.commandStreamReceiver);
bcsCsr->callBaseFlushBcsTask = false;
bcsCsr->flushBcsTaskReturnValue = std::nullopt;
EXPECT_FALSE(printfHandler->printEnqueueOutput());
EXPECT_EQ(1u, bcsCsr->blitBufferCalled);
EXPECT_EQ(BlitterConstants::BlitDirection::BufferToHostPtr, bcsCsr->receivedBlitProperties[0].blitDirection);
}
HWTEST_F(PrintfHandlerTests, givenDisallowedLocalMemoryCpuAccessWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedToCopyPrintfOutput) {
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.capabilityTable.blitterOperationsSupported = true;

View File

@@ -40,7 +40,7 @@ struct BlitXE_HP_CORETests : public ::testing::Test {
clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
}
uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);

View File

@@ -37,7 +37,7 @@ struct BlitXeHpcCoreTests : public ::testing::Test {
clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
}
uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);

View File

@@ -38,7 +38,7 @@ struct BlitXeHpgCoreTests : public ::testing::Test {
clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
}
uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);

View File

@@ -30,6 +30,7 @@
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <optional>
namespace NEO {
class AllocationsList;
@@ -227,7 +228,7 @@ class CommandStreamReceiver {
this->latestFlushedTaskCount = latestFlushedTaskCount;
}
virtual uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
virtual std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
virtual void flushTagUpdate() = 0;
virtual void updateTagFromWait() = 0;

View File

@@ -18,6 +18,8 @@
#include "hw_cmds.h"
#include <optional>
namespace NEO {
template <typename GfxFamily>
class DeviceCommandStreamReceiver;
@@ -96,7 +98,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return CommandStreamReceiverType::CSR_HW;
}
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
void flushTagUpdate() override;
void flushMiFlushDW();

View File

@@ -1015,7 +1015,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::unregisterDirectSubmissionFromCo
}
template <typename GfxFamily>
uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
std::optional<uint32_t> CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
@@ -1128,8 +1128,12 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesCo
lock.unlock();
if (blocking) {
waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, QueueThrottle::MEDIUM);
const auto waitStatus = waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, QueueThrottle::MEDIUM);
internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION);
if (waitStatus == WaitStatus::GpuHang) {
return std::nullopt;
}
}
return newTaskCount;

View File

@@ -209,7 +209,11 @@ BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device
hostPtr,
(memory->getGpuAddress() + offset),
0, 0, 0, size, 0, 0, 0, 0));
bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, *pDeviceForBlit);
const auto newTaskCount = bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, *pDeviceForBlit);
if (!newTaskCount) {
return BlitOperationResult::GpuHang;
}
}
return BlitOperationResult::Success;

View File

@@ -96,7 +96,8 @@ struct BlitProperties {
enum class BlitOperationResult {
Unsupported,
Fail,
Success
Success,
GpuHang
};
namespace BlitHelperFunctions {

View File

@@ -49,7 +49,7 @@ class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw<GfxFamily>
return csr;
}
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
blitBufferCalled++;
return BaseClass::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
}

View File

@@ -269,10 +269,15 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
return CommandStreamReceiverHw<GfxFamily>::obtainUniqueOwnership();
}
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
blitBufferCalled++;
receivedBlitProperties = blitPropertiesContainer;
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
if (callBaseFlushBcsTask) {
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
} else {
return flushBcsTaskReturnValue;
}
}
bool createPerDssBackedBuffer(Device &device) override {
@@ -368,6 +373,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
bool shouldFlushBatchedSubmissionsReturnSuccess = false;
WaitStatus returnWaitForCompletionWithTimeout = WaitStatus::Ready;
std::optional<WaitStatus> waitForTaskCountWithKmdNotifyFallbackReturnValue{};
bool callBaseFlushBcsTask{true};
std::optional<uint32_t> flushBcsTaskReturnValue{};
};
} // namespace NEO

View File

@@ -112,7 +112,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
return WaitStatus::Ready;
}
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
CommandStreamReceiverType getType() override {
return CommandStreamReceiverType::CSR_HW;
@@ -277,7 +277,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
return completionStamp;
}
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
if (!skipBlitCalls) {
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
}