mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Detect GPU hangs in flushBcsTask()
This change introduces detection of GPU hangs in the flushBcsTask() function. The new code is covered by ULTs. Related-To: NEO-6681 Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b4b1fb97bd
commit
9b2ad0c5df
@@ -1140,7 +1140,9 @@ WaitStatus CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *pri
|
||||
const auto waitStatus = waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps);
|
||||
|
||||
if (printfHandler) {
|
||||
printfHandler->printEnqueueOutput();
|
||||
if (!printfHandler->printEnqueueOutput()) {
|
||||
return WaitStatus::GpuHang;
|
||||
}
|
||||
}
|
||||
|
||||
return waitStatus;
|
||||
|
||||
@@ -337,6 +337,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
this->latestSentEnqueueType = enqueueProperties.operation;
|
||||
}
|
||||
|
||||
if (completionStamp.taskCount == CompletionStamp::gpuHang) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
|
||||
|
||||
if (blockQueue) {
|
||||
@@ -819,7 +824,14 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
if (enqueueProperties.blitPropertiesContainer->size() > 0) {
|
||||
auto bcsCsr = getBcsForAuxTranslation();
|
||||
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
|
||||
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
|
||||
if (!newTaskCount) {
|
||||
CompletionStamp completionStamp{};
|
||||
completionStamp.taskCount = CompletionStamp::gpuHang;
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
|
||||
dispatchFlags.implicitFlush = true;
|
||||
}
|
||||
|
||||
@@ -1050,7 +1062,14 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
|
||||
UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer);
|
||||
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
|
||||
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
|
||||
if (!newTaskCount) {
|
||||
CompletionStamp completionStamp{};
|
||||
completionStamp.taskCount = CompletionStamp::gpuHang;
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
|
||||
}
|
||||
|
||||
return completionStamp;
|
||||
@@ -1157,6 +1176,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
|
||||
enqueueProperties, timestampPacketDependencies, eventsRequest,
|
||||
eventBuilder, taskLevel, csrDeps, &bcsCsr);
|
||||
if (completionStamp.taskCount == CompletionStamp::gpuHang) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
if (gpgpuSubmission) {
|
||||
commandStreamReceiverOwnership.unlock();
|
||||
}
|
||||
|
||||
@@ -261,9 +261,15 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
commandQueue.clearLastBcsPackets();
|
||||
}
|
||||
|
||||
bool isGpuHangDetected{false};
|
||||
|
||||
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
||||
const auto newTaskCount = bcsCsrForAuxTranslation->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
|
||||
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount);
|
||||
if (newTaskCount) {
|
||||
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), *newTaskCount);
|
||||
} else {
|
||||
isGpuHangDetected = true;
|
||||
}
|
||||
}
|
||||
commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::GpuKernel);
|
||||
|
||||
@@ -272,8 +278,14 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
}
|
||||
|
||||
if (printfHandler) {
|
||||
commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
|
||||
printfHandler->printEnqueueOutput();
|
||||
const auto waitStatus = commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
|
||||
if (waitStatus == WaitStatus::GpuHang) {
|
||||
isGpuHangDetected = true;
|
||||
}
|
||||
|
||||
if (!printfHandler->printEnqueueOutput()) {
|
||||
isGpuHangDetected = true;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto surface : surfaces) {
|
||||
@@ -281,10 +293,14 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
}
|
||||
surfaces.clear();
|
||||
|
||||
if (isGpuHangDetected) {
|
||||
completionStamp.taskCount = CompletionStamp::gpuHang;
|
||||
}
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
void CommandWithoutKernel::dispatchBlitOperation() {
|
||||
bool CommandWithoutKernel::dispatchBlitOperation() {
|
||||
auto bcsCsr = kernelOperation->bcsCsr;
|
||||
UNRECOVERABLE_IF(bcsCsr == nullptr);
|
||||
|
||||
@@ -301,8 +317,14 @@ void CommandWithoutKernel::dispatchBlitOperation() {
|
||||
}
|
||||
|
||||
const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
|
||||
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
|
||||
if (!newTaskCount) {
|
||||
return false;
|
||||
}
|
||||
|
||||
commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), *newTaskCount);
|
||||
commandQueue.setLastBcsPacket(bcsCsr->getOsContext().getEngineType());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) {
|
||||
@@ -401,7 +423,9 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
}
|
||||
|
||||
if (kernelOperation->blitEnqueue) {
|
||||
dispatchBlitOperation();
|
||||
if (!dispatchBlitOperation()) {
|
||||
completionStamp.taskCount = CompletionStamp::gpuHang;
|
||||
}
|
||||
}
|
||||
|
||||
commandQueue.updateLatestSentEnqueueType(enqueueOperationType);
|
||||
|
||||
@@ -157,6 +157,6 @@ class CommandWithoutKernel : public Command {
|
||||
public:
|
||||
using Command::Command;
|
||||
CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
|
||||
void dispatchBlitOperation();
|
||||
bool dispatchBlitOperation();
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -85,7 +85,7 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
||||
commandStreamReceiver.makeResident(*printfSurface);
|
||||
}
|
||||
|
||||
void PrintfHandler::printEnqueueOutput() {
|
||||
bool PrintfHandler::printEnqueueOutput() {
|
||||
auto &hwInfo = device.getHardwareInfo();
|
||||
|
||||
auto usesStringMap = kernel->getDescriptor().kernelAttributes.usesStringMap();
|
||||
@@ -108,11 +108,18 @@ void PrintfHandler::printEnqueueOutput() {
|
||||
printfOutputDecompressed.get(),
|
||||
printfSurface->getGpuAddress(),
|
||||
0, 0, 0, Vec3<size_t>(printfOutputSize, 0, 0), 0, 0, 0, 0));
|
||||
bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, device.getDevice());
|
||||
|
||||
const auto newTaskCount = bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, device.getDevice());
|
||||
if (!newTaskCount) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
PrintFormatter printFormatter(printfOutputBuffer, printfOutputSize, kernel->is32Bit(),
|
||||
usesStringMap ? &kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr);
|
||||
printFormatter.printKernelOutput();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -20,11 +20,11 @@ class PrintfHandler {
|
||||
public:
|
||||
static PrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &deviceArg);
|
||||
|
||||
~PrintfHandler();
|
||||
MOCKABLE_VIRTUAL ~PrintfHandler();
|
||||
|
||||
void prepareDispatch(const MultiDispatchInfo &multiDispatchInfo);
|
||||
void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
void printEnqueueOutput();
|
||||
MOCKABLE_VIRTUAL bool printEnqueueOutput();
|
||||
|
||||
GraphicsAllocation *getSurface() {
|
||||
return printfSurface;
|
||||
|
||||
@@ -6,7 +6,9 @@
|
||||
|
||||
set(IGDRCL_SRCS_tests_command_queue
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_1_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_2_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_enqueue_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_1_tests.cpp
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/test/unit_test/command_queue/blit_enqueue_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
@@ -33,237 +34,6 @@ namespace NEO {
|
||||
|
||||
extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE];
|
||||
|
||||
template <int timestampPacketEnabled>
|
||||
struct BlitEnqueueTests : public ::testing::Test {
|
||||
class BcsMockContext : public MockContext {
|
||||
public:
|
||||
BcsMockContext(ClDevice *device) : MockContext(device) {
|
||||
bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})));
|
||||
bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
|
||||
bcsCsr->setupContext(*bcsOsContext);
|
||||
bcsCsr->initializeTagAllocation();
|
||||
|
||||
auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
|
||||
Vec3<size_t> size) -> BlitOperationResult {
|
||||
if (!device.getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported) {
|
||||
return BlitOperationResult::Unsupported;
|
||||
}
|
||||
|
||||
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
|
||||
*bcsCsr, memory, nullptr,
|
||||
hostPtr,
|
||||
memory->getGpuAddress(), 0,
|
||||
0, 0, size, 0, 0, 0, 0);
|
||||
|
||||
BlitPropertiesContainer container;
|
||||
container.push_back(blitProperties);
|
||||
bcsCsr->flushBcsTask(container, true, false, const_cast<Device &>(device));
|
||||
|
||||
return BlitOperationResult::Success;
|
||||
};
|
||||
blitMemoryToAllocationFuncBackup = mockBlitMemoryToAllocation;
|
||||
}
|
||||
|
||||
std::unique_ptr<OsContext> bcsOsContext;
|
||||
std::unique_ptr<CommandStreamReceiver> bcsCsr;
|
||||
VariableBackup<BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup{
|
||||
&BlitHelperFunctions::blitMemoryToAllocation};
|
||||
};
|
||||
|
||||
template <typename FamilyType>
|
||||
void setUpT() {
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
REQUIRE_AUX_RESOLVES();
|
||||
|
||||
DebugManager.flags.EnableTimestampPacket.set(timestampPacketEnabled);
|
||||
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
|
||||
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
|
||||
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
|
||||
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
|
||||
DebugManager.flags.CsrDispatchMode.set(static_cast<int32_t>(DispatchMode::ImmediateDispatch));
|
||||
DebugManager.flags.EnableLocalMemory.set(1);
|
||||
device = std::make_unique<MockClDevice>(MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
|
||||
bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
|
||||
capabilityTable.blitterOperationsSupported = true;
|
||||
|
||||
if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isBlitterFullySupported(device->getHardwareInfo())) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
if (createBcsEngine) {
|
||||
auto &engine = device->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority);
|
||||
bcsOsContext.reset(OsContext::create(nullptr, 1,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getDeviceBitfield())));
|
||||
engine.osContext = bcsOsContext.get();
|
||||
engine.commandStreamReceiver->setupContext(*bcsOsContext);
|
||||
}
|
||||
bcsMockContext = std::make_unique<BcsMockContext>(device.get());
|
||||
auto mockCmdQueue = new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr);
|
||||
commandQueue.reset(mockCmdQueue);
|
||||
mockKernel = std::make_unique<MockKernelWithInternals>(*device, bcsMockContext.get());
|
||||
auto mockProgram = mockKernel->mockProgram;
|
||||
mockProgram->setAllowNonUniform(true);
|
||||
|
||||
gpgpuCsr = &mockCmdQueue->getGpgpuCommandStreamReceiver();
|
||||
bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void tearDownT() {}
|
||||
|
||||
template <size_t N>
|
||||
void setMockKernelArgs(std::array<Buffer *, N> buffers) {
|
||||
for (uint32_t i = 0; i < buffers.size(); i++) {
|
||||
mockKernel->kernelInfo.addArgBuffer(i, 0);
|
||||
}
|
||||
|
||||
mockKernel->mockKernel->initialize();
|
||||
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
|
||||
|
||||
for (uint32_t i = 0; i < buffers.size(); i++) {
|
||||
cl_mem clMem = buffers[i];
|
||||
mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem);
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
void setMockKernelArgs(std::array<GraphicsAllocation *, N> allocs) {
|
||||
for (uint32_t i = 0; i < allocs.size(); i++) {
|
||||
mockKernel->kernelInfo.addArgBuffer(i, 0);
|
||||
}
|
||||
|
||||
mockKernel->mockKernel->initialize();
|
||||
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
|
||||
|
||||
for (uint32_t i = 0; i < allocs.size(); i++) {
|
||||
auto alloc = allocs[i];
|
||||
auto ptr = reinterpret_cast<void *>(alloc->getGpuAddressToPatch());
|
||||
mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc, 0u);
|
||||
}
|
||||
}
|
||||
|
||||
ReleaseableObjectPtr<Buffer> createBuffer(size_t size, bool compressed) {
|
||||
auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal));
|
||||
auto graphicsAllocation = buffer->getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
|
||||
setAllocationType(graphicsAllocation, compressed);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
MockGraphicsAllocation *createGfxAllocation(size_t size, bool compressed) {
|
||||
auto alloc = new MockGraphicsAllocation(nullptr, size);
|
||||
setAllocationType(alloc, compressed);
|
||||
return alloc;
|
||||
}
|
||||
|
||||
void setAllocationType(GraphicsAllocation *graphicsAllocation, bool compressed) {
|
||||
graphicsAllocation->setAllocationType(AllocationType::BUFFER);
|
||||
|
||||
if (compressed && !graphicsAllocation->getDefaultGmm()) {
|
||||
auto gmmHelper = device->getRootDeviceEnvironment().getGmmHelper();
|
||||
|
||||
graphicsAllocation->setDefaultGmm(new Gmm(gmmHelper, nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
|
||||
}
|
||||
|
||||
if (graphicsAllocation->getDefaultGmm()) {
|
||||
graphicsAllocation->getDefaultGmm()->isCompressionEnabled = compressed;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
GenCmdList getCmdList(LinearStream &linearStream, size_t offset) {
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<Family>(linearStream, offset);
|
||||
|
||||
return hwParser.cmdList;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
GenCmdList::iterator expectPipeControl(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
using PIPE_CONTROL = typename Family::PIPE_CONTROL;
|
||||
PIPE_CONTROL *pipeControlCmd = nullptr;
|
||||
GenCmdList::iterator commandItor = itorStart;
|
||||
bool stallingWrite = false;
|
||||
|
||||
do {
|
||||
commandItor = find<PIPE_CONTROL *>(commandItor, itorEnd);
|
||||
if (itorEnd == commandItor) {
|
||||
EXPECT_TRUE(false);
|
||||
return itorEnd;
|
||||
}
|
||||
pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*commandItor);
|
||||
stallingWrite = pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA &&
|
||||
pipeControlCmd->getCommandStreamerStallEnable();
|
||||
|
||||
++commandItor;
|
||||
} while (!stallingWrite);
|
||||
|
||||
return --commandItor;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
GenCmdList::iterator expectMiFlush(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
Family *miFlushCmd = nullptr;
|
||||
GenCmdList::iterator commandItor = itorStart;
|
||||
bool miFlushWithMemoryWrite = false;
|
||||
|
||||
do {
|
||||
commandItor = find<Family *>(commandItor, itorEnd);
|
||||
if (itorEnd == commandItor) {
|
||||
EXPECT_TRUE(false);
|
||||
return itorEnd;
|
||||
}
|
||||
miFlushCmd = genCmdCast<Family *>(*commandItor);
|
||||
miFlushWithMemoryWrite = miFlushCmd->getDestinationAddress() != 0;
|
||||
|
||||
++commandItor;
|
||||
} while (!miFlushWithMemoryWrite);
|
||||
|
||||
return --commandItor;
|
||||
}
|
||||
|
||||
template <typename Command>
|
||||
GenCmdList::iterator expectCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
auto commandItor = find<Command *>(itorStart, itorEnd);
|
||||
EXPECT_TRUE(commandItor != itorEnd);
|
||||
|
||||
return commandItor;
|
||||
}
|
||||
|
||||
template <typename Command>
|
||||
void expectNoCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
auto commandItor = find<Command *>(itorStart, itorEnd);
|
||||
EXPECT_TRUE(commandItor == itorEnd);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
|
||||
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
|
||||
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||
EXPECT_EQ(expectedAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
std::unique_ptr<OsContext> bcsOsContext;
|
||||
std::unique_ptr<MockClDevice> device;
|
||||
std::unique_ptr<BcsMockContext> bcsMockContext;
|
||||
std::unique_ptr<CommandQueue> commandQueue;
|
||||
std::unique_ptr<MockKernelWithInternals> mockKernel;
|
||||
|
||||
CommandStreamReceiver *bcsCsr = nullptr;
|
||||
CommandStreamReceiver *gpgpuCsr = nullptr;
|
||||
|
||||
size_t gws[3] = {63, 0, 0};
|
||||
size_t lws[3] = {16, 0, 0};
|
||||
uint32_t hostPtr = 0;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
};
|
||||
|
||||
// Fixture alias: BlitEnqueueTests instantiated with timestampPacketEnabled = 1.
using BlitAuxTranslationTests = BlitEnqueueTests<1>;
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingCommandBufferThenEnsureCorrectOrder) {
|
||||
@@ -335,6 +105,22 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstruct
|
||||
}
|
||||
}
|
||||
|
||||
// A non-blocked kernel enqueue needing blit aux translation must return
// CL_OUT_OF_RESOURCES when flushBcsTask() yields no task count.
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenGpuHangOnFlushBcsAndBlitAuxTranslationWhenConstructingCommandBufferThenOutOfResourcesIsReturned) {
    // Mixed set of kernel arguments: compressed, not compressed, compressed.
    auto firstCompressed = createBuffer(1, true);
    auto notCompressed = createBuffer(1, false);
    auto secondCompressed = createBuffer(1, true);
    setMockKernelArgs(std::array<Buffer *, 3>{{firstCompressed.get(), notCompressed.get(), secondCompressed.get()}});

    // Bypass the base implementation and make flushBcsTask() return an empty optional.
    auto hangingBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
    hangingBcsCsr->callBaseFlushBcsTask = false;
    hangingBcsCsr->flushBcsTaskReturnValue = std::nullopt;

    auto queue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());

    const auto enqueueStatus = queue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueStatus);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingBlockedCommandBufferThenEnsureCorrectOrder) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
@@ -873,6 +659,28 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructing
|
||||
EXPECT_FALSE(commandQueue->isQueueBlocked());
|
||||
}
|
||||
|
||||
// A kernel enqueue blocked on a user event must leave its output event in the
// executionAbortedDueToGpuHang state once it is unblocked and flushBcsTask()
// yields no task count.
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenGpuHangOnFlushBcsTaskAndBlitTranslationWhenConstructingBlockedCommandBufferAndRunningItThenEventExecutionIsAborted) {
    auto compressedA = createBuffer(1, true);
    auto compressedB = createBuffer(1, true);
    setMockKernelArgs(std::array<Buffer *, 2>{{compressedA.get(), compressedB.get()}});

    // Bypass the base implementation and make flushBcsTask() return an empty optional.
    auto hangingBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
    hangingBcsCsr->callBaseFlushBcsTask = false;
    hangingBcsCsr->flushBcsTaskReturnValue = std::nullopt;

    UserEvent blockingEvent;
    cl_event dependencies[] = {&blockingEvent};
    cl_event outEvent{};

    // The queue stays blocked until the user event completes.
    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, dependencies, &outEvent);
    blockingEvent.setStatus(CL_COMPLETE);

    auto kernelEventObject = castToObjectOrAbort<Event>(outEvent);
    EXPECT_EQ(Event::executionAbortedDueToGpuHang, kernelEventObject->peekExecutionStatus());

    kernelEventObject->release();
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenEnqueueIsCalledThenDoImplicitFlushOnGpgpuCsr) {
|
||||
auto buffer = createBuffer(1, true);
|
||||
setMockKernelArgs(std::array<Buffer *, 1>{{buffer.get()}});
|
||||
@@ -1132,6 +940,20 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenDebugFlagSetWhenDis
|
||||
EXPECT_EQ(1u, miFlushAfterCopyFound);
|
||||
}
|
||||
|
||||
// With PauseOnBlitCopy set, a blocking write-buffer enqueue must return
// CL_OUT_OF_RESOURCES when flushBcsTask() yields no task count.
HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenGpuHangOnFlushBcsTaskAndDebugFlagSetWhenDispatchingBlitEnqueueThenOutOfResourcesIsReturned) {
    // Bypass the base implementation and make flushBcsTask() return an empty optional.
    auto hangingBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
    hangingBcsCsr->callBaseFlushBcsTask = false;
    hangingBcsCsr->flushBcsTaskReturnValue = std::nullopt;

    buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;

    DebugManager.flags.PauseOnBlitCopy.set(1);

    const auto enqueueStatus = commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueStatus);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenDebugFlagSetToMinusTwoWhenDispatchingBlitEnqueueThenAddPausingCommandsForEachEnqueue) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
@@ -1359,6 +1181,34 @@ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenBlockedQueueWhenBlitEnqueuedThenF
|
||||
EXPECT_FALSE(commandQueue->isQueueBlocked());
|
||||
}
|
||||
|
||||
// A write-buffer enqueue blocked on a user event must leave its output event in
// the executionAbortedDueToGpuHang state once it is unblocked and
// flushBcsTask() yields no task count.
HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenGpuHangOnFlushBcsTaskAndBlockedQueueWhenBlitEnqueuedThenEventIsAborted) {
    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // Both CSRs report flushes into the same local counter.
    uint32_t sharedFlushCounter = 0;

    auto gpgpuUltCsr = static_cast<MyUltCsr<FamilyType> *>(gpgpuCsr);
    gpgpuUltCsr->flushCounter = &sharedFlushCounter;

    auto bcsUltCsr = static_cast<MyUltCsr<FamilyType> *>(bcsCsr);
    bcsUltCsr->flushCounter = &sharedFlushCounter;
    // Bypass the base implementation and make flushBcsTask() return an empty optional.
    bcsUltCsr->callBaseFlushBcsTask = false;
    bcsUltCsr->flushBcsTaskReturnValue = std::nullopt;

    UserEvent blockingEvent;
    cl_event dependencies[] = {&blockingEvent};
    cl_event outEvent{};

    // The queue stays blocked until the user event completes.
    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostData, nullptr, 1, dependencies, &outEvent);
    blockingEvent.setStatus(CL_COMPLETE);

    auto writeEventObject = castToObjectOrAbort<Event>(outEvent);
    EXPECT_EQ(Event::executionAbortedDueToGpuHang, writeEventObject->peekExecutionStatus());

    writeEventObject->release();
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenDebugFlagSetWhenCheckingBcsCacheFlushRequirementThenReturnCorrectValue) {
|
||||
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
|
||||
|
||||
@@ -1664,511 +1514,4 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperatio
|
||||
clReleaseEvent(outEvent2);
|
||||
}
|
||||
|
||||
// Fixture alias: BlitEnqueueTests instantiated with timestampPacketEnabled = 1.
using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>;
|
||||
|
||||
// With a BCS cache flush reported as required, the GPGPU task count must grow
// only for the first blit enqueue that follows a GPGPU kernel enqueue.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) {
    auto queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    auto &cacheFlushOverride = queueMock->overrideIsCacheFlushForBcsRequired;
    cacheFlushOverride.enabled = true;
    cacheFlushOverride.returnValue = true;

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // Non-blocking one-byte write that is expected to go through the blitter.
    const auto enqueueBlitWrite = [&]() {
        commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
    };

    // Blit enqueues before any kernel: no GPGPU submission.
    enqueueBlitWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());

    enqueueBlitWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());

    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // First blit after the kernel: one extra GPGPU submission.
    enqueueBlitWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());

    // Following blit: GPGPU task count stays unchanged.
    enqueueBlitWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
|
||||
|
||||
// With profiling enabled, a blit enqueue that does not submit to the GPGPU CSR
// must still record a non-zero submit timestamp in its output event.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenProfilingEnabledWhenSubmittingWithoutFlushToGpgpuThenSetSubmitTime) {
    auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
    mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
    mockCommandQueue->setProfilingEnabled();

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostPtr = 0;

    // Initialized so that a failed enqueue cannot leave an indeterminate handle.
    cl_event clEvent = nullptr;

    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
    EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());

    // Stop here instead of dereferencing a missing event.
    ASSERT_NE(nullptr, clEvent);
    auto event = castToObject<Event>(clEvent);

    uint64_t submitTime = 0;
    event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(submitTime), &submitTime, nullptr);

    EXPECT_NE(0u, submitTime);

    clReleaseEvent(clEvent);
}
|
||||
|
||||
// The output event must carry the BCS task count only when the enqueue was
// actually dispatched through the blitter.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenOutEventWhenEnqueuingBcsSubmissionThenSetupBcsCsrInEvent) {
    auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostPtr = 0;

    {
        // Blitter disabled: the write goes through the GPGPU CSR only.
        DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);

        // Initialized so that a failed enqueue cannot leave an indeterminate handle.
        cl_event clEvent = nullptr;
        commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
        EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
        EXPECT_EQ(0u, bcsCsr->peekTaskCount());
        EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

        ASSERT_NE(nullptr, clEvent);
        auto event = castToObject<Event>(clEvent);
        EXPECT_EQ(0u, event->peekBcsTaskCountFromCommandQueue());

        clReleaseEvent(clEvent);
    }
    {
        // Blitter enabled: the write is a blit and the event records the BCS task count.
        DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);

        cl_event clEvent = nullptr;
        commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
        EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
        EXPECT_EQ(1u, bcsCsr->peekTaskCount());
        EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());

        ASSERT_NE(nullptr, clEvent);
        auto event = castToObject<Event>(clEvent);
        EXPECT_EQ(1u, event->peekBcsTaskCountFromCommandQueue());

        clReleaseEvent(clEvent);
    }
}
|
||||
|
||||
// With a BCS cache flush reported as not required, blit enqueues must never
// add a GPGPU submission, before or after a kernel enqueue.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyThenDontSubmitToGpgpu) {
    auto queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    auto &cacheFlushOverride = queueMock->overrideIsCacheFlushForBcsRequired;
    cacheFlushOverride.enabled = true;
    cacheFlushOverride.returnValue = false;

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // Non-blocking one-byte write that is expected to go through the blitter.
    const auto enqueueBlitWrite = [&]() {
        commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
    };

    enqueueBlitWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());

    enqueueBlitWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());

    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // Blits after the kernel leave the GPGPU task count at 1.
    enqueueBlitWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    enqueueBlitWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
    // Batched dispatch with both implicit-flush flags disabled: the first blit
    // after a kernel enqueue is expected to raise the gpgpu task count from 1 to 2,
    // and the following blit is expected to leave it at 2.
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
    DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
    DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);

    queueMock->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueMock->overrideIsCacheFlushForBcsRequired.returnValue = false;
    queueMock->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    queueMock->getGpgpuCommandStreamReceiver().postInitFlagsSetup();

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // One non-blocking write-buffer enqueue followed by the two checks every step shares.
    const auto blitAndExpectGpgpuTaskCount = [&](unsigned int expectedGpgpuTaskCount) {
        commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
        EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuCsr->peekTaskCount());
    };

    blitAndExpectGpgpuTaskCount(0u);
    blitAndExpectGpgpuTaskCount(0u);

    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    blitAndExpectGpgpuTaskCount(2u);
    blitAndExpectGpgpuTaskCount(2u);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenImmediateDispatchCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
    // Immediate dispatch with both implicit-flush flags disabled: blits issued
    // after the kernel enqueue are expected to keep the gpgpu task count at 1.
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
    DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
    DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);

    queueMock->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueMock->overrideIsCacheFlushForBcsRequired.returnValue = false;
    queueMock->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
    queueMock->getGpgpuCommandStreamReceiver().postInitFlagsSetup();

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // One non-blocking write-buffer enqueue followed by the two checks every step shares.
    const auto blitAndExpectGpgpuTaskCount = [&](unsigned int expectedGpgpuTaskCount) {
        commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
        EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuCsr->peekTaskCount());
    };

    blitAndExpectGpgpuTaskCount(0u);
    blitAndExpectGpgpuTaskCount(0u);

    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    blitAndExpectGpgpuTaskCount(1u);
    blitAndExpectGpgpuTaskCount(1u);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyAfterBarrierThenSubmitToGpgpu) {
    // Sequence under test: kernel -> barrier -> blit write. The barrier is expected
    // to leave the gpgpu task count at 1; the blit that follows is expected to raise
    // it to 2 even though the cache-flush override reports "not required".
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    queueMock->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueMock->overrideIsCacheFlushForBcsRequired.returnValue = false;

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // Step 1: a kernel enqueue takes the gpgpu task count from 0 to 1.
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // Step 2: the barrier alone does not change the gpgpu task count.
    commandQueue->enqueueBarrierWithWaitList(0, nullptr, nullptr);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // Step 3: the blit issued after the barrier bumps the gpgpu task count.
    commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
    EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) {
    // A blit write gated on a user event: nothing is recorded as sent while the
    // event is pending; once the event is completed the blit is recorded, the
    // gpgpu task count is expected to be 1 and the queue to be unblocked.
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    queueMock->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueMock->overrideIsCacheFlushForBcsRequired.returnValue = false;

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    UserEvent gatingEvent;
    cl_event gatingEventHandle = &gatingEvent;

    // Enqueue with the pending user event in the wait list.
    commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 1, &gatingEventHandle, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    // Completing the user event releases the enqueue.
    gatingEvent.setStatus(CL_COMPLETE);
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);

    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    EXPECT_FALSE(commandQueue->isQueueBlocked());
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) {
    // Same flow as the "cache flush not required" blocked-queue case, but with the
    // cache-flush override reporting "required": after the gating user event
    // completes, the blit is recorded and the gpgpu task count is expected to be 1.
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    queueMock->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueMock->overrideIsCacheFlushForBcsRequired.returnValue = true;

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    UserEvent gatingEvent;
    cl_event gatingEventHandle = &gatingEvent;

    // Enqueue with the pending user event in the wait list.
    commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 1, &gatingEventHandle, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    // Completing the user event releases the enqueue.
    gatingEvent.setStatus(CL_COMPLETE);
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);

    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    EXPECT_FALSE(commandQueue->isQueueBlocked());
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThatRequiresCacheFlushThenSubmitToGpgpu) {
    // Checks the commands produced when a blit follows a kernel enqueue and the
    // cache-flush override reports "required": the queue's command stream must hold
    // a PIPE_CONTROL with DC flush + CS stall and a non-zero post-sync address, and
    // the bcs command stream must hold a semaphore checked against that address
    // plus an XY_COPY_BLT.
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
    mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostPtr = 0;

    // enqueue kernel to force gpgpu submission on write buffer
    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // Remember where the kernel's commands end so only the blit's commands are parsed below.
    auto offset = mockCommandQueue->getCS(0).getUsed();

    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());

    auto cmdListBcs = getCmdList<FamilyType>(bcsCsr->getCS(0), 0);
    auto cmdListQueue = getCmdList<FamilyType>(mockCommandQueue->getCS(0), offset);

    uint64_t cacheFlushWriteAddress = 0;

    {
        auto cmdFound = expectPipeControl<FamilyType>(cmdListQueue.begin(), cmdListQueue.end());
        // Stop here on a miss: dereferencing an end() iterator below would be undefined behavior.
        ASSERT_NE(cmdListQueue.end(), cmdFound);
        auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*cmdFound);
        ASSERT_NE(nullptr, pipeControlCmd);

        EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
        EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
        cacheFlushWriteAddress = NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControlCmd);
        EXPECT_NE(0u, cacheFlushWriteAddress);
    }

    {
        auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdListBcs.begin(), cmdListBcs.end());
        // verifySemaphore receives the iterator itself; presumably it dereferences it,
        // so make sure it points at a real command first.
        ASSERT_NE(cmdListBcs.end(), cmdFound);
        verifySemaphore<FamilyType>(cmdFound, cacheFlushWriteAddress);

        cmdFound = expectCommand<XY_COPY_BLT>(cmdListBcs.begin(), cmdListBcs.end());
        EXPECT_NE(cmdListBcs.end(), cmdFound);
    }
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenClearDependencies) {
    // Requests new timestamp packet nodes first for the gpgpu csr and then for the
    // bcs csr, with dependency clearing enabled; in both cases the container that
    // receives the previous nodes is expected to stay empty.
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    const bool clearDependencies = true;

    {
        // init: first request, issued against the gpgpu csr
        TimestampPacketContainer nodesBeforeGpgpuRequest;
        queueMock->obtainNewTimestampPacketNodes(1, nodesBeforeGpgpuRequest, clearDependencies, *gpgpuCsr);
        EXPECT_EQ(0u, nodesBeforeGpgpuRequest.peekNodes().size());
    }

    {
        // second request, issued against the bcs csr
        TimestampPacketContainer nodesBeforeBcsRequest;
        queueMock->obtainNewTimestampPacketNodes(1, nodesBeforeBcsRequest, clearDependencies, *bcsCsr);
        EXPECT_EQ(0u, nodesBeforeBcsRequest.peekNodes().size());
    }
}
|
||||
|
||||
// Suite alias: tests declared with BlitCopyTests run on the BlitEnqueueTests<1> fixture.
// NOTE(review): the meaning of the template argument is defined by the fixture, outside this chunk.
using BlitCopyTests = BlitEnqueueTests<1>;
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
    // With CPU access to local memory disallowed, creating a kernel allocation that
    // lands in the local memory pool is expected to add exactly one bcs task;
    // otherwise the bcs task count must stay unchanged.
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;

    auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();

    kernelInfo.createKernelAllocation(device->getDevice(), false);
    // The allocation is dereferenced right below; abort instead of crashing if it was not created.
    ASSERT_NE(nullptr, kernelInfo.kernelAllocation);

    if (kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()) {
        EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
    } else {
        EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
    }

    // The test created the allocation, so it releases it as well.
    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
    // With CPU access to local memory allowed, creating the kernel allocation is
    // expected to leave the bcs task count unchanged.
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;

    auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();

    kernelInfo.createKernelAllocation(device->getDevice(), false);
    // Without an allocation the "task count unchanged" check below would pass vacuously.
    ASSERT_NE(nullptr, kernelInfo.kernelAllocation);

    EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());

    // The test created the allocation, so it releases it as well.
    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
    // CPU access is disallowed but the hardware info reports no blitter support:
    // creating the kernel allocation is expected to leave the bcs task count unchanged.
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;

    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;

    auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();

    kernelInfo.createKernelAllocation(device->getDevice(), false);
    // Without an allocation the "task count unchanged" check below would pass vacuously.
    ASSERT_NE(nullptr, kernelInfo.kernelAllocation);

    EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());

    // The test created the allocation, so it releases it as well.
    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitCopyTests, givenLocalMemoryAccessNotAllowedWhenGlobalConstantsAreExportedThenUseBlitter) {
    // Processing program info whose linker input exports global constants is
    // expected to take the bcs task count from 0 to 1 and to yield a constant
    // surface whose GPU address is known to the context's SVM allocations manager.
    DebugManager.flags.EnableLocalMemory.set(1);
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));

    char constantsInitData[128] = {};

    auto exportingLinkerInput = std::make_unique<WhiteBox<LinkerInput>>();
    exportingLinkerInput->traits.exportsGlobalConstants = true;

    ProgramInfo programInfo;
    programInfo.globalConstants.initData = constantsInitData;
    programInfo.globalConstants.size = sizeof(constantsInitData);
    programInfo.linkerInput = std::move(exportingLinkerInput);

    MockProgram program(bcsMockContext.get(), false, toClDeviceVector(*device));

    // Exactly one bcs task is expected as a result of processing the program info.
    EXPECT_EQ(0u, bcsMockContext->bcsCsr->peekTaskCount());
    program.processProgramInfo(programInfo, *device);
    EXPECT_EQ(1u, bcsMockContext->bcsCsr->peekTaskCount());

    auto rootDeviceIndex = device->getRootDeviceIndex();

    auto *constantSurface = program.getConstantSurface(rootDeviceIndex);
    ASSERT_NE(nullptr, constantSurface);

    auto constantSurfaceGpuPtr = reinterpret_cast<const void *>(constantSurface->getGpuAddress());
    EXPECT_NE(nullptr, bcsMockContext->getSVMAllocsManager()->getSVMAlloc(constantSurfaceGpuPtr));
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenSubstituteKernelHeapIsCalledThenUseBcsForTransfer) {
    // With CPU access to local memory disallowed and blitter support reported,
    // substituting the kernel heap with one of the same size is expected to add
    // exactly one bcs task.
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;

    MockKernelWithInternals kernel(*device);
    const size_t initialHeapSize = 0x40;
    kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize;

    kernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
    ASSERT_NE(nullptr, kernel.kernelInfo.kernelAllocation);
    EXPECT_TRUE(kernel.kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool());

    const size_t newHeapSize = initialHeapSize;
    // Zero-initialized so the heap source never holds indeterminate bytes
    // (presumably substituteKernelHeap copies from it).
    char newHeap[newHeapSize] = {};

    auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();

    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);

    EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());

    // The test created the allocation, so it releases it as well.
    device->getMemoryManager()->freeGraphicsMemory(kernel.kernelInfo.kernelAllocation);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenLinkerRequiresPatchingOfInstructionSegmentsThenUseBcsForTransfer) {
    // Linking a program whose linker input requires patching of instruction
    // segments is expected to succeed and to add exactly one bcs task when the
    // kernel allocation sits in the local memory pool with CPU access disallowed.
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;

    auto patchingLinkerInput = std::make_unique<WhiteBox<LinkerInput>>();
    patchingLinkerInput->traits.requiresPatchingOfInstructionSegments = true;

    // 32 bytes of kernel heap, each byte set to 7.
    std::vector<char> heapBytes(32, 7);

    KernelInfo kernelInfo{};
    kernelInfo.heapInfo.pKernelHeap = heapBytes.data();
    kernelInfo.heapInfo.KernelHeapSize = static_cast<uint32_t>(heapBytes.size());
    kernelInfo.createKernelAllocation(device->getDevice(), false);
    ASSERT_NE(nullptr, kernelInfo.kernelAllocation);
    EXPECT_TRUE(kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool());

    std::vector<NEO::ExternalFunctionInfo> externalFunctions;
    MockProgram program{nullptr, false, toClDeviceVector(*device)};
    program.getKernelInfoArray(device->getRootDeviceIndex()).push_back(&kernelInfo);
    program.setLinkerInput(device->getRootDeviceIndex(), std::move(patchingLinkerInput));

    auto bcsTaskCountBeforeLink = bcsMockContext->bcsCsr->peekTaskCount();

    auto linkStatus = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, externalFunctions);
    EXPECT_EQ(CL_SUCCESS, linkStatus);

    EXPECT_EQ(bcsTaskCountBeforeLink + 1, bcsMockContext->bcsCsr->peekTaskCount());

    // Remove the stack-allocated kernelInfo from the program's array before both go out of scope.
    program.getKernelInfoArray(device->getRootDeviceIndex()).clear();
    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
|
||||
|
||||
} // namespace NEO
|
||||
} // namespace NEO
|
||||
542
opencl/test/unit_test/command_queue/blit_enqueue_2_tests.cpp
Normal file
542
opencl/test/unit_test/command_queue/blit_enqueue_2_tests.cpp
Normal file
@@ -0,0 +1,542 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/local_memory_access_modes.h"
|
||||
#include "shared/source/helpers/pause_on_gpu_properties.h"
|
||||
#include "shared/source/helpers/vec.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
#include "shared/test/unit_test/compiler_interface/linker_mock.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/test/unit_test/command_queue/blit_enqueue_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Suite alias: tests declared with BlitEnqueueWithDisabledGpgpuSubmissionTests run on the
// BlitEnqueueTests<1> fixture.
// NOTE(review): the meaning of the template argument is defined by the fixture, outside this chunk.
using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>;
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) {
    // With the cache-flush override reporting "required": blits issued before any
    // kernel leave the gpgpu task count at 0, the first blit after a kernel enqueue
    // raises it from 1 to 2, and the next blit keeps it at 2.
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    queueMock->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueMock->overrideIsCacheFlushForBcsRequired.returnValue = true;

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // One non-blocking write-buffer enqueue followed by the two checks every step shares.
    const auto blitAndExpectGpgpuTaskCount = [&](unsigned int expectedGpgpuTaskCount) {
        commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
        EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuCsr->peekTaskCount());
    };

    blitAndExpectGpgpuTaskCount(0u);
    blitAndExpectGpgpuTaskCount(0u);

    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    blitAndExpectGpgpuTaskCount(2u);
    blitAndExpectGpgpuTaskCount(2u);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenProfilingEnabledWhenSubmittingWithoutFlushToGpgpuThenSetSubmitTime) {
    // With profiling enabled on the queue, a blit write that leaves the gpgpu task
    // count at 0 must still produce an event reporting a non-zero
    // CL_PROFILING_COMMAND_SUBMIT value.
    auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
    mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;
    mockCommandQueue->setProfilingEnabled();

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostPtr = 0;

    // Start from a known value so a failed enqueue cannot leave an indeterminate handle behind.
    cl_event clEvent = nullptr;

    const auto retVal = commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());

    // The event is dereferenced below; abort instead of crashing if it is missing.
    ASSERT_NE(nullptr, clEvent);
    auto event = castToObject<Event>(clEvent);
    ASSERT_NE(nullptr, event);

    uint64_t submitTime = 0;
    event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(submitTime), &submitTime, nullptr);

    EXPECT_NE(0u, submitTime);

    clReleaseEvent(clEvent);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenOutEventWhenEnqueuingBcsSubmissionThenSetupBcsCsrInEvent) {
    // Verifies the bcs task count stored in the output event of a write-buffer
    // enqueue: 0 when the blitter is disabled for enqueue operations, 1 when it
    // is enabled.
    auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostPtr = 0;

    {
        // Blitter disabled: the write is recorded as a GpuKernel operation and only
        // the gpgpu task count advances.
        DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);

        // Start from a known value so a failed enqueue cannot leave an indeterminate handle behind.
        cl_event clEvent = nullptr;
        const auto retVal = commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
        ASSERT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
        EXPECT_EQ(0u, bcsCsr->peekTaskCount());
        EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

        // The event is dereferenced below; abort instead of crashing if it is missing.
        ASSERT_NE(nullptr, clEvent);
        auto event = castToObject<Event>(clEvent);
        ASSERT_NE(nullptr, event);
        EXPECT_EQ(0u, event->peekBcsTaskCountFromCommandQueue());

        clReleaseEvent(clEvent);
    }
    {
        // Blitter enabled: the write is recorded as a Blit operation, the bcs task
        // count becomes 1 and the gpgpu task count becomes 2.
        DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);

        cl_event clEvent = nullptr;
        const auto retVal = commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent);
        ASSERT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
        EXPECT_EQ(1u, bcsCsr->peekTaskCount());
        EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());

        ASSERT_NE(nullptr, clEvent);
        auto event = castToObject<Event>(clEvent);
        ASSERT_NE(nullptr, event);
        EXPECT_EQ(1u, event->peekBcsTaskCountFromCommandQueue());

        clReleaseEvent(clEvent);
    }
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyThenDontSubmitToGpgpu) {
    // With the cache-flush override reporting "not required", every blit
    // write-buffer enqueue is expected to leave the gpgpu task count unchanged,
    // both before and after a kernel enqueue.
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    queueMock->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueMock->overrideIsCacheFlushForBcsRequired.returnValue = false;

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // One non-blocking write-buffer enqueue followed by the two checks every step shares.
    const auto blitAndExpectGpgpuTaskCount = [&](unsigned int expectedGpgpuTaskCount) {
        commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
        EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuCsr->peekTaskCount());
    };

    blitAndExpectGpgpuTaskCount(0u);
    blitAndExpectGpgpuTaskCount(0u);

    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    blitAndExpectGpgpuTaskCount(1u);
    blitAndExpectGpgpuTaskCount(1u);
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
    // Batched dispatch with both implicit-flush flags disabled: the first blit
    // after a kernel enqueue is expected to raise the gpgpu task count from 1 to 2,
    // and the following blit is expected to leave it at 2.
    auto *queueMock = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueMock->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
    DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
    DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);

    queueMock->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueMock->overrideIsCacheFlushForBcsRequired.returnValue = false;
    queueMock->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    queueMock->getGpgpuCommandStreamReceiver().postInitFlagsSetup();

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int hostData = 0;

    // One non-blocking write-buffer enqueue followed by the two checks every step shares.
    const auto blitAndExpectGpgpuTaskCount = [&](unsigned int expectedGpgpuTaskCount) {
        commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &hostData, nullptr, 0, nullptr, nullptr);
        EXPECT_EQ(EnqueueProperties::Operation::Blit, queueMock->latestSentEnqueueType);
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuCsr->peekTaskCount());
    };

    blitAndExpectGpgpuTaskCount(0u);
    blitAndExpectGpgpuTaskCount(0u);

    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, queueMock->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    blitAndExpectGpgpuTaskCount(2u);
    blitAndExpectGpgpuTaskCount(2u);
}
|
||||
|
||||
// With immediate dispatch and the cache-flush override returning false, blit writes are expected
// to leave the GPGPU task count untouched; only the kernel enqueue raises it (0 -> 1).
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenImmediateDispatchCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
    auto queueHw = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueHw->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
    DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
    DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);

    queueHw->overrideIsCacheFlushForBcsRequired.enabled = true;
    queueHw->overrideIsCacheFlushForBcsRequired.returnValue = false;
    queueHw->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
    queueHw->getGpgpuCommandStreamReceiver().postInitFlagsSetup();

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int srcData = 0;

    // Non-blocking one-byte write that is expected to be routed to the blitter.
    auto enqueueOneByteWrite = [&]() {
        commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &srcData, nullptr, 0, nullptr, nullptr);
    };

    // Two blits before any kernel: nothing reaches GPGPU.
    enqueueOneByteWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueHw->latestSentEnqueueType);
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());

    enqueueOneByteWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueHw->latestSentEnqueueType);
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());

    // The kernel is the only GPGPU submission in this test.
    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, queueHw->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // Blits after the kernel still do not bump the GPGPU task count.
    enqueueOneByteWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueHw->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    enqueueOneByteWrite();
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueHw->latestSentEnqueueType);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
}
|
||||
|
||||
// A blit enqueued right after a barrier is expected to raise the GPGPU task count (1 -> 2),
// even though the cache-flush override returns false.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyAfterBarrierThenSubmitToGpgpu) {
    auto queueHw = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueHw->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    auto &cacheFlushOverride = queueHw->overrideIsCacheFlushForBcsRequired;
    cacheFlushOverride.enabled = true;
    cacheFlushOverride.returnValue = false;

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int srcData = 0;

    // Kernel enqueue: first GPGPU submission.
    EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // The barrier by itself does not change the task count.
    commandQueue->enqueueBarrierWithWaitList(0, nullptr, nullptr);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // The blit following the barrier does.
    commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &srcData, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueHw->latestSentEnqueueType);
    EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());
}
|
||||
|
||||
// A blit blocked on a user event is sent only once the event completes; at that point the
// GPGPU task count is expected to be 1 although the cache-flush override returns false.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) {
    auto queueHw = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueHw->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    auto &cacheFlushOverride = queueHw->overrideIsCacheFlushForBcsRequired;
    cacheFlushOverride.enabled = true;
    cacheFlushOverride.returnValue = false;

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int srcData = 0;

    UserEvent blockingEvent;
    cl_event eventWaitList = &blockingEvent;

    // While the user event is pending nothing has been sent yet.
    commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &srcData, nullptr, 1, &eventWaitList, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::None, queueHw->latestSentEnqueueType);

    // Completing the event releases the blit.
    blockingEvent.setStatus(CL_COMPLETE);
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueHw->latestSentEnqueueType);

    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    EXPECT_FALSE(commandQueue->isQueueBlocked());
}
|
||||
|
||||
// Same blocked-queue scenario as its "not required" sibling, but with the cache-flush override
// returning true: after the user event completes the GPGPU task count is expected to be 1.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) {
    auto queueHw = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    EXPECT_EQ(EnqueueProperties::Operation::None, queueHw->latestSentEnqueueType);

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    auto &cacheFlushOverride = queueHw->overrideIsCacheFlushForBcsRequired;
    cacheFlushOverride.enabled = true;
    cacheFlushOverride.returnValue = true;

    auto dstBuffer = createBuffer(1, false);
    dstBuffer->forceDisallowCPUCopy = true;
    int srcData = 0;

    UserEvent blockingEvent;
    cl_event eventWaitList = &blockingEvent;

    // While the user event is pending nothing has been sent yet.
    commandQueue->enqueueWriteBuffer(dstBuffer.get(), false, 0, 1, &srcData, nullptr, 1, &eventWaitList, nullptr);
    EXPECT_EQ(EnqueueProperties::Operation::None, queueHw->latestSentEnqueueType);

    // Completing the event releases the blit.
    blockingEvent.setStatus(CL_COMPLETE);
    EXPECT_EQ(EnqueueProperties::Operation::Blit, queueHw->latestSentEnqueueType);

    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    EXPECT_FALSE(commandQueue->isQueueBlocked());
}
|
||||
|
||||
// When a cache flush is required, the blit is expected to be preceded on the GPGPU side by a
// stalling PIPE_CONTROL with DC flush and a post-sync write; the BCS stream is expected to wait
// on that same address with MI_SEMAPHORE_WAIT and to contain the XY_COPY_BLT.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThatRequiresCacheFlushThenSubmitToGpgpu) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;

    DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);

    auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
    mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true;

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostPtr = 0;

    // enqueue kernel to force gpgpu submission on write buffer
    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());

    // Only commands recorded after this offset belong to the write-buffer enqueue.
    auto offset = mockCommandQueue->getCS(0).getUsed();

    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());

    auto cmdListBcs = getCmdList<FamilyType>(bcsCsr->getCS(0), 0);
    auto cmdListQueue = getCmdList<FamilyType>(mockCommandQueue->getCS(0), offset);

    uint64_t cacheFlushWriteAddress = 0;

    {
        auto cmdFound = expectPipeControl<FamilyType>(cmdListQueue.begin(), cmdListQueue.end());
        // expectPipeControl returns end() when nothing matches; stop before dereferencing it.
        ASSERT_NE(cmdListQueue.end(), cmdFound);
        auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*cmdFound);
        ASSERT_NE(nullptr, pipeControlCmd);

        EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
        EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
        cacheFlushWriteAddress = NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControlCmd);
        EXPECT_NE(0u, cacheFlushWriteAddress);
    }

    {
        auto cmdFound = expectCommand<MI_SEMAPHORE_WAIT>(cmdListBcs.begin(), cmdListBcs.end());
        // verifySemaphore dereferences the iterator, so a missing semaphore must be fatal here.
        ASSERT_NE(cmdListBcs.end(), cmdFound);
        verifySemaphore<FamilyType>(cmdFound, cacheFlushWriteAddress);

        cmdFound = expectCommand<XY_COPY_BLT>(cmdListBcs.begin(), cmdListBcs.end());
        EXPECT_NE(cmdListBcs.end(), cmdFound);
    }
}
|
||||
|
||||
// Requests new timestamp packet nodes first for the GPGPU CSR and then for the BCS CSR with
// clearDependencies set; in both cases no previous nodes are expected to be handed back.
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenClearDependencies) {
    auto queueHw = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
    const bool clearDependencies = true;

    {
        // init
        TimestampPacketContainer nodesFromGpgpuRequest;
        queueHw->obtainNewTimestampPacketNodes(1, nodesFromGpgpuRequest, clearDependencies, *gpgpuCsr);
        EXPECT_EQ(0u, nodesFromGpgpuRequest.peekNodes().size());
    }

    {
        TimestampPacketContainer nodesFromBcsRequest;
        queueHw->obtainNewTimestampPacketNodes(1, nodesFromBcsRequest, clearDependencies, *bcsCsr);
        EXPECT_EQ(0u, nodesFromBcsRequest.peekNodes().size());
    }
}
|
||||
|
||||
// Blit-copy tests reuse the shared blit fixture with its timestampPacketEnabled argument set to 1.
using BlitCopyTests = BlitEnqueueTests<1>;
|
||||
|
||||
// With CPU access to local memory disallowed, creating a kernel allocation that lands in the
// local memory pool is expected to cost exactly one BCS task; otherwise the BCS task count
// must stay unchanged.
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;

    auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();

    kernelInfo.createKernelAllocation(device->getDevice(), false);
    // The allocation is dereferenced below; fail fatally instead of crashing if it was not created.
    ASSERT_NE(nullptr, kernelInfo.kernelAllocation);

    if (kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()) {
        EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
    } else {
        EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
    }

    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
|
||||
|
||||
// With CPU access to local memory allowed, creating the kernel allocation must not change the
// BCS task count.
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    // Minimal one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &isaStorage;

    auto &bcs = *bcsMockContext->bcsCsr;
    const auto bcsTaskCountBefore = bcs.peekTaskCount();

    kernelInfo.createKernelAllocation(device->getDevice(), false);

    EXPECT_EQ(bcsTaskCountBefore, bcs.peekTaskCount());

    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
|
||||
|
||||
// CPU access is disallowed but the blitter capability is switched off: creating the kernel
// allocation must not change the BCS task count.
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    auto hwInfo = device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo();
    hwInfo->capabilityTable.blitterOperationsSupported = false;

    // Minimal one-byte heap backed by a local variable.
    uint32_t isaStorage = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &isaStorage;

    auto &bcs = *bcsMockContext->bcsCsr;
    const auto bcsTaskCountBefore = bcs.peekTaskCount();

    kernelInfo.createKernelAllocation(device->getDevice(), false);

    EXPECT_EQ(bcsTaskCountBefore, bcs.peekTaskCount());

    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
|
||||
|
||||
// Processing program info that exports global constants, with CPU access to local memory
// disallowed, is expected to cost one BCS task and to leave a constant surface that is
// registered with the SVM allocations manager.
HWTEST_TEMPLATED_F(BlitCopyTests, givenLocalMemoryAccessNotAllowedWhenGlobalConstantsAreExportedThenUseBlitter) {
    DebugManager.flags.EnableLocalMemory.set(1);
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));

    char constantsInitData[128] = {};
    ProgramInfo programInfo;
    programInfo.globalConstants.initData = constantsInitData;
    programInfo.globalConstants.size = sizeof(constantsInitData);

    auto linkerInputMock = std::make_unique<WhiteBox<LinkerInput>>();
    linkerInputMock->traits.exportsGlobalConstants = true;
    programInfo.linkerInput = std::move(linkerInputMock);

    MockProgram program(bcsMockContext.get(), false, toClDeviceVector(*device));

    auto &bcs = *bcsMockContext->bcsCsr;
    EXPECT_EQ(0u, bcs.peekTaskCount());

    program.processProgramInfo(programInfo, *device);

    EXPECT_EQ(1u, bcs.peekTaskCount());

    const auto rootDeviceIndex = device->getRootDeviceIndex();

    auto constantSurface = program.getConstantSurface(rootDeviceIndex);
    ASSERT_NE(nullptr, constantSurface);

    // Look the surface up by its GPU address.
    auto surfaceGpuAddress = reinterpret_cast<const void *>(program.getConstantSurface(rootDeviceIndex)->getGpuAddress());
    EXPECT_NE(nullptr, bcsMockContext->getSVMAllocsManager()->getSVMAlloc(surfaceGpuAddress));
}
|
||||
|
||||
// Substituting the kernel heap of a kernel whose ISA allocation is in local memory (CPU access
// disallowed, blitter enabled) is expected to cost exactly one BCS task.
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenSubstituteKernelHeapIsCalledThenUseBcsForTransfer) {
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;

    MockKernelWithInternals kernel(*device);
    const size_t initialHeapSize = 0x40;
    kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize;

    kernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
    ASSERT_NE(nullptr, kernel.kernelInfo.kernelAllocation);
    EXPECT_TRUE(kernel.kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool());

    // Same size as the current heap, so only the contents are replaced.
    const size_t newHeapSize = initialHeapSize;
    // Zero-initialized so the substituted heap never carries indeterminate bytes.
    char newHeap[newHeapSize] = {};

    auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();

    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);

    EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());

    device->getMemoryManager()->freeGraphicsMemory(kernel.kernelInfo.kernelAllocation);
}
|
||||
|
||||
// Linking a program whose linker input requires patching of instruction segments, with the
// kernel ISA in local memory and CPU access disallowed, is expected to cost one BCS task.
HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenLinkerRequiresPatchingOfInstructionSegmentsThenUseBcsForTransfer) {
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(AllocationType::KERNEL_ISA) - 1));

    auto hwInfo = device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo();
    hwInfo->capabilityTable.blitterOperationsSupported = true;

    auto patchingLinkerInput = std::make_unique<WhiteBox<LinkerInput>>();
    patchingLinkerInput->traits.requiresPatchingOfInstructionSegments = true;

    // 32-byte kernel heap filled with the value 7.
    KernelInfo kernelInfo = {};
    std::vector<char> isaBytes(32, 7);
    kernelInfo.heapInfo.pKernelHeap = isaBytes.data();
    kernelInfo.heapInfo.KernelHeapSize = static_cast<uint32_t>(isaBytes.size());
    kernelInfo.createKernelAllocation(device->getDevice(), false);
    ASSERT_NE(nullptr, kernelInfo.kernelAllocation);
    EXPECT_TRUE(kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool());

    const auto rootDeviceIndex = device->getRootDeviceIndex();

    std::vector<NEO::ExternalFunctionInfo> externalFunctions;
    MockProgram program{nullptr, false, toClDeviceVector(*device)};
    program.getKernelInfoArray(rootDeviceIndex).push_back(&kernelInfo);
    program.setLinkerInput(rootDeviceIndex, std::move(patchingLinkerInput));

    auto &bcs = *bcsMockContext->bcsCsr;
    const auto bcsTaskCountBefore = bcs.peekTaskCount();

    auto linkResult = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, externalFunctions);
    EXPECT_EQ(CL_SUCCESS, linkResult);

    EXPECT_EQ(bcsTaskCountBefore + 1, bcs.peekTaskCount());

    // kernelInfo is a local object: detach it from the program before freeing its allocation.
    program.getKernelInfoArray(rootDeviceIndex).clear();
    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
|
||||
|
||||
} // namespace NEO
|
||||
267
opencl/test/unit_test/command_queue/blit_enqueue_fixture.h
Normal file
267
opencl/test/unit_test/command_queue/blit_enqueue_fixture.h
Normal file
@@ -0,0 +1,267 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/helpers/local_memory_access_modes.h"
|
||||
#include "shared/source/helpers/pause_on_gpu_properties.h"
|
||||
#include "shared/source/helpers/vec.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_timestamp_container.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
#include "shared/test/unit_test/compiler_interface/linker_mock.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <int timestampPacketEnabled>
|
||||
struct BlitEnqueueTests : public ::testing::Test {
|
||||
class BcsMockContext : public MockContext {
|
||||
public:
|
||||
BcsMockContext(ClDevice *device) : MockContext(device) {
|
||||
bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})));
|
||||
bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
|
||||
bcsCsr->setupContext(*bcsOsContext);
|
||||
bcsCsr->initializeTagAllocation();
|
||||
|
||||
auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
|
||||
Vec3<size_t> size) -> BlitOperationResult {
|
||||
if (!device.getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported) {
|
||||
return BlitOperationResult::Unsupported;
|
||||
}
|
||||
|
||||
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
|
||||
*bcsCsr, memory, nullptr,
|
||||
hostPtr,
|
||||
memory->getGpuAddress(), 0,
|
||||
0, 0, size, 0, 0, 0, 0);
|
||||
|
||||
BlitPropertiesContainer container;
|
||||
container.push_back(blitProperties);
|
||||
bcsCsr->flushBcsTask(container, true, false, const_cast<Device &>(device));
|
||||
|
||||
return BlitOperationResult::Success;
|
||||
};
|
||||
blitMemoryToAllocationFuncBackup = mockBlitMemoryToAllocation;
|
||||
}
|
||||
|
||||
std::unique_ptr<OsContext> bcsOsContext;
|
||||
std::unique_ptr<CommandStreamReceiver> bcsCsr;
|
||||
VariableBackup<BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup{
|
||||
&BlitHelperFunctions::blitMemoryToAllocation};
|
||||
};
|
||||
|
||||
template <typename FamilyType>
|
||||
void setUpT() {
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
REQUIRE_AUX_RESOLVES();
|
||||
|
||||
DebugManager.flags.EnableTimestampPacket.set(timestampPacketEnabled);
|
||||
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
|
||||
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
|
||||
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
|
||||
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
|
||||
DebugManager.flags.CsrDispatchMode.set(static_cast<int32_t>(DispatchMode::ImmediateDispatch));
|
||||
DebugManager.flags.EnableLocalMemory.set(1);
|
||||
device = std::make_unique<MockClDevice>(MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
|
||||
bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
|
||||
capabilityTable.blitterOperationsSupported = true;
|
||||
|
||||
if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isBlitterFullySupported(device->getHardwareInfo())) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
if (createBcsEngine) {
|
||||
auto &engine = device->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority);
|
||||
bcsOsContext.reset(OsContext::create(nullptr, 1,
|
||||
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getDeviceBitfield())));
|
||||
engine.osContext = bcsOsContext.get();
|
||||
engine.commandStreamReceiver->setupContext(*bcsOsContext);
|
||||
}
|
||||
bcsMockContext = std::make_unique<BcsMockContext>(device.get());
|
||||
auto mockCmdQueue = new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr);
|
||||
commandQueue.reset(mockCmdQueue);
|
||||
mockKernel = std::make_unique<MockKernelWithInternals>(*device, bcsMockContext.get());
|
||||
auto mockProgram = mockKernel->mockProgram;
|
||||
mockProgram->setAllowNonUniform(true);
|
||||
|
||||
gpgpuCsr = &mockCmdQueue->getGpgpuCommandStreamReceiver();
|
||||
bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver;
|
||||
}
|
||||
|
||||
// Nothing to release explicitly: the fixture keeps its objects in std::unique_ptr members.
template <typename FamilyType>
void tearDownT() {
}
|
||||
|
||||
template <size_t N>
|
||||
void setMockKernelArgs(std::array<Buffer *, N> buffers) {
|
||||
for (uint32_t i = 0; i < buffers.size(); i++) {
|
||||
mockKernel->kernelInfo.addArgBuffer(i, 0);
|
||||
}
|
||||
|
||||
mockKernel->mockKernel->initialize();
|
||||
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
|
||||
|
||||
for (uint32_t i = 0; i < buffers.size(); i++) {
|
||||
cl_mem clMem = buffers[i];
|
||||
mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem);
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
void setMockKernelArgs(std::array<GraphicsAllocation *, N> allocs) {
|
||||
for (uint32_t i = 0; i < allocs.size(); i++) {
|
||||
mockKernel->kernelInfo.addArgBuffer(i, 0);
|
||||
}
|
||||
|
||||
mockKernel->mockKernel->initialize();
|
||||
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
|
||||
|
||||
for (uint32_t i = 0; i < allocs.size(); i++) {
|
||||
auto alloc = allocs[i];
|
||||
auto ptr = reinterpret_cast<void *>(alloc->getGpuAddressToPatch());
|
||||
mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc, 0u);
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a read-write buffer of `size` bytes in the BCS mock context and marks its graphics
// allocation on the fixture's root device as a (optionally compressed) BUFFER. The creation
// status is stored in the fixture's retVal member.
ReleaseableObjectPtr<Buffer> createBuffer(size_t size, bool compressed) {
    auto createdBuffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal));

    setAllocationType(createdBuffer->getGraphicsAllocation(device->getRootDeviceIndex()), compressed);

    return createdBuffer;
}
|
||||
|
||||
// Returns a heap-allocated MockGraphicsAllocation of `size` bytes with no backing pointer,
// typed as a (optionally compressed) BUFFER. The object is created with `new` and is not
// tracked by the fixture.
MockGraphicsAllocation *createGfxAllocation(size_t size, bool compressed) {
    auto allocation = new MockGraphicsAllocation(nullptr, size);
    setAllocationType(allocation, compressed);

    return allocation;
}
|
||||
|
||||
// Marks the allocation as AllocationType::BUFFER and makes its default Gmm (when one exists or
// had to be created for the compressed case) report the requested compression state.
void setAllocationType(GraphicsAllocation *graphicsAllocation, bool compressed) {
    graphicsAllocation->setAllocationType(AllocationType::BUFFER);

    // A compressed allocation needs a Gmm to carry the flag; create one if it is missing.
    const bool gmmMissing = !graphicsAllocation->getDefaultGmm();
    if (compressed && gmmMissing) {
        auto gmmHelper = device->getRootDeviceEnvironment().getGmmHelper();

        graphicsAllocation->setDefaultGmm(new Gmm(gmmHelper, nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    }

    if (auto defaultGmm = graphicsAllocation->getDefaultGmm()) {
        defaultGmm->isCompressionEnabled = compressed;
    }
}
|
||||
|
||||
template <typename Family>
|
||||
GenCmdList getCmdList(LinearStream &linearStream, size_t offset) {
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<Family>(linearStream, offset);
|
||||
|
||||
return hwParser.cmdList;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
GenCmdList::iterator expectPipeControl(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
using PIPE_CONTROL = typename Family::PIPE_CONTROL;
|
||||
PIPE_CONTROL *pipeControlCmd = nullptr;
|
||||
GenCmdList::iterator commandItor = itorStart;
|
||||
bool stallingWrite = false;
|
||||
|
||||
do {
|
||||
commandItor = find<PIPE_CONTROL *>(commandItor, itorEnd);
|
||||
if (itorEnd == commandItor) {
|
||||
EXPECT_TRUE(false);
|
||||
return itorEnd;
|
||||
}
|
||||
pipeControlCmd = genCmdCast<PIPE_CONTROL *>(*commandItor);
|
||||
stallingWrite = pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA &&
|
||||
pipeControlCmd->getCommandStreamerStallEnable();
|
||||
|
||||
++commandItor;
|
||||
} while (!stallingWrite);
|
||||
|
||||
return --commandItor;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
GenCmdList::iterator expectMiFlush(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
Family *miFlushCmd = nullptr;
|
||||
GenCmdList::iterator commandItor = itorStart;
|
||||
bool miFlushWithMemoryWrite = false;
|
||||
|
||||
do {
|
||||
commandItor = find<Family *>(commandItor, itorEnd);
|
||||
if (itorEnd == commandItor) {
|
||||
EXPECT_TRUE(false);
|
||||
return itorEnd;
|
||||
}
|
||||
miFlushCmd = genCmdCast<Family *>(*commandItor);
|
||||
miFlushWithMemoryWrite = miFlushCmd->getDestinationAddress() != 0;
|
||||
|
||||
++commandItor;
|
||||
} while (!miFlushWithMemoryWrite);
|
||||
|
||||
return --commandItor;
|
||||
}
|
||||
|
||||
template <typename Command>
|
||||
GenCmdList::iterator expectCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
auto commandItor = find<Command *>(itorStart, itorEnd);
|
||||
EXPECT_TRUE(commandItor != itorEnd);
|
||||
|
||||
return commandItor;
|
||||
}
|
||||
|
||||
template <typename Command>
|
||||
void expectNoCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
|
||||
auto commandItor = find<Command *>(itorStart, itorEnd);
|
||||
EXPECT_TRUE(commandItor == itorEnd);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) {
|
||||
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
|
||||
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||
EXPECT_EQ(expectedAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
std::unique_ptr<OsContext> bcsOsContext;
|
||||
std::unique_ptr<MockClDevice> device;
|
||||
std::unique_ptr<BcsMockContext> bcsMockContext;
|
||||
std::unique_ptr<CommandQueue> commandQueue;
|
||||
std::unique_ptr<MockKernelWithInternals> mockKernel;
|
||||
|
||||
CommandStreamReceiver *bcsCsr = nullptr;
|
||||
CommandStreamReceiver *gpgpuCsr = nullptr;
|
||||
|
||||
size_t gws[3] = {63, 0, 0};
|
||||
size_t lws[3] = {16, 0, 0};
|
||||
uint32_t hostPtr = 0;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_platform.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_printf_handler.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
|
||||
using namespace NEO;
|
||||
@@ -1121,6 +1122,18 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe
|
||||
EXPECT_EQ(mockCsr->flushCalledCount, 1);
|
||||
}
|
||||
|
||||
// Even though waitUntilComplete is configured to return Ready, waitForAllEngines is expected to
// report GpuHang when invoked with the MockPrintfHandler.
// NOTE(review): relies on MockPrintfHandler::printEnqueueOutput reporting failure - confirm against the mock.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenGpuHangOnPrintEnqueueOutputWhenWaitingForEnginesThenGpuHangIsReported) {
    MockCommandQueueHw<FamilyType> queue(nullptr, pClDevice, nullptr);
    queue.waitUntilCompleteReturnValue = WaitStatus::Ready;

    const bool blockedQueue = false;
    const bool cleanTemporaryAllocationsList = false;
    MockPrintfHandler hangingPrintfHandler(*pClDevice);

    const auto reportedStatus = queue.waitForAllEngines(blockedQueue, &hangingPrintfHandler, cleanTemporaryAllocationsList);
    EXPECT_EQ(WaitStatus::GpuHang, reportedStatus);
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdateTaskCountFromWaitSetWhenFlushTaskThenPipeControlAndBBSIsFlushed) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(1);
|
||||
|
||||
@@ -606,29 +606,31 @@ HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) {
|
||||
EXPECT_EQ(newTaskCount, csr.latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||
}
|
||||
|
||||
HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
|
||||
class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
|
||||
public:
|
||||
using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
|
||||
template <typename FamilyType>
|
||||
class MyMockCsr : public UltCommandStreamReceiver<FamilyType> {
|
||||
public:
|
||||
using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
|
||||
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, QueueThrottle throttle) override {
|
||||
waitForTaskCountWithKmdNotifyFallbackCalled++;
|
||||
taskCountToWaitPassed = taskCountToWait;
|
||||
flushStampToWaitPassed = flushStampToWait;
|
||||
useQuickKmdSleepPassed = useQuickKmdSleep;
|
||||
throttlePassed = throttle;
|
||||
return WaitStatus::Ready;
|
||||
}
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, QueueThrottle throttle) override {
|
||||
waitForTaskCountWithKmdNotifyFallbackCalled++;
|
||||
taskCountToWaitPassed = taskCountToWait;
|
||||
flushStampToWaitPassed = flushStampToWait;
|
||||
useQuickKmdSleepPassed = useQuickKmdSleep;
|
||||
throttlePassed = throttle;
|
||||
return waitForTaskCountWithKmdNotifyFallbackReturnValue;
|
||||
}
|
||||
|
||||
FlushStamp flushStampToWaitPassed = 0;
|
||||
uint32_t taskCountToWaitPassed = 0;
|
||||
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
|
||||
bool useQuickKmdSleepPassed = false;
|
||||
QueueThrottle throttlePassed = QueueThrottle::MEDIUM;
|
||||
};
|
||||
FlushStamp flushStampToWaitPassed = 0;
|
||||
uint32_t taskCountToWaitPassed = 0;
|
||||
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
|
||||
bool useQuickKmdSleepPassed = false;
|
||||
QueueThrottle throttlePassed = QueueThrottle::MEDIUM;
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallbackReturnValue{WaitStatus::Ready};
|
||||
};
|
||||
|
||||
auto myMockCsr = std::make_unique<MyMockCsr>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
HWTEST_F(BcsTests, GivenNoneGpuHangWhenBlitFromHostPtrCalledThenCallWaitWithKmdFallbackAndNewTaskCountIsReturned) {
|
||||
auto myMockCsr = std::make_unique<MyMockCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
auto &bcsOsContext = pDevice->getUltCommandStreamReceiver<FamilyType>().getOsContext();
|
||||
myMockCsr->initializeTagAllocation();
|
||||
myMockCsr->setupContext(bcsOsContext);
|
||||
@@ -648,11 +650,52 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
|
||||
graphicsAllocation->getGpuAddress(), 0,
|
||||
0, 0, {1, 1, 1}, 0, 0, 0, 0);
|
||||
|
||||
flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice);
|
||||
const auto taskCount1 = flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice);
|
||||
EXPECT_TRUE(taskCount1.has_value());
|
||||
|
||||
EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
|
||||
flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice);
|
||||
const auto taskCount2 = flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice);
|
||||
EXPECT_TRUE(taskCount2.has_value());
|
||||
|
||||
EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);
|
||||
EXPECT_EQ(myMockCsr->flushStamp->peekStamp(), myMockCsr->flushStampToWaitPassed);
|
||||
EXPECT_FALSE(myMockCsr->useQuickKmdSleepPassed);
|
||||
EXPECT_EQ(myMockCsr->throttlePassed, QueueThrottle::MEDIUM);
|
||||
EXPECT_EQ(1u, myMockCsr->activePartitions);
|
||||
}
|
||||
|
||||
HWTEST_F(BcsTests, GivenGpuHangWhenBlitFromHostPtrCalledThenCallWaitWithKmdFallbackAndDoNotReturnNewTaskCount) {
|
||||
auto myMockCsr = std::make_unique<MyMockCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
auto &bcsOsContext = pDevice->getUltCommandStreamReceiver<FamilyType>().getOsContext();
|
||||
myMockCsr->initializeTagAllocation();
|
||||
myMockCsr->setupContext(bcsOsContext);
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
|
||||
|
||||
constexpr size_t hostAllocationSize = MemoryConstants::pageSize;
|
||||
auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize);
|
||||
void *hostPtr = reinterpret_cast<void *>(hostAllocationPtr.get());
|
||||
|
||||
auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
|
||||
|
||||
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
|
||||
*myMockCsr, graphicsAllocation, nullptr,
|
||||
hostPtr,
|
||||
graphicsAllocation->getGpuAddress(), 0,
|
||||
0, 0, {1, 1, 1}, 0, 0, 0, 0);
|
||||
|
||||
const auto taskCount1 = flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice);
|
||||
EXPECT_TRUE(taskCount1.has_value());
|
||||
|
||||
EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
|
||||
myMockCsr->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;
|
||||
|
||||
const auto taskCount2 = flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice);
|
||||
EXPECT_FALSE(taskCount2.has_value());
|
||||
|
||||
EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);
|
||||
|
||||
@@ -22,7 +22,7 @@ struct BcsTests : public Test<ClDeviceFixture> {
|
||||
Test<ClDeviceFixture>::TearDown();
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) {
|
||||
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) {
|
||||
BlitPropertiesContainer container;
|
||||
container.push_back(blitProperties);
|
||||
|
||||
|
||||
@@ -517,6 +517,55 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenLocalMemoryAllocationWhenB
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that BlitHelper::blitMemoryToAllocation() reports BlitOperationResult::GpuHang
// when the BCS command stream receiver's flushBcsTask() yields no task count (std::nullopt).
HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenGpuHangOnFlushBcsTaskAndLocalMemoryAllocationWhenBlitMemoryToAllocationIsCalledThenGpuHangIsReturned) {
    if (is32bit) {
        GTEST_SKIP();
    }

    DebugManagerStateRestore restore;
    DebugManager.flags.EnableLocalMemory.set(true);
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));

    VariableBackup<HardwareInfo> backupHwInfo(defaultHwInfo.get());
    defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
    UltClDeviceFactory deviceFactory{1, 2};

    auto testedDevice = deviceFactory.rootDevices[0];

    MockContext context(testedDevice);
    cl_int retVal = CL_SUCCESS;
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(&context, {}, 1, nullptr, retVal));
    // The buffer is dereferenced below; stop here instead of crashing if creation failed.
    ASSERT_NE(nullptr, buffer.get());
    auto memory = buffer->getGraphicsAllocation(testedDevice->getRootDeviceIndex());

    // Zero-initialized so the blit source never exposes indeterminate stack bytes.
    uint8_t hostMemory[1] = {};
    auto executionEnv = testedDevice->getExecutionEnvironment();

    // With blitter support switched off the helper must refuse the operation.
    executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
    EXPECT_EQ(BlitOperationResult::Unsupported, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1}));

    executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;

    // Look up a BCS engine on the nearest generic sub-device so its CSR can be mocked.
    const auto rootDevice = testedDevice->getDevice().getRootDevice();
    const auto blitDevice = rootDevice->getNearestGenericSubDevice(0);
    auto &selectorCopyEngine = blitDevice->getSelectorCopyEngine();
    auto deviceBitfield = blitDevice->getDeviceBitfield();

    const auto &hwInfo = testedDevice->getDevice().getHardwareInfo();
    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    auto internalUsage = true;
    auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, internalUsage);
    auto bcsEngineUsage = hwHelper.preferInternalBcsEngine() ? EngineUsage::Internal : EngineUsage::Regular;
    auto bcsEngine = blitDevice->tryGetEngine(bcsEngineType, bcsEngineUsage);
    ASSERT_NE(nullptr, bcsEngine);

    // Bypass the real flush and make the mock return "no task count".
    auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine->commandStreamReceiver);
    ultBcsCsr->callBaseFlushBcsTask = false;
    ultBcsCsr->flushBcsTaskReturnValue = std::nullopt;

    EXPECT_EQ(BlitOperationResult::GpuHang, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1}));
}
|
||||
|
||||
struct AllocationReuseContextTest : ContextTest {
|
||||
void addMappedPtr(Buffer &buffer, void *ptr, size_t ptrLength) {
|
||||
auto &handler = context->getMapOperationsStorage().getHandler(&buffer);
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_platform.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_printf_handler.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
#include "opencl/test/unit_test/os_interface/mock_performance_counters.h"
|
||||
|
||||
@@ -587,6 +588,105 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
|
||||
EXPECT_FALSE(surface->isResident(pDevice->getDefaultEngine().osContext->getContextId()));
|
||||
}
|
||||
|
||||
// A blocked kernel command with a printf handler is submitted on a queue whose
// waitUntilComplete() is forced to report WaitStatus::GpuHang. The event must end up in
// the executionAbortedDueToGpuHang state, and "test" is still expected on stdout.
TEST_F(InternalsEventTest, givenGpuHangOnCmdQueueWaitFunctionAndBlockedKernelWithPrintfWhenSubmittedThenEventIsAbortedAndHangIsReported) {
    MockCommandQueue mockCmdQueue(mockContext, pClDevice, nullptr, false);
    mockCmdQueue.waitUntilCompleteReturnValue = WaitStatus::GpuHang;

    testing::internal::CaptureStdout();
    MockEvent<Event> event(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

    auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()}));
    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
    mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh);
    mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh);
    mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh);

    auto blockedCommandsData = std::make_unique<KernelOperation>(cmdStream, *mockCmdQueue.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    blockedCommandsData->setHeaps(dsh, ioh, ssh);

    std::string testString = "test";

    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    auto pKernel = mockKernelWithInternals.mockKernel;

    auto &kernelInfo = mockKernelWithInternals.kernelInfo;
    kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
    kernelInfo.setPrintfSurface(sizeof(uintptr_t), 0);
    kernelInfo.addToPrintfStringsMap(0, testString);

    // Zero-initialized: the array is copied into the kernel, so it must not hold
    // indeterminate stack contents.
    uint64_t crossThread[10] = {};
    pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8);

    MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
    std::unique_ptr<PrintfHandler> printfHandler(PrintfHandler::create(multiDispatchInfo, *pClDevice));
    printfHandler->prepareDispatch(multiDispatchInfo);
    auto surface = printfHandler->getSurface();

    // Hand-written surface contents; the assertion at the end expects them to print "test".
    auto printfSurface = reinterpret_cast<uint32_t *>(surface->getUnderlyingBuffer());
    printfSurface[0] = 8;
    printfSurface[1] = 0;

    std::vector<Surface *> v;
    PreemptionMode preemptionMode = pDevice->getPreemptionMode();
    // Ownership of the command goes straight to the event; no raw pointer is left behind.
    event.setCommand(std::unique_ptr<Command>(new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1)));

    event.submitCommand(false);
    EXPECT_EQ(Event::executionAbortedDueToGpuHang, event.peekExecutionStatus());

    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ("test", output.c_str());
}
|
||||
|
||||
// A blocked kernel command is submitted with a MockPrintfHandler attached. The event must
// end up in the executionAbortedDueToGpuHang state and nothing may be written to stdout.
TEST_F(InternalsEventTest, givenGpuHangOnPrintingEnqueueOutputAndBlockedKernelWithPrintfWhenSubmittedThenEventIsAbortedAndHangIsReported) {
    MockCommandQueue mockCmdQueue(mockContext, pClDevice, nullptr, false);

    testing::internal::CaptureStdout();
    MockEvent<Event> event(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

    auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()}));
    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
    mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh);
    mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh);
    mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh);

    auto blockedCommandsData = std::make_unique<KernelOperation>(cmdStream, *mockCmdQueue.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    blockedCommandsData->setHeaps(dsh, ioh, ssh);

    std::string testString = "test";

    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    auto pKernel = mockKernelWithInternals.mockKernel;

    auto &kernelInfo = mockKernelWithInternals.kernelInfo;
    kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
    kernelInfo.setPrintfSurface(sizeof(uintptr_t), 0);
    kernelInfo.addToPrintfStringsMap(0, testString);

    // Zero-initialized: the array is copied into the kernel, so it must not hold
    // indeterminate stack contents.
    uint64_t crossThread[10] = {};
    pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8);

    MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
    auto printfHandler = std::make_unique<MockPrintfHandler>(*pClDevice);
    printfHandler->prepareDispatch(multiDispatchInfo);
    auto surface = printfHandler->getSurface();

    auto printfSurface = reinterpret_cast<uint32_t *>(surface->getUnderlyingBuffer());
    printfSurface[0] = 8;
    printfSurface[1] = 0;

    std::vector<Surface *> v;
    PreemptionMode preemptionMode = pDevice->getPreemptionMode();
    // Ownership of the command goes straight to the event; no raw pointer is left behind.
    event.setCommand(std::unique_ptr<Command>(new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1)));

    event.submitCommand(false);
    EXPECT_EQ(Event::executionAbortedDueToGpuHang, event.peekExecutionStatus());

    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_TRUE(output.empty());
}
|
||||
|
||||
TEST_F(InternalsEventTest, GivenMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) {
|
||||
auto pCmdQ = make_releaseable<MockCommandQueue>(mockContext, pClDevice, nullptr, false);
|
||||
MockEvent<Event> event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0);
|
||||
|
||||
@@ -542,7 +542,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
|
||||
return WaitStatus::Ready;
|
||||
}
|
||||
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
|
||||
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
|
||||
|
||||
CompletionStamp flushTask(
|
||||
LinearStream &commandStream,
|
||||
|
||||
@@ -22,6 +22,7 @@ set(IGDRCL_SRCS_tests_mocks
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_platform.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_platform.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_printf_handler.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_program.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_program.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h
|
||||
|
||||
@@ -348,6 +348,10 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
|
||||
// Records the GPGPU task count that was waited on, then either returns the status a test
// injected through waitUntilCompleteReturnValue or, when none is set, defers to BaseClass.
WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
    latestTaskCountWaited.store(gpgpuTaskCountToWait);

    if (!waitUntilCompleteReturnValue) {
        return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
    }

    return waitUntilCompleteReturnValue.value();
}
|
||||
|
||||
@@ -395,6 +399,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
|
||||
bool flushCalled = false;
|
||||
std::optional<WaitStatus> waitForAllEnginesReturnValue{};
|
||||
std::optional<WaitStatus> waitUntilCompleteReturnValue{};
|
||||
int waitForAllEnginesCalledCount{0};
|
||||
|
||||
LinearStream *peekCommandStream() {
|
||||
|
||||
22
opencl/test/unit_test/mocks/mock_printf_handler.h
Normal file
22
opencl/test/unit_test/mocks/mock_printf_handler.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "opencl/source/program/printf_handler.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
class MockPrintfHandler : public PrintfHandler {
|
||||
public:
|
||||
MockPrintfHandler(ClDevice &device) : PrintfHandler{device} {}
|
||||
~MockPrintfHandler() override = default;
|
||||
|
||||
bool printEnqueueOutput() override {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
@@ -147,7 +147,7 @@ HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOut
|
||||
printfHandler->prepareDispatch(multiDispatchInfo);
|
||||
EXPECT_NE(nullptr, printfHandler->getSurface());
|
||||
|
||||
printfHandler->printEnqueueOutput();
|
||||
EXPECT_TRUE(printfHandler->printEnqueueOutput());
|
||||
|
||||
auto &bcsEngine = device->getEngine(EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), device->getSelectorCopyEngine(), true), EngineUsage::Regular);
|
||||
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine.commandStreamReceiver);
|
||||
@@ -161,6 +161,41 @@ HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOut
|
||||
}
|
||||
}
|
||||
|
||||
// With EnableStatelessCompression set and the BCS CSR's flushBcsTask() mocked to return
// std::nullopt, printEnqueueOutput() must still issue one BufferToHostPtr blit and must
// return false.
HWTEST_F(PrintfHandlerTests, givenGpuHangOnFlushBcsStreamAndEnabledStatelessCompressionWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedToDecompressPrintfOutputAndFalseIsReturned) {
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    REQUIRE_BLITTER_OR_SKIP(&hwInfo);

    DebugManagerStateRestore restore;
    DebugManager.flags.EnableStatelessCompression.set(1);

    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    MockContext context(device.get());

    auto kernelInfo = std::make_unique<MockKernelInfo>();
    kernelInfo->setPrintfSurface(sizeof(uintptr_t), 0);

    auto program = std::make_unique<MockProgram>(&context, false, toClDeviceVector(*device));

    // Zero-initialized: the array is copied into the kernel, so it must not hold
    // indeterminate stack contents.
    uint64_t crossThread[10] = {};
    auto kernel = std::make_unique<MockKernel>(program.get(), *kernelInfo, *device);
    kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8);

    MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.get());
    std::unique_ptr<PrintfHandler> printfHandler(PrintfHandler::create(multiDispatchInfo, *device));
    printfHandler->prepareDispatch(multiDispatchInfo);
    EXPECT_NE(nullptr, printfHandler->getSurface());

    // Bypass the real flush and make the mock return "no task count".
    auto &bcsEngine = device->getEngine(EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), device->getSelectorCopyEngine(), true), EngineUsage::Regular);
    auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsEngine.commandStreamReceiver);
    bcsCsr->callBaseFlushBcsTask = false;
    bcsCsr->flushBcsTaskReturnValue = std::nullopt;

    EXPECT_FALSE(printfHandler->printEnqueueOutput());
    // Fatal assertion: element 0 of receivedBlitProperties is read next, so stop here if
    // no blit was recorded.
    ASSERT_EQ(1u, bcsCsr->blitBufferCalled);
    EXPECT_EQ(BlitterConstants::BlitDirection::BufferToHostPtr, bcsCsr->receivedBlitProperties[0].blitDirection);
}
|
||||
|
||||
HWTEST_F(PrintfHandlerTests, givenDisallowedLocalMemoryCpuAccessWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedToCopyPrintfOutput) {
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
|
||||
@@ -40,7 +40,7 @@ struct BlitXE_HP_CORETests : public ::testing::Test {
|
||||
clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
|
||||
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
blitPropertiesContainer.push_back(blitProperties);
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ struct BlitXeHpcCoreTests : public ::testing::Test {
|
||||
clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
|
||||
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
blitPropertiesContainer.push_back(blitProperties);
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ struct BlitXeHpgCoreTests : public ::testing::Test {
|
||||
clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
|
||||
std::optional<uint32_t> flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) {
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
blitPropertiesContainer.push_back(blitProperties);
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
namespace NEO {
|
||||
class AllocationsList;
|
||||
@@ -227,7 +228,7 @@ class CommandStreamReceiver {
|
||||
this->latestFlushedTaskCount = latestFlushedTaskCount;
|
||||
}
|
||||
|
||||
virtual uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
|
||||
virtual std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
|
||||
|
||||
virtual void flushTagUpdate() = 0;
|
||||
virtual void updateTagFromWait() = 0;
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
class DeviceCommandStreamReceiver;
|
||||
@@ -96,7 +98,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
return CommandStreamReceiverType::CSR_HW;
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
|
||||
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
|
||||
|
||||
void flushTagUpdate() override;
|
||||
void flushMiFlushDW();
|
||||
|
||||
@@ -1015,7 +1015,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::unregisterDirectSubmissionFromCo
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
|
||||
std::optional<uint32_t> CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
|
||||
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
|
||||
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
|
||||
|
||||
@@ -1128,8 +1128,12 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesCo
|
||||
|
||||
lock.unlock();
|
||||
if (blocking) {
|
||||
waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, QueueThrottle::MEDIUM);
|
||||
const auto waitStatus = waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, QueueThrottle::MEDIUM);
|
||||
internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION);
|
||||
|
||||
if (waitStatus == WaitStatus::GpuHang) {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
return newTaskCount;
|
||||
|
||||
@@ -209,7 +209,11 @@ BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device
|
||||
hostPtr,
|
||||
(memory->getGpuAddress() + offset),
|
||||
0, 0, 0, size, 0, 0, 0, 0));
|
||||
bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, *pDeviceForBlit);
|
||||
|
||||
const auto newTaskCount = bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, *pDeviceForBlit);
|
||||
if (!newTaskCount) {
|
||||
return BlitOperationResult::GpuHang;
|
||||
}
|
||||
}
|
||||
|
||||
return BlitOperationResult::Success;
|
||||
|
||||
@@ -96,7 +96,8 @@ struct BlitProperties {
|
||||
enum class BlitOperationResult {
|
||||
Unsupported,
|
||||
Fail,
|
||||
Success
|
||||
Success,
|
||||
GpuHang
|
||||
};
|
||||
|
||||
namespace BlitHelperFunctions {
|
||||
|
||||
@@ -49,7 +49,7 @@ class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw<GfxFamily>
|
||||
return csr;
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
|
||||
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
|
||||
blitBufferCalled++;
|
||||
return BaseClass::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
|
||||
}
|
||||
|
||||
@@ -269,10 +269,15 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
return CommandStreamReceiverHw<GfxFamily>::obtainUniqueOwnership();
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
|
||||
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
|
||||
blitBufferCalled++;
|
||||
receivedBlitProperties = blitPropertiesContainer;
|
||||
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
|
||||
|
||||
if (callBaseFlushBcsTask) {
|
||||
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
|
||||
} else {
|
||||
return flushBcsTaskReturnValue;
|
||||
}
|
||||
}
|
||||
|
||||
bool createPerDssBackedBuffer(Device &device) override {
|
||||
@@ -368,6 +373,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
bool shouldFlushBatchedSubmissionsReturnSuccess = false;
|
||||
WaitStatus returnWaitForCompletionWithTimeout = WaitStatus::Ready;
|
||||
std::optional<WaitStatus> waitForTaskCountWithKmdNotifyFallbackReturnValue{};
|
||||
bool callBaseFlushBcsTask{true};
|
||||
std::optional<uint32_t> flushBcsTaskReturnValue{};
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -112,7 +112,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
return WaitStatus::Ready;
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
|
||||
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
|
||||
|
||||
CommandStreamReceiverType getType() override {
|
||||
return CommandStreamReceiverType::CSR_HW;
|
||||
@@ -277,7 +277,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
|
||||
std::optional<uint32_t> flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
|
||||
if (!skipBlitCalls) {
|
||||
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user