fix: Defer timestamp packet after failed submission

Related-To: NEO-7835

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2023-04-28 10:16:05 +00:00
committed by Compute-Runtime-Automation
parent 55f6b142cd
commit 464c67789f
4 changed files with 114 additions and 20 deletions

View File

@ -367,21 +367,21 @@ class CommandQueueHw : public CommandQueue {
template <uint32_t cmdType>
cl_int enqueueBlitSplit(MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
size_t surfaceCount,
LinearStream &commandStream,
size_t commandStreamStart,
bool &blocking,
bool clearDependenciesForSubCapture,
const MultiDispatchInfo &multiDispatchInfo,
const EnqueueProperties &enqueueProperties,
TimestampPacketDependencies &timestampPacketDependencies,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
TaskCountType taskLevel,
PrintfHandler *printfHandler,
bool relaxedOrderingEnabled,
uint32_t commandType);
MOCKABLE_VIRTUAL CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
size_t surfaceCount,
LinearStream &commandStream,
size_t commandStreamStart,
bool &blocking,
bool clearDependenciesForSubCapture,
const MultiDispatchInfo &multiDispatchInfo,
const EnqueueProperties &enqueueProperties,
TimestampPacketDependencies &timestampPacketDependencies,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
TaskCountType taskLevel,
PrintfHandler *printfHandler,
bool relaxedOrderingEnabled,
uint32_t commandType);
void enqueueBlocked(uint32_t commandType,
Surface **surfacesForResidency,

View File

@ -410,6 +410,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
if (completionStamp.taskCount > CompletionStamp::notReady) {
if (deferredTimestampPackets.get()) {
timestampPacketContainer->moveNodesToNewContainer(*deferredTimestampPackets);
}
return CommandQueue::getErrorCodeFromTaskCount(completionStamp.taskCount);
}

View File

@ -716,14 +716,14 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
}
struct EnqueueHandlerTestBasic : public ::testing::Test {
template <typename FamilyType>
std::unique_ptr<MockCommandQueueHw<FamilyType>> setupFixtureAndCreateMockCommandQueue() {
template <typename MockCmdQueueType, typename FamilyType>
std::unique_ptr<MockCmdQueueType> setupFixtureAndCreateMockCommandQueue() {
auto executionEnvironment = platform()->peekExecutionEnvironment();
device = std::make_unique<MockClDevice>(MockDevice::createWithExecutionEnvironment<MockDevice>(nullptr, executionEnvironment, 0u));
context = std::make_unique<MockContext>(device.get());
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
auto mockCmdQ = std::make_unique<MockCmdQueueType>(context.get(), device.get(), nullptr);
auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(mockCmdQ->getGpgpuCommandStreamReceiver());
ultCsr.taskCount = initialTaskCount;
@ -741,7 +741,7 @@ struct EnqueueHandlerTestBasic : public ::testing::Test {
};
HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) {
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<FamilyType>();
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<MockCommandQueueHw<FamilyType>, FamilyType>();
MockKernelWithInternals kernelInternals(*device, context.get());
Kernel *kernel = kernelInternals.mockKernel;
MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel);
@ -757,7 +757,7 @@ HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCom
}
HWTEST_F(EnqueueHandlerTestBasic, givenBlockedEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) {
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<FamilyType>();
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<MockCommandQueueHw<FamilyType>, FamilyType>();
MockKernelWithInternals kernelInternals(*device, context.get());
Kernel *kernel = kernelInternals.mockKernel;
@ -783,3 +783,93 @@ HWTEST_F(EnqueueHandlerTestBasic, givenBlockedEnqueueHandlerWhenCommandIsBloking
t0.join();
}
// Mock command queue that replaces enqueueNonBlocked() with a stub: the stub adds
// one tag node to timestampPacketContainer and reports taskCountToReturn as the
// task count of the returned CompletionStamp. Tests set taskCountToReturn to pick
// the outcome the caller of enqueueNonBlocked() will see.
template <typename FamilyType>
class MockCommandQueueFailEnqueue : public MockCommandQueueHw<FamilyType> {
  public:
    // Builds the base mock queue, creates the tag allocator used by the stub and
    // installs fresh, empty current and deferred timestamp packet containers.
    MockCommandQueueFailEnqueue(Context *context,
                                ClDevice *device,
                                cl_queue_properties *properties)
        : MockCommandQueueHw<FamilyType>(context, device, properties),
          mockTagAllocator(std::make_unique<MockTagAllocator<>>(0, device->getDevice().getMemoryManager())) {
        this->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
        this->deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
    }

    // Stubbed submission: none of the arguments are used. A single tag node is
    // added to the current timestamp packet container, then a value-initialized
    // stamp carrying taskCountToReturn is handed back.
    CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
                                      size_t surfaceCount,
                                      LinearStream &commandStream,
                                      size_t commandStreamStart,
                                      bool &blocking,
                                      bool clearDependenciesForSubCapture,
                                      const MultiDispatchInfo &multiDispatchInfo,
                                      const EnqueueProperties &enqueueProperties,
                                      TimestampPacketDependencies &timestampPacketDependencies,
                                      EventsRequest &eventsRequest,
                                      EventBuilder &eventBuilder,
                                      TaskCountType taskLevel,
                                      PrintfHandler *printfHandler,
                                      bool relaxedOrderingEnabled,
                                      uint32_t commandType) override {
        auto tagNode = mockTagAllocator->getTag();
        this->timestampPacketContainer->add(tagNode);

        CompletionStamp completionStamp{};
        completionStamp.taskCount = taskCountToReturn;
        return completionStamp;
    }

    // Task count reported by the stubbed enqueueNonBlocked().
    TaskCountType taskCountToReturn = 0;
    // Source of the tag nodes added by the stubbed enqueueNonBlocked().
    std::unique_ptr<MockTagAllocator<>> mockTagAllocator;
};
// The stubbed enqueueNonBlocked() reports CompletionStamp::gpuHang while a deferred
// container is present; afterwards the current timestamp packet container is
// expected to hold no nodes.
HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenEnqueueFailedThenTimestampPacketContainerIsEmpty) {
    auto cmdQueue = setupFixtureAndCreateMockCommandQueue<MockCommandQueueFailEnqueue<FamilyType>, FamilyType>();

    MockKernelWithInternals kernelWithInternals(*device, context.get());
    Kernel *kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo dispatchInfo(device.get(), kernel);

    cmdQueue->taskCountToReturn = CompletionStamp::gpuHang;
    cmdQueue->template enqueueHandler<CL_COMMAND_BARRIER>(nullptr, 0, true, dispatchInfo, 0, nullptr, nullptr);

    EXPECT_TRUE(cmdQueue->timestampPacketContainer->peekNodes().empty());

    // Swap whatever the deferred container holds into a local container that
    // goes out of scope at the end of the test.
    TimestampPacketContainer drainedNodes;
    cmdQueue->deferredTimestampPackets->swapNodes(drainedNodes);
}
// The stubbed enqueueNonBlocked() reports task count 100; afterwards the node it
// added is expected to still be in the current timestamp packet container.
HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenEnqueueSucceedsThenTimestampPacketContainerIsNotEmpty) {
    auto cmdQueue = setupFixtureAndCreateMockCommandQueue<MockCommandQueueFailEnqueue<FamilyType>, FamilyType>();

    MockKernelWithInternals kernelWithInternals(*device, context.get());
    Kernel *kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo dispatchInfo(device.get(), kernel);

    cmdQueue->taskCountToReturn = 100;
    cmdQueue->template enqueueHandler<CL_COMMAND_BARRIER>(nullptr, 0, true, dispatchInfo, 0, nullptr, nullptr);

    EXPECT_FALSE(cmdQueue->timestampPacketContainer->peekNodes().empty());

    // Swap the remaining nodes into a local container that goes out of scope
    // at the end of the test.
    TimestampPacketContainer drainedNodes;
    cmdQueue->timestampPacketContainer->swapNodes(drainedNodes);
}
// The stubbed enqueueNonBlocked() reports CompletionStamp::gpuHang, but the deferred
// container has been reset beforehand; the node added by the stub is then expected
// to remain in the current timestamp packet container.
HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenEnqueueFailedButThereIsNoDeferredContainerThenTimestampPacketContainerIsNotEmpty) {
    auto cmdQueue = setupFixtureAndCreateMockCommandQueue<MockCommandQueueFailEnqueue<FamilyType>, FamilyType>();

    MockKernelWithInternals kernelWithInternals(*device, context.get());
    Kernel *kernel = kernelWithInternals.mockKernel;
    MockMultiDispatchInfo dispatchInfo(device.get(), kernel);

    cmdQueue->taskCountToReturn = CompletionStamp::gpuHang;
    // Drop the deferred container so there is nowhere to move the nodes to.
    cmdQueue->deferredTimestampPackets.reset();
    cmdQueue->template enqueueHandler<CL_COMMAND_BARRIER>(nullptr, 0, true, dispatchInfo, 0, nullptr, nullptr);

    EXPECT_FALSE(cmdQueue->timestampPacketContainer->peekNodes().empty());

    // Swap the remaining nodes into a local container that goes out of scope
    // at the end of the test.
    TimestampPacketContainer drainedNodes;
    cmdQueue->timestampPacketContainer->swapNodes(drainedNodes);
}

View File

@ -30,6 +30,7 @@ class MockCommandQueue : public CommandQueue {
using CommandQueue::blitEnqueueAllowed;
using CommandQueue::blitEnqueueImageAllowed;
using CommandQueue::bufferCpuCopyAllowed;
using CommandQueue::deferredTimestampPackets;
using CommandQueue::device;
using CommandQueue::gpgpuEngine;
using CommandQueue::isCopyOnly;