mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
feature: control post sync completion check
Related-To: NEO-14844
Signed-off-by: Tomasz Biernacik <tomasz.biernacik@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
087d1ecea4
commit
2c5cbec033
@@ -137,6 +137,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
||||
this->heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(this->heaplessModeEnabled);
|
||||
this->isForceStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
this->l3FlushAfterPostSyncEnabled = productHelper.isL3FlushAfterPostSyncRequired(this->heaplessModeEnabled);
|
||||
this->shouldRegisterEnqueuedWalkerWithProfiling = productHelper.shouldRegisterEnqueuedWalkerWithProfiling();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -415,6 +415,20 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
return this->isCacheFlushOnNextBcsWriteRequired && this->isImageWriteOperation(cmdType);
|
||||
}
|
||||
|
||||
bool getShouldRegisterEnqueuedWalkerWithProfiling() {
|
||||
return this->shouldRegisterEnqueuedWalkerWithProfiling;
|
||||
}
|
||||
|
||||
/// Marks that a walker with profiling has been enqueued on this queue.
///
/// The mark stays set until getAndClearIsWalkerWithProfilingEnqueued() reads
/// and resets it.
void registerWalkerWithProfilingEnqueued() {
    constexpr bool walkerEnqueued = true;
    this->isWalkerWithProfilingEnqueued = walkerEnqueued;
}
|
||||
|
||||
/// Consumes the "walker with profiling enqueued" mark.
///
/// @return true if the mark was set when this function was called, false otherwise.
///         In both cases the mark is false afterwards, so a second consecutive
///         call returns false.
bool getAndClearIsWalkerWithProfilingEnqueued() {
    // Nothing was registered: the flag is already false, so there is nothing to reset.
    if (!this->isWalkerWithProfilingEnqueued) {
        return false;
    }

    // Reset the mark so that it is reported exactly once.
    this->isWalkerWithProfilingEnqueued = false;
    return true;
}
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
||||
@@ -529,6 +543,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
bool isForceStateless = false;
|
||||
bool l3FlushedAfterCpuRead = true;
|
||||
bool l3FlushAfterPostSyncEnabled = false;
|
||||
bool isWalkerWithProfilingEnqueued = false;
|
||||
bool shouldRegisterEnqueuedWalkerWithProfiling = false;
|
||||
};
|
||||
|
||||
static_assert(NEO::NonCopyableAndNonMovable<CommandQueue>);
|
||||
|
||||
@@ -918,6 +918,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
isDcFlushRequiredOnStallingCommandsOnNextFlush() // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
||||
);
|
||||
|
||||
dispatchFlags.isWalkerWithProfilingEnqueued = getAndClearIsWalkerWithProfilingEnqueued();
|
||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
|
||||
uint32_t lws[3] = {static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().x), static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().y), static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().z)};
|
||||
@@ -1178,6 +1179,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
isDcFlushRequiredOnStallingCommandsOnNextFlush() // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
||||
);
|
||||
|
||||
dispatchFlags.isWalkerWithProfilingEnqueued = getAndClearIsWalkerWithProfilingEnqueued();
|
||||
|
||||
const bool isHandlingBarrier = isStallingCommandsOnNextFlushRequired();
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
|
||||
@@ -155,12 +155,17 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
if (commandQueue.getShouldRegisterEnqueuedWalkerWithProfiling() && commandQueue.isProfilingEnabled() && walkerArgs.event) {
|
||||
commandQueue.registerWalkerWithProfilingEnqueued();
|
||||
}
|
||||
|
||||
if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(debugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) {
|
||||
uint32_t registerOffset = debugManager.flags.GpuScratchRegWriteRegisterOffset.get();
|
||||
uint32_t registerData = debugManager.flags.GpuScratchRegWriteRegisterData.get();
|
||||
|
||||
PipeControlArgs args;
|
||||
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, commandQueue.getDevice().getRootDeviceEnvironment());
|
||||
args.isWalkerWithProfilingEnqueued = commandQueue.getAndClearIsWalkerWithProfilingEnqueued();
|
||||
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandStream,
|
||||
PostSyncMode::noWrite,
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "opencl/test/unit_test/command_queue/hardware_interface_helper.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_mdi.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||
@@ -1385,30 +1386,61 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsAndLocalWorkSizeIsSetThenIohRequiresMoreSpace) {
|
||||
debugManager.flags.EnableHwGenerationLocalIds.set(0);
|
||||
HWTEST_F(DispatchWalkerTest, givenProfilingEnabledWhenProgrammingWalkerThenSetIsWalkerWithProfilingEnqueued) {
|
||||
size_t globalOffsets[3] = {0, 0, 0};
|
||||
size_t workItems[3] = {1, 1, 1};
|
||||
size_t workGroupSize[3] = {683, 1, 1};
|
||||
size_t workGroupSize[3] = {2, 5, 10};
|
||||
cl_uint dimensions = 1;
|
||||
|
||||
auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
|
||||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 1u;
|
||||
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(kernelInfo.kernelDescriptor);
|
||||
MockKernel kernelWithImplicitArgs(program.get(), kernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernelWithImplicitArgs.initialize());
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = false;
|
||||
MockKernel kernelWithoutImplicitArgs(program.get(), kernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernelWithoutImplicitArgs.initialize());
|
||||
|
||||
DispatchInfo dispatchInfoWithImplicitArgs(pClDevice, const_cast<MockKernel *>(&kernelWithImplicitArgs), dimensions, workItems, workGroupSize, globalOffsets);
|
||||
dispatchInfoWithImplicitArgs.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfoWithImplicitArgs.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
DispatchInfo dispatchInfoWithoutImplicitArgs(pClDevice, const_cast<MockKernel *>(&kernelWithoutImplicitArgs), dimensions, workItems, workGroupSize, globalOffsets);
|
||||
dispatchInfoWithoutImplicitArgs.setNumberOfWorkgroups({1, 1, 1});
|
||||
dispatchInfoWithoutImplicitArgs.setTotalNumberOfWorkgroups({1, 1, 1});
|
||||
MultiDispatchInfo multiDispatchInfoWithoutImplicitArgs(&kernelWithoutImplicitArgs);
|
||||
multiDispatchInfoWithoutImplicitArgs.push(dispatchInfoWithoutImplicitArgs);
|
||||
HardwareInterfaceWalkerArgs walkerArgsWithoutImplicitArgs = createHardwareInterfaceWalkerArgs(CL_COMMAND_NDRANGE_KERNEL);
|
||||
walkerArgsWithoutImplicitArgs.blockedCommandsData = blockedCommandsData.get();
|
||||
auto *event = new MockEvent<Event>(nullptr, 0, 0, 0);
|
||||
|
||||
auto iohSizeWithImplicitArgsWithoutLWS = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(kernelWithImplicitArgs, workGroupSize, pClDevice->getRootDeviceEnvironment());
|
||||
{
|
||||
walkerArgsWithoutImplicitArgs.event = event;
|
||||
HardwareInterface<FamilyType>::template dispatchWalker<typename FamilyType::DefaultWalkerType>(
|
||||
*pCmdQ,
|
||||
multiDispatchInfoWithoutImplicitArgs,
|
||||
CsrDependencies(),
|
||||
walkerArgsWithoutImplicitArgs);
|
||||
|
||||
dispatchInfoWithImplicitArgs.setLWS({683, 1, 1});
|
||||
EXPECT_FALSE(pCmdQ->getAndClearIsWalkerWithProfilingEnqueued());
|
||||
}
|
||||
|
||||
auto lws = dispatchInfoWithImplicitArgs.getLocalWorkgroupSize();
|
||||
kernelWithImplicitArgs.setLocalWorkSizeValues(static_cast<uint32_t>(lws.x), static_cast<uint32_t>(lws.y), static_cast<uint32_t>(lws.z));
|
||||
reinterpret_cast<MockCommandQueue *>(pCmdQ)->setProfilingEnabled();
|
||||
{
|
||||
walkerArgsWithoutImplicitArgs.event = nullptr;
|
||||
HardwareInterface<FamilyType>::template dispatchWalker<typename FamilyType::DefaultWalkerType>(
|
||||
*pCmdQ,
|
||||
multiDispatchInfoWithoutImplicitArgs,
|
||||
CsrDependencies(),
|
||||
walkerArgsWithoutImplicitArgs);
|
||||
|
||||
auto iohSizeWithImplicitArgsWithLWS = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(kernelWithImplicitArgs, workGroupSize, pClDevice->getRootDeviceEnvironment());
|
||||
EXPECT_FALSE(pCmdQ->getAndClearIsWalkerWithProfilingEnqueued());
|
||||
}
|
||||
|
||||
EXPECT_LE(iohSizeWithImplicitArgsWithoutLWS, iohSizeWithImplicitArgsWithLWS);
|
||||
{
|
||||
walkerArgsWithoutImplicitArgs.event = event;
|
||||
HardwareInterface<FamilyType>::template dispatchWalker<typename FamilyType::DefaultWalkerType>(
|
||||
*pCmdQ,
|
||||
multiDispatchInfoWithoutImplicitArgs,
|
||||
CsrDependencies(),
|
||||
walkerArgsWithoutImplicitArgs);
|
||||
|
||||
EXPECT_EQ(pClDevice->getRootDeviceEnvironment().getProductHelper().shouldRegisterEnqueuedWalkerWithProfiling(), pCmdQ->getAndClearIsWalkerWithProfilingEnqueued());
|
||||
}
|
||||
|
||||
event->release();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user