diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 434224c412..dd36d37975 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -612,4 +612,24 @@ bool CommandQueue::isBlockedCommandStreamRequired(uint32_t commandType, const Ev return false; } + +void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo) { + if (DebugManager.flags.AUBDumpSubCaptureMode.get()) { + auto status = getGpgpuCommandStreamReceiver().checkAndActivateAubSubCapture(multiDispatchInfo); + if (!status.isActive) { + // make each enqueue blocking when subcapture is not active to split batch buffer + blocking = true; + } else if (!status.wasActiveInPreviousEnqueue) { + // omit timestamp packet dependencies upon subcapture activation + clearAllDependencies = true; + } + } + + if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) { + for (auto &dispatchInfo : multiDispatchInfo) { + auto kernelName = dispatchInfo.getKernel()->getKernelInfo().name; + getGpgpuCommandStreamReceiver().addAubComment(kernelName.c_str()); + } + } +} } // namespace NEO diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index 4e9bfb388e..791df24d89 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -438,6 +438,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void providePerformanceHint(TransferProperties &transferProperties); bool queueDependenciesClearRequired() const; bool blitEnqueueAllowed(bool queueBlocked, cl_command_type cmdType); + void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); Context *context = nullptr; Device *device = nullptr; diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h index 75e6d05ae7..da51d6361f
100644 --- a/runtime/command_queue/command_queue_hw.h +++ b/runtime/command_queue/command_queue_hw.h @@ -328,7 +328,6 @@ class CommandQueueHw : public CommandQueue { EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, - bool slmUsed, PrintfHandler *printfHandler); template @@ -339,7 +338,6 @@ class CommandQueueHw : public CommandQueue { TimestampPacketContainer *previousTimestampPacketNodes, std::unique_ptr &blockedCommandsData, EventsRequest &eventsRequest, - bool slmUsed, EventBuilder &externalEventBuilder, std::unique_ptr printfHandler); @@ -386,13 +384,15 @@ class CommandQueueHw : public CommandQueue { AuxTranslationDirection auxTranslationDirection); template - LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool profilingRequired, - bool perfCountersRequired, bool blitEnqueue, bool blockedQueue, - const MultiDispatchInfo &multiDispatchInfo, - const EventsRequest &eventsRequest, + LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool blitEnqueue, bool blockedQueue, + const MultiDispatchInfo &multiDispatchInfo, const EventsRequest &eventsRequest, std::unique_ptr &blockedCommandsData, Surface **surfaces, size_t numSurfaces) { LinearStream *commandStream = nullptr; + + bool profilingRequired = (this->isProfilingEnabled() && eventsRequest.outEvent); + bool perfCountersRequired = (this->isPerfCountersEnabled() && eventsRequest.outEvent); + if (isBlockedCommandStreamRequired(commandType, eventsRequest, blockedQueue)) { constexpr size_t additionalAllocationSize = CSRequirements::csOverfetchSize; constexpr size_t allocationSize = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize; @@ -422,11 +422,9 @@ class CommandQueueHw : public CommandQueue { size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch); - void processDeviceEnqueue(Kernel *parentKernel, - DeviceQueueHw *devQueueHw, + void processDeviceEnqueue(DeviceQueueHw *devQueueHw, const MultiDispatchInfo 
&multiDispatchInfo, TagNode *hwTimeStamps, - PreemptionMode preemption, bool &blocking); template @@ -434,12 +432,10 @@ class CommandQueueHw : public CommandQueue { std::unique_ptr &printfHandler, Event *event, TagNode *&hwTimeStamps, - Kernel *parentKernel, bool blockQueue, DeviceQueueHw *devQueueHw, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, - TimestampPacketContainer &previousTimestampPacketNodes, - PreemptionMode preemption); + TimestampPacketContainer &previousTimestampPacketNodes); }; } // namespace NEO diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 3550b6aa89..83fed699db 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -163,12 +163,8 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", commandType, "output Event", eventBuilder.getEvent()); } - bool profilingRequired = (this->isProfilingEnabled() && event != nullptr); - bool perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr); std::unique_ptr blockedCommandsData; std::unique_ptr printfHandler; - bool slmUsed = multiDispatchInfo.usesSlm() || parentKernel; - auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo); TakeOwnershipWrapper> queueOwnership(*this); auto blockQueue = false; @@ -185,23 +181,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, enqueueHandlerHook(commandType, multiDispatchInfo); - if (DebugManager.flags.AUBDumpSubCaptureMode.get()) { - auto status = getGpgpuCommandStreamReceiver().checkAndActivateAubSubCapture(multiDispatchInfo); - if (!status.isActive) { - // make each enqueue blocking when subcapture is not active to split batch buffer - blocking = true; - } else if (!status.wasActiveInPreviousEnqueue) { - // omit timestamp packet dependencies dependencies upon subcapture activation - clearAllDependencies = true; - } - } - 
- if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) { - for (auto &dispatchInfo : multiDispatchInfo) { - auto kernelName = dispatchInfo.getKernel()->getKernelInfo().name; - getGpgpuCommandStreamReceiver().addAubComment(kernelName.c_str()); - } - } + aubCaptureHook(blocking, clearAllDependencies, multiDispatchInfo); if (DebugManager.flags.MakeEachEnqueueBlocking.get()) { blocking = true; @@ -227,9 +207,8 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } } - auto &commandStream = *obtainCommandStream(csrDeps, profilingRequired, perfCountersRequired, blitEnqueue, blockQueue, - multiDispatchInfo, eventsRequest, blockedCommandsData, surfacesForResidency, - numSurfaceForResidency); + auto &commandStream = *obtainCommandStream(csrDeps, blitEnqueue, blockQueue, multiDispatchInfo, eventsRequest, + blockedCommandsData, surfacesForResidency, numSurfaceForResidency); auto commandStreamStart = commandStream.getUsed(); if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { @@ -242,8 +221,8 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType); } else if (multiDispatchInfo.empty() == false) { processDispatchForKernels(multiDispatchInfo, printfHandler, eventBuilder.getEvent(), - hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(), - previousTimestampPacketNodes, preemption); + hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(), + previousTimestampPacketNodes); } else if (isCacheFlushCommand(commandType)) { processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps); } else if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { @@ -268,7 +247,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, CompletionStamp 
completionStamp = {Event::eventNotReady, taskLevel, 0}; if (!blockQueue) { if (parentKernel) { - processDeviceEnqueue(parentKernel, devQueueHw, multiDispatchInfo, hwTimeStamps, preemption, blocking); + processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking); } auto kernelSubmissionRequired = !isCommandWithoutKernel(commandType) && !blitEnqueue; @@ -285,7 +264,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, eventsRequest, eventBuilder, taskLevel, - slmUsed, printfHandler.get()); if (parentKernel) { @@ -365,7 +343,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, &previousTimestampPacketNodes, blockedCommandsData, eventsRequest, - slmUsed, eventBuilder, std::move(printfHandler)); } @@ -393,19 +370,17 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf std::unique_ptr &printfHandler, Event *event, TagNode *&hwTimeStamps, - Kernel *parentKernel, bool blockQueue, DeviceQueueHw *devQueueHw, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, - TimestampPacketContainer &previousTimestampPacketNodes, - PreemptionMode preemption) { + TimestampPacketContainer &previousTimestampPacketNodes) { TagNode *hwPerfCounter = nullptr; DebugManager.dumpKernelArgs(&multiDispatchInfo); printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device)); if (printfHandler) { - printfHandler.get()->prepareDispatch(multiDispatchInfo); + printfHandler->prepareDispatch(multiDispatchInfo); } if (commandType == CL_COMMAND_NDRANGE_KERNEL) { @@ -419,7 +394,7 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf hwTimeStamps = event->getHwTimeStampNode(); } - if (parentKernel) { + if (auto parentKernel = multiDispatchInfo.peekParentKernel()) { parentKernel->createReflectionSurface(); parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue()); parentKernel->patchEventPool(context->getDefaultDeviceQueue()); @@ -443,7 +418,6 @@ void 
CommandQueueHw::processDispatchForKernels(const MultiDispatchInf hwPerfCounter, &previousTimestampPacketNodes, timestampPacketContainer.get(), - preemption, commandType); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { @@ -502,12 +476,11 @@ void CommandQueueHw::processDispatchForCacheFlush(Surface **surfaces, } template -void CommandQueueHw::processDeviceEnqueue(Kernel *parentKernel, - DeviceQueueHw *devQueueHw, +void CommandQueueHw::processDeviceEnqueue(DeviceQueueHw *devQueueHw, const MultiDispatchInfo &multiDispatchInfo, TagNode *hwTimeStamps, - PreemptionMode preemption, bool &blocking) { + auto parentKernel = multiDispatchInfo.peekParentKernel(); size_t minSizeSSHForEM = HardwareCommandsHelper::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel); uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1; @@ -531,10 +504,11 @@ void CommandQueueHw::processDeviceEnqueue(Kernel *parentKernel, this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(), devQueueHw->getDebugQueue()); + auto preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo); GpgpuWalkerHelper::dispatchScheduler( *this->commandStream, *devQueueHw, - preemption, + preemptionMode, scheduler, &getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)); @@ -604,7 +578,6 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, - bool slmUsed, PrintfHandler *printfHandler) { UNRECOVERABLE_IF(multiDispatchInfo.empty()); @@ -693,7 +666,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( DispatchFlags dispatchFlags; dispatchFlags.blocking = blocking; dispatchFlags.dcFlush = shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC; - dispatchFlags.useSLM = slmUsed; + dispatchFlags.useSLM = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(); 
dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.GSBA32BitRequired = commandType == CL_COMMAND_NDRANGE_KERNEL; dispatchFlags.mediaSamplerRequired = mediaSamplerRequired; @@ -740,7 +713,6 @@ void CommandQueueHw::enqueueBlocked( TimestampPacketContainer *previousTimestampPacketNodes, std::unique_ptr &blockedCommandsData, EventsRequest &eventsRequest, - bool slmUsed, EventBuilder &externalEventBuilder, std::unique_ptr printfHandler) { @@ -790,6 +762,7 @@ void CommandQueueHw::enqueueBlocked( allSurfaces.push_back(surface->duplicate()); } PreemptionMode preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo); + bool slmUsed = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(); command = std::make_unique(*this, blockedCommandsData, allSurfaces, diff --git a/runtime/command_queue/hardware_interface.h b/runtime/command_queue/hardware_interface.h index 40059a19cc..095a6c2f3e 100644 --- a/runtime/command_queue/hardware_interface.h +++ b/runtime/command_queue/hardware_interface.h @@ -44,7 +44,6 @@ class HardwareInterface { TagNode *hwPerfCounter, TimestampPacketContainer *previousTimestampPacketNodes, TimestampPacketContainer *currentTimestampPacketNodes, - PreemptionMode preemptionMode, uint32_t commandType); static void getDefaultDshSpace( diff --git a/runtime/command_queue/hardware_interface_base.inl b/runtime/command_queue/hardware_interface_base.inl index 5f56d24554..3b3ffdadcb 100644 --- a/runtime/command_queue/hardware_interface_base.inl +++ b/runtime/command_queue/hardware_interface_base.inl @@ -32,13 +32,13 @@ void HardwareInterface::dispatchWalker( TagNode *hwPerfCounter, TimestampPacketContainer *previousTimestampPacketNodes, TimestampPacketContainer *currentTimestampPacketNodes, - PreemptionMode preemptionMode, uint32_t commandType) { LinearStream *commandStream = nullptr; IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; auto parentKernel = multiDispatchInfo.peekParentKernel(); auto 
mainKernel = multiDispatchInfo.peekMainKernel(); + auto preemptionMode = PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo); for (auto &dispatchInfo : multiDispatchInfo) { // Compute local workgroup sizes diff --git a/unit_tests/command_queue/dispatch_walker_tests.cpp b/unit_tests/command_queue/dispatch_walker_tests.cpp index df2276c759..7029d75958 100644 --- a/unit_tests/command_queue/dispatch_walker_tests.cpp +++ b/unit_tests/command_queue/dispatch_walker_tests.cpp @@ -156,7 +156,6 @@ HWTEST_F(DispatchWalkerTest, shouldntChangeCommandStreamMemory) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase()); @@ -204,7 +203,6 @@ HWTEST_F(DispatchWalkerTest, noLocalIdsShouldntCrash) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase()); @@ -234,7 +232,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm) nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(dimension, *kernel.workDim); @@ -265,7 +262,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm) nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(dimension, *kernel.workDim); } @@ -294,7 +290,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(dimension, *kernel.workDim); } @@ -324,7 +319,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(dimension, *kernel.workDim); } @@ -354,7 +348,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) { nullptr, nullptr, nullptr, - 
pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(2u, *kernel.numWorkGroupsX); @@ -386,7 +379,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeND) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(2u, *kernel.localWorkSizeX); EXPECT_EQ(5u, *kernel.localWorkSizeY); @@ -417,7 +409,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(2u, *kernel.localWorkSizeX); EXPECT_EQ(5u, *kernel.localWorkSizeY); @@ -449,7 +440,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(2u, *kernel.localWorkSizeX); EXPECT_EQ(5u, *kernel.localWorkSizeY); @@ -481,7 +471,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAn nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(2u, *kernel.localWorkSizeX); EXPECT_EQ(5u, *kernel.localWorkSizeY); @@ -511,7 +500,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSize) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(1u, *kernel.localWorkSizeX); EXPECT_EQ(2u, *kernel.localWorkSizeY); @@ -544,7 +532,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizes) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(1u, *kernel.localWorkSizeX); EXPECT_EQ(2u, *kernel.localWorkSizeY); @@ -581,7 +568,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizeForSplitKernel) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); auto dispatchId = 0; @@ -632,7 +618,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), 
CL_COMMAND_NDRANGE_KERNEL); for (auto &dispatchInfo : multiDispatchInfo) { @@ -684,7 +669,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); auto &commandStream = pCmdQ->getCS(1024); @@ -719,7 +703,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); Vec3 localWorkgroupSize(workGroupSize); @@ -746,7 +729,7 @@ HWTEST_F(DispatchWalkerTest, givenBlockedEnqueueWhenObtainingCommandStreamThenAl CsrDependencies csrDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); - auto cmdStream = mockCmdQ.template obtainCommandStream(csrDependencies, false, false, false, true, + auto cmdStream = mockCmdQ.template obtainCommandStream(csrDependencies, false, true, multiDispatchInfo, eventsRequest, blockedKernelData, nullptr, 0u); @@ -773,7 +756,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); @@ -800,7 +782,6 @@ HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenComm nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_NE(nullptr, blockedCommandsData->commandStream->getGraphicsAllocation()); @@ -828,7 +809,6 @@ HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerI nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_TRUE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty()); @@ -852,7 +832,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerWithMultipleDispatchInfo) { nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); for (auto &dispatchInfo 
: multiDispatchInfo) { @@ -894,7 +873,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); auto dshAfterMultiDisptach = indirectHeap.getUsed(); @@ -979,7 +957,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; @@ -1025,7 +1002,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; @@ -1076,7 +1052,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; @@ -1129,7 +1104,6 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationReq nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; @@ -1168,7 +1142,6 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoK nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; @@ -1208,7 +1181,6 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareInterface::dispatchWalker( @@ -1220,7 +1192,6 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; @@ -1243,7 +1214,7 @@ HWTEST_F(DispatchWalkerTest, givenMultiDispatchWhenWhitelistedRegisterForCoheren MockMultiDispatchInfo multiDispatchInfo(std::vector({&di1, &di2})); HardwareInterface::dispatchWalker(*pCmdQ, multiDispatchInfo, 
CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); + CL_COMMAND_NDRANGE_KERNEL); hwParser.parseCommands(cmdStream, 0); @@ -1306,7 +1277,6 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredTh nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); auto sizeUsed = cmdStream.getUsed(); @@ -1363,7 +1333,6 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredTh nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); auto sizeUsed = cmdStream.getUsed(); diff --git a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp index 2f753d4568..dcfccd0486 100644 --- a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp +++ b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp @@ -53,7 +53,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); @@ -109,7 +108,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); auto iohUsed = ioh.getUsed(); @@ -135,7 +133,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); @@ -171,7 +168,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); ASSERT_NE(nullptr, blockedCommandsData); @@ -283,7 +279,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, 
MockParentKernelDispatch, GivenBlockedQueueWhenParen nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); ASSERT_NE(nullptr, blockedCommandsData); @@ -316,7 +311,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispa nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); LinearStream *commandStream = &pCmdQ->getCS(0); @@ -374,7 +368,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParent nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(UnitTestHelper::getDefaultSshUsage(), ssh.getUsed()); @@ -410,7 +403,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenNotUsedSSHHeapWhenPar nullptr, nullptr, nullptr, - pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(bufferMemory, ssh.getCpuBase()); diff --git a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp index a41fab8b70..c7da186cfd 100644 --- a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp +++ b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp @@ -421,7 +421,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand nullptr, nullptr, nullptr, - device->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); EXPECT_NE(nullptr, blockedCommandsData); diff --git a/unit_tests/helpers/timestamp_packet_tests.cpp b/unit_tests/helpers/timestamp_packet_tests.cpp index 1a102bdb5f..e8b4f85972 100644 --- a/unit_tests/helpers/timestamp_packet_tests.cpp +++ b/unit_tests/helpers/timestamp_packet_tests.cpp @@ -390,7 +390,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat nullptr, nullptr, &timestampPacket, - device->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; @@ -435,7 +434,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, 
givenTimestampPacketDisabledWh nullptr, nullptr, &timestampPacket, - device->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; @@ -885,7 +883,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh nullptr, nullptr, &timestamp7, - device->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; @@ -969,7 +966,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr nullptr, nullptr, &timestamp7, - device->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser;