mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 00:58:39 +08:00
Improve obtaining LinearStream during enqueue call
- Move logic to enqueueHandler to cover all scenarios
- Create BlockedCommandsData not only for Kernel enqueue
- KernelOperation cleanup

Change-Id: Ie4a673cbbc986c685996a38ab296444d38e7bbd5
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
1460713d69
commit
95c2dcd8b0
@@ -337,7 +337,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
KernelOperation *blockedCommandsData,
|
||||
std::unique_ptr<KernelOperation> &blockedCommandsData,
|
||||
EventsRequest &eventsRequest,
|
||||
bool slmUsed,
|
||||
EventBuilder &externalEventBuilder,
|
||||
@@ -385,6 +385,29 @@ class CommandQueueHw : public CommandQueue {
|
||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection);
|
||||
|
||||
template <uint32_t commandType>
|
||||
LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool profilingRequired,
|
||||
bool perfCountersRequired, bool blitEnqueue, bool blockedQueue,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
std::unique_ptr<KernelOperation> &blockedCommandsData,
|
||||
Surface **surfaces, size_t numSurfaces) {
|
||||
LinearStream *commandStream = nullptr;
|
||||
if (blockedQueue && !multiDispatchInfo.empty()) {
|
||||
constexpr size_t additionalAllocationSize = CSRequirements::csOverfetchSize;
|
||||
constexpr size_t allocationSize = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize;
|
||||
commandStream = new LinearStream();
|
||||
|
||||
auto &gpgpuCsr = getGpgpuCommandStreamReceiver();
|
||||
gpgpuCsr.ensureCommandBufferAllocation(*commandStream, allocationSize, additionalAllocationSize);
|
||||
|
||||
blockedCommandsData = std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
|
||||
} else {
|
||||
commandStream = &getCommandStream<GfxFamily, commandType>(*this, csrDependencies, profilingRequired, perfCountersRequired,
|
||||
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces);
|
||||
}
|
||||
return commandStream;
|
||||
}
|
||||
|
||||
private:
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
|
||||
@@ -414,7 +437,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
bool blockQueue,
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
CsrDependencies &csrDeps,
|
||||
KernelOperation *&blockedCommandsData,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TimestampPacketContainer &previousTimestampPacketNodes,
|
||||
PreemptionMode preemption);
|
||||
};
|
||||
|
||||
@@ -165,7 +165,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
bool profilingRequired = (this->isProfilingEnabled() && event != nullptr);
|
||||
bool perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
std::unique_ptr<KernelOperation> blockedCommandsData;
|
||||
std::unique_ptr<PrintfHandler> printfHandler;
|
||||
bool slmUsed = multiDispatchInfo.usesSlm() || parentKernel;
|
||||
auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
|
||||
@@ -227,8 +227,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
}
|
||||
|
||||
auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, csrDeps, profilingRequired, perfCountersRequired,
|
||||
blitEnqueue, multiDispatchInfo, surfacesForResidency, numSurfaceForResidency);
|
||||
auto &commandStream = *obtainCommandStream<commandType>(csrDeps, profilingRequired, perfCountersRequired, blitEnqueue, blockQueue,
|
||||
multiDispatchInfo, blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
|
||||
if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
@@ -241,7 +241,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType);
|
||||
} else if (multiDispatchInfo.empty() == false) {
|
||||
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
|
||||
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
|
||||
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
|
||||
previousTimestampPacketNodes, preemption);
|
||||
} else if (isCacheFlushCommand(commandType)) {
|
||||
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
|
||||
@@ -396,7 +396,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
bool blockQueue,
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
CsrDependencies &csrDeps,
|
||||
KernelOperation *&blockedCommandsData,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TimestampPacketContainer &previousTimestampPacketNodes,
|
||||
PreemptionMode preemption) {
|
||||
TagNode<HwPerfCounter> *hwPerfCounter = nullptr;
|
||||
@@ -437,13 +437,12 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
*this,
|
||||
multiDispatchInfo,
|
||||
csrDeps,
|
||||
&blockedCommandsData,
|
||||
blockedCommandsData,
|
||||
hwTimeStamps,
|
||||
hwPerfCounter,
|
||||
&previousTimestampPacketNodes,
|
||||
timestampPacketContainer.get(),
|
||||
preemption,
|
||||
blockQueue,
|
||||
commandType);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
@@ -738,7 +737,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
KernelOperation *blockedCommandsData,
|
||||
std::unique_ptr<KernelOperation> &blockedCommandsData,
|
||||
EventsRequest &eventsRequest,
|
||||
bool slmUsed,
|
||||
EventBuilder &externalEventBuilder,
|
||||
@@ -795,10 +794,9 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
allSurfaces.push_back(surface->duplicate());
|
||||
}
|
||||
PreemptionMode preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
|
||||
auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership
|
||||
auto cmd = std::make_unique<CommandComputeKernel>(
|
||||
*this,
|
||||
std::move(kernelOperation),
|
||||
std::move(blockedCommandsData),
|
||||
allSurfaces,
|
||||
shouldFlushDC(commandType, printfHandler.get()),
|
||||
slmUsed,
|
||||
|
||||
@@ -39,14 +39,13 @@ class HardwareInterface {
|
||||
CommandQueue &commandQueue,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
const CsrDependencies &csrDependencies,
|
||||
KernelOperation **blockedCommandsData,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
TagNode<HwPerfCounter> *hwPerfCounter,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
PreemptionMode preemptionMode,
|
||||
bool blockQueue,
|
||||
uint32_t commandType = 0);
|
||||
uint32_t commandType);
|
||||
|
||||
static void getDefaultDshSpace(
|
||||
const size_t &offsetInterfaceDescriptorTable,
|
||||
|
||||
@@ -26,13 +26,12 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
CommandQueue &commandQueue,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
const CsrDependencies &csrDependencies,
|
||||
KernelOperation **blockedCommandsData,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
TagNode<HwPerfCounter> *hwPerfCounter,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
PreemptionMode preemptionMode,
|
||||
bool blockQueue,
|
||||
uint32_t commandType) {
|
||||
|
||||
LinearStream *commandStream = nullptr;
|
||||
@@ -49,19 +48,11 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
}
|
||||
|
||||
// Allocate command stream and indirect heaps
|
||||
obtainIndirectHeaps(commandQueue, multiDispatchInfo, blockQueue, dsh, ioh, ssh);
|
||||
if (blockQueue) {
|
||||
constexpr static auto additionalAllocationSize = CSRequirements::csOverfetchSize;
|
||||
constexpr static auto allocationSize = MemoryConstants::pageSize64k - additionalAllocationSize;
|
||||
commandStream = new LinearStream();
|
||||
commandQueue.getGpgpuCommandStreamReceiver().ensureCommandBufferAllocation(*commandStream, allocationSize, additionalAllocationSize);
|
||||
|
||||
using UniqueIH = std::unique_ptr<IndirectHeap>;
|
||||
*blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh),
|
||||
UniqueIH(ssh), *commandQueue.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
if (parentKernel) {
|
||||
(*blockedCommandsData)->doNotFreeISH = true;
|
||||
}
|
||||
bool blockedQueue = (blockedCommandsData != nullptr);
|
||||
obtainIndirectHeaps(commandQueue, multiDispatchInfo, blockedQueue, dsh, ioh, ssh);
|
||||
if (blockedQueue) {
|
||||
blockedCommandsData->setHeaps(dsh, ioh, ssh);
|
||||
commandStream = blockedCommandsData->commandStream.get();
|
||||
} else {
|
||||
commandStream = &commandQueue.getCS(0);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user