compute-runtime/opencl/source/command_queue/command_queue_hw_xehp_plus.inl

74 lines
3.2 KiB
C++

/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/cache_flush_xehp_plus.inl"
#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/command_queue/command_queue_hw_base.inl"
#include "opencl/source/memory_manager/resource_surface.h"
namespace NEO {
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
}
template <>
void CommandQueueHw<Family>::submitCacheFlush(Surface **surfaces,
size_t numSurfaces,
LinearStream *commandStream,
uint64_t postSyncAddress) {
if constexpr (Family::isUsingL3Control) {
StackVec<L3Range, 128> subranges;
for (auto surface : CreateRange(surfaces, numSurfaces)) {
auto resource = reinterpret_cast<ResourceSurface *>(surface);
auto alloc = resource->getGraphicsAllocation();
coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, resource->resourceType);
}
for (size_t subrangeNumber = 0; subrangeNumber < subranges.size(); subrangeNumber += maxFlushSubrangeCount) {
size_t rangeCount = subranges.size() <= subrangeNumber + maxFlushSubrangeCount ? subranges.size() - subrangeNumber : maxFlushSubrangeCount;
Range<L3Range> range = CreateRange(subranges.begin() + subrangeNumber, rangeCount);
uint64_t postSyncAddressToFlush = 0;
if (rangeCount < maxFlushSubrangeCount || subranges.size() - subrangeNumber - maxFlushSubrangeCount == 0) {
postSyncAddressToFlush = postSyncAddress;
}
flushGpuCache<Family>(commandStream, range, postSyncAddressToFlush, device->getHardwareInfo());
}
}
}
template <>
bool CommandQueueHw<Family>::isCacheFlushCommand(uint32_t commandType) const {
return commandType == CL_COMMAND_RESOURCE_BARRIER;
}
template <>
LinearStream &getCommandStream<Family, CL_COMMAND_RESOURCE_BARRIER>(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling) {
size_t expectedSizeCS = 0;
bool usePostSync = false;
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<Family>(csrDeps);
usePostSync = true;
}
if constexpr (Family::isUsingL3Control) {
StackVec<L3Range, 128> subranges;
for (auto surface : CreateRange(surfaces, numSurfaces)) {
ResourceSurface *resource = reinterpret_cast<ResourceSurface *>(surface);
auto alloc = resource->getGraphicsAllocation();
coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, resource->resourceType);
}
expectedSizeCS += getSizeNeededToFlushGpuCache<Family>(subranges, usePostSync);
}
return commandQueue.getCS(expectedSizeCS);
}
} // namespace NEO