Refactor around cache flush

Change-Id: Iff32af0111375f4ffc804c82e6d753d57fe94e80
This commit is contained in:
Chodor, Jaroslaw
2019-01-31 14:47:55 +01:00
committed by sys_ocldev
parent 6ef2822643
commit 7d04159f76
8 changed files with 134 additions and 38 deletions

View File

@@ -432,6 +432,7 @@ template <typename GfxFamily>
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + KernelCommandsHelper<GfxFamily>::getSizeRequiredCS(pKernel) +
sizeof(PIPE_CONTROL) * (KernelCommandsHelper<GfxFamily>::isPipeControlWArequired() ? 2 : 1);
size += KernelCommandsHelper<GfxFamily>::getSizeRequiredForCacheFlush(pKernel, 0U, 0U);
size += PreemptionHelper::getPreemptionWaCsSize<GfxFamily>(commandQueue.getDevice());
if (reserveProfilingCmdsSpace) {
size += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);

View File

@@ -213,7 +213,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
pPipeControlCmd->setCommandStreamerStallEnable(true);
}
KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, &kernel);
KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, &kernel, 0U, 0U);
currentDispatchIndex++;
}

View File

@@ -141,6 +141,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
Kernel &kernel);
static size_t getSizeRequiredCS(const Kernel *kernel);
static size_t getSizeRequiredForCacheFlush(const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData);
static bool isPipeControlWArequired();
static size_t getSizeRequiredDSH(
const Kernel &kernel);
@@ -200,7 +201,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData);
static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel);
static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData);
static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t);
static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018 Intel Corporation
* Copyright (C) 2018-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -47,12 +47,14 @@ template <typename GfxFamily>
// Computes the number of command-stream bytes this helper reserves for one kernel:
// two MEDIA_STATE_FLUSH commands and one MEDIA_INTERFACE_DESCRIPTOR_LOAD, plus one
// PIPE_CONTROL when kernel->requiresCacheFlushCommand() reports true.
// kernel is dereferenced unconditionally, so it must not be null.
size_t KernelCommandsHelper<GfxFamily>::getSizeRequiredCS(const Kernel *kernel) {
size_t size = 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) +
sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD);
// NOTE(review): getSizeRequiredForCacheFlush() accounts for the same PIPE_CONTROL and
// getSizeRequiredCSKernel() adds both results together, so with this branch present the
// cache-flush PIPE_CONTROL looks to be reserved twice - confirm whether this branch is
// meant to stay.
if (kernel->requiresCacheFlushCommand()) {
size += sizeof(typename GfxFamily::PIPE_CONTROL);
}
return size;
}
// Returns the command-stream space needed for the cache flush that follows a walker:
// sizeof(PIPE_CONTROL) when kernel->requiresCacheFlushCommand() is true, otherwise 0.
// postSyncAddress and postSyncData are part of the signature but do not influence the
// result computed here.
template <typename GfxFamily>
size_t KernelCommandsHelper<GfxFamily>::getSizeRequiredForCacheFlush(const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;

    if (!kernel->requiresCacheFlushCommand()) {
        // No flush requested for this kernel, so nothing has to be reserved.
        return 0;
    }
    return sizeof(PIPE_CONTROL);
}
template <typename GfxFamily>
void KernelCommandsHelper<GfxFamily>::sendMediaStateFlush(
LinearStream &commandStream,
@@ -161,7 +163,7 @@ bool KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(uint32
}
template <typename GfxFamily>
void KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel) {
void KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const Kernel *kernel, uint64_t postSyncAddress, uint64_t postSyncData) {
if (kernel->requiresCacheFlushCommand()) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018 Intel Corporation
* Copyright (C) 2018-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -2144,24 +2144,51 @@ bool Kernel::platformSupportCacheFlushAfterWalker() const {
}
// Reports whether a cache flush command is required for this kernel.
// Checks visible in this body:
//   - platformSupportCacheFlushAfterWalker() must hold, otherwise the answer is false;
//   - true if the program has a global surface (getGlobalSurface() != nullptr);
//   - true if svmAllocationsRequireCacheFlush is set;
//   - true if any entry of kernelArgRequiresCacheFlush is non-null;
//   - false when none of the above applies.
// NOTE(review): the same checks appear twice below (once nested under the platform check,
// once after an early-return guard) and the braces do not balance as shown - presumably two
// revisions of the function rendered together; confirm which form is the live one.
bool Kernel::requiresCacheFlushCommand() const {
if (platformSupportCacheFlushAfterWalker()) {
if (getProgram()->getGlobalSurface() != nullptr) {
if (false == platformSupportCacheFlushAfterWalker()) {
return false;
}
if (getProgram()->getGlobalSurface() != nullptr) {
return true;
}
if (svmAllocationsRequireCacheFlush) {
return true;
}
size_t args = kernelArgRequiresCacheFlush.size();
for (size_t i = 0; i < args; i++) {
if (kernelArgRequiresCacheFlush[i] != nullptr) {
return true;
}
if (svmAllocationsRequireCacheFlush) {
return true;
}
size_t args = kernelArgRequiresCacheFlush.size();
for (size_t i = 0; i < args; i++) {
if (kernelArgRequiresCacheFlush[i] != nullptr) {
return true;
}
}
}
return false;
}
bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) {
// Appends to `out` the allocations this kernel wants covered by a cache flush.
// Nothing is appended when platformSupportCacheFlushAfterWalker() is false.
// Otherwise entries are appended in this order:
//   1. every non-null entry of kernelArgRequiresCacheFlush,
//   2. the program's global surface, if it has one,
//   3. when svmAllocationsRequireCacheFlush is set, each entry of kernelSvmGfxAllocations
//      accepted by allocationForCacheFlush().
// `out` is only appended to; existing contents are left in place.
void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const {
    if (!platformSupportCacheFlushAfterWalker()) {
        return;
    }

    // Kernel-argument allocations; null slots are skipped.
    for (GraphicsAllocation *argAllocation : this->kernelArgRequiresCacheFlush) {
        if (argAllocation != nullptr) {
            out.push_back(argAllocation);
        }
    }

    // Program-wide global surface.
    auto globalSurface = getProgram()->getGlobalSurface();
    if (nullptr != globalSurface) {
        out.push_back(globalSurface);
    }

    // SVM allocations are considered only when the kernel flagged them.
    if (!svmAllocationsRequireCacheFlush) {
        return;
    }
    for (GraphicsAllocation *svmAllocation : kernelSvmGfxAllocations) {
        if (!allocationForCacheFlush(svmAllocation)) {
            continue;
        }
        out.push_back(svmAllocation);
    }
}
bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) const {
if (argAllocation->flushL3Required || argAllocation->isMemObjectsAllocationWithWritableFlags()) {
return true;
}

View File

@@ -376,6 +376,9 @@ class Kernel : public BaseObject<_cl_kernel> {
bool requiresCacheFlushCommand() const;
using CacheFlushAllocationsVec = StackVec<GraphicsAllocation *, 32>;
void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const;
protected:
struct ObjectCounts {
uint32_t imageCount;
@@ -465,7 +468,7 @@ class Kernel : public BaseObject<_cl_kernel> {
bool platformSupportCacheFlushAfterWalker() const;
void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation);
bool allocationForCacheFlush(GraphicsAllocation *argAllocation);
bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const;
Program *program;
Context *context;
const Device &device;