Add Kernel restrictions

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2021-07-01 16:07:58 +00:00
committed by Compute-Runtime-Automation
parent aed3fada28
commit 64eb82efac
8 changed files with 574 additions and 17 deletions

View File

@@ -2670,4 +2670,87 @@ bool Kernel::areMultipleSubDevicesInContext() const {
return context ? context->containsMultipleSubDevices(clDevice.getRootDeviceIndex()) : false;
}
void Kernel::reconfigureKernel() {
    auto &descriptor = kernelInfo.kernelDescriptor;

    // Halve the maximum workgroup size for kernels compiled with the large GRF set.
    if (GrfConfig::LargeGrfNumber == descriptor.kernelAttributes.numGrfRequired) {
        maxKernelWorkGroupSize >>= 1;
    }

    this->containsStatelessWrites = descriptor.kernelAttributes.flags.usesStatelessWrites;

    // extendedInfo is optional; without it, no special pipeline select mode is requested.
    if (descriptor.extendedInfo.get()) {
        this->specialPipelineSelectMode = descriptor.extendedInfo->specialPipelineSelectModeRequired();
    } else {
        this->specialPipelineSelectMode = false;
    }
}
bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
if (false == HwHelper::cacheFlushAfterWalkerSupported(commandQueue.getDevice().getHardwareInfo())) {
return false;
}
if (DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get() != -1) {
return !!DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get();
}
bool cmdQueueRequiresCacheFlush = commandQueue.getRequiresCacheFlushAfterWalker();
if (false == cmdQueueRequiresCacheFlush) {
return false;
}
if (commandQueue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()) {
return false;
}
bool isMultiDevice = commandQueue.getContext().containsMultipleSubDevices(commandQueue.getDevice().getRootDeviceIndex());
if (false == isMultiDevice) {
return false;
}
bool isDefaultContext = (commandQueue.getContext().peekContextType() == ContextType::CONTEXT_TYPE_DEFAULT);
if (true == isDefaultContext) {
return false;
}
if (getProgram()->getGlobalSurface(commandQueue.getDevice().getRootDeviceIndex()) != nullptr) {
return true;
}
if (svmAllocationsRequireCacheFlush) {
return true;
}
size_t args = kernelArgRequiresCacheFlush.size();
for (size_t i = 0; i < args; i++) {
if (kernelArgRequiresCacheFlush[i] != nullptr) {
return true;
}
}
return false;
}
bool Kernel::requiresLimitedWorkgroupSize() const {
if (!this->isBuiltIn) {
return false;
}
if (this->auxTranslationDirection != AuxTranslationDirection::None) {
return false;
}
//if source is buffer in local memory, no need for limited workgroup
if (this->kernelInfo.getArgDescriptorAt(0).is<ArgDescriptor::ArgTPointer>()) {
if (this->getKernelArgInfo(0).object) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
auto buffer = castToObject<Buffer>(this->getKernelArgInfo(0u).object);
if (buffer && buffer->getGraphicsAllocation(rootDeviceIndex)->getMemoryPool() == MemoryPool::LocalMemory) {
return false;
}
}
}
//if we are reading from image no need for limited workgroup
if (this->kernelInfo.getArgDescriptorAt(0).is<ArgDescriptor::ArgTImage>()) {
return false;
}
return true;
}
void Kernel::updateAuxTranslationRequired() {
    // Only relevant when the stateless-compression debug flag is enabled.
    if (!DebugManager.flags.EnableStatelessCompression.get()) {
        return;
    }
    // Note: this only ever raises the flag; it is never cleared here.
    const bool statelessHostMemoryAccess =
        hasDirectStatelessAccessToHostMemory() || hasIndirectStatelessAccessToHostMemory();
    if (statelessHostMemoryAccess) {
        setAuxTranslationRequired(true);
    }
}
} // namespace NEO

View File

@@ -11,11 +11,7 @@
#include "opencl/source/kernel/kernel.h"
namespace NEO {
// Stub variant: this build never requires a cache flush after walker.
bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
    return false;
}
// Stub variant: no reconfiguration is performed; kernel state stays as initialized.
void Kernel::reconfigureKernel() {
}
int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
auto hwInfo = clDevice.getHardwareInfo();
auto &hwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
@@ -39,14 +35,8 @@ bool Kernel::requiresPerDssBackedBuffer() const {
return DebugManager.flags.ForcePerDssBackedBufferProgramming.get();
}
// Stub variant: every built-in kernel gets a limited workgroup size,
// with no buffer/image argument exemptions.
bool Kernel::requiresLimitedWorkgroupSize() const {
    return this->isBuiltIn;
}
// No additional kernel exec-info parameters are supported in this variant;
// any parameter is rejected with CL_INVALID_VALUE.
int32_t Kernel::setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) {
    return CL_INVALID_VALUE;
}
// Stub variant: aux-translation requirement is never updated here.
void Kernel::updateAuxTranslationRequired() {
}
} // namespace NEO

View File

@@ -16,7 +16,7 @@ set(IGDRCL_SRCS_tests_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_info_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_pipe_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_svm_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_cache_flush_requirements_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_cache_flush_requirements_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_arg_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_immediate_arg_tests.cpp

View File

@@ -461,6 +461,150 @@ TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslatio
}
}
// A buffer arg backed by BUFFER_HOST_MEMORY gives the kernel direct stateless
// access to host memory; with stateless compression enabled,
// updateAuxTranslationRequired() must then raise the aux-translation flag.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
    auto val = (cl_mem)&buffer;
    auto pVal = &val;
    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
    // setArg alone must not flip the flag; only the explicit update call does.
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
    pKernel->updateAuxTranslationRequired();
    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
// Counterpart of the test above: a BUFFER_COMPRESSED arg is not host memory,
// so the aux-translation flag must stay false after the update call.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);
    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
    auto val = (cl_mem)&buffer;
    auto pVal = &val;
    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
    pKernel->updateAuxTranslationRequired();
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
// Same positive scenario as the buffer-arg test, but the host-memory
// allocation is bound via setArgSvmAlloc instead of setArg.
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
    auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
    pKernel->updateAuxTranslationRequired();
    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
// Negative SVM counterpart: a BUFFER_COMPRESSED SVM allocation does not count
// as host memory, so the aux-translation flag remains false.
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);
    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
    auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
    pKernel->updateAuxTranslationRequired();
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
// Without indirect stateless access in the kernel info, a unified-memory
// host allocation attached via exec info must not raise the flag.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);
    pKernelInfo->hasIndirectStatelessAccess = false;
    MockGraphicsAllocation gfxAllocation;
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
    pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
    EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
    pKernel->updateAuxTranslationRequired();
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
// With indirect stateless access enabled, only the BUFFER_HOST_MEMORY
// allocation type should make updateAuxTranslationRequired() raise the flag;
// BUFFER and BUFFER_COMPRESSED must leave it false.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrueForHostMemoryAllocation) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);
    pKernelInfo->hasIndirectStatelessAccess = true;
    const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER,
                                  GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,
                                  GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};
    MockGraphicsAllocation gfxAllocation;
    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type);
        pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(pKernel->hasIndirectStatelessAccessToHostMemory());
        } else {
            EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
        }
        EXPECT_FALSE(pKernel->isAuxTranslationRequired());
        pKernel->updateAuxTranslationRequired();
        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(pKernel->isAuxTranslationRequired());
        } else {
            EXPECT_FALSE(pKernel->isAuxTranslationRequired());
        }
        // Reset shared state so each allocation type is tested independently.
        pKernel->clearUnifiedMemoryExecInfo();
        pKernel->setAuxTranslationRequired(false);
    }
}
class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
public:
void SetUp() {

View File

@@ -6,23 +6,321 @@
*/
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "test.h"
using namespace NEO;
namespace NEO {
TEST(KernelWithCacheFlushTests, givenDeviceWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
// Fixture for Kernel::requiresCacheFlushCommand tests. SetUp/TearDown are
// intentionally empty: several tests must set debug flags (e.g.
// CreateMultipleSubDevices) BEFORE the platform is created, so each test
// calls initializePlatform()/clearPlatform() itself at the right moment.
class KernelWithCacheFlushTests : public PlatformFixture, public testing::TestWithParam<std::tuple<const char *, const char *>> {
  public:
    void SetUp() override {
    }
    void TearDown() override {
    }
    // Creates the platform; call after any debug flags that affect device creation.
    void initializePlatform() {
        PlatformFixture::SetUp();
    }
    // Destroys the platform; every test that called initializePlatform() must call this.
    void clearPlatform() {
        PlatformFixture::TearDown();
    }
};
TEST_F(KernelWithCacheFlushTests, givenDeviceWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) {
initializePlatform();
auto device = pPlatform->getClDevice(0);
auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
MockContext mockContext(device.get());
MockCommandQueue queue;
MockContext mockContext(device);
MockCommandQueue queue(mockContext);
bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue);
EXPECT_FALSE(flushRequired);
clearPlatform();
}
// A plain MockCommandQueue never reports requiresCacheFlushAfterWalker,
// so the kernel must not request a flush even with the feature enabled.
TEST_F(KernelWithCacheFlushTests, givenQueueWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    initializePlatform();
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    MockCommandQueue queue(mockContext);
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue);
    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
// EnableCacheFlushAfterWalkerForAllQueues = 0 force-disables the flush for
// every queue, overriding all other conditions.
TEST_F(KernelWithCacheFlushTests, givenCacheFlushForAllQueuesDisabledWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    initializePlatform();
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(0);
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    MockCommandQueue queue(mockContext);
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue);
    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
// A multi-OS-context-capable CSR must suppress the cache flush even when
// the queue itself requires one.
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushForMultiEngineEnabledWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    initializePlatform();
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = true;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
// With a single sub-device (CreateMultipleSubDevices = 1, set before platform
// creation) the context is not multi-device, so no flush is required.
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushForSingleDeviceProgramWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.CreateMultipleSubDevices.set(1);
    initializePlatform();
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    cmdQ->requiresCacheFlushAfterWalker = true;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
// Two sub-devices, but the context keeps its default type
// (CONTEXT_TYPE_DEFAULT), which excludes it from the flush requirement.
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushForDefaultTypeContextWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
// All queue/context preconditions for a flush are met (multi sub-device,
// specialized context), but with no global surface, no flagged SVM
// allocations and no flagged kernel args, no flush is required.
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithNullGlobalSurfaceWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
// Same preconditions as above, but the program now exposes a global surface,
// which must trigger the cache-flush requirement.
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithGlobalSurfaceWhenCheckIfKernelRequireFlushThenReturnedTrue) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    // Fake, page-aligned address is enough; the allocation is never dereferenced.
    void *allocPtr = reinterpret_cast<void *>(static_cast<uintptr_t>(6 * MemoryConstants::pageSize));
    MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2};
    mockKernel->mockProgram->setGlobalSurface(&globalAllocation);
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
    EXPECT_TRUE(flushRequired);
    // Detach the stack-owned allocation before the program is destroyed.
    mockKernel->mockProgram->setGlobalSurface(nullptr);
    clearPlatform();
}
// Command-stream size estimation must grow by exactly the commands emitted for
// a post-walker cache flush (PIPE_CONTROL, plus L3_CONTROL + L3_FLUSH_ADDRESS_RANGE
// on platforms using L3 control) once the flush becomes required.
HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAddRequiredCommands, IsAtLeastXeHpCore) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.CreateMultipleSubDevices.set(2);
    initializePlatform();
    if (!pPlatform->getClDevice(0)->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker) {
        clearPlatform();
        GTEST_SKIP();
    }
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    CsrDependencies csrDeps;
    DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo(mockKernel->mockKernel);
    dispatchInfo.setKernel(mockKernel->mockKernel);
    dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
    dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
    multiDispatchInfo.push(dispatchInfo);
    size_t initialSize = 0;
    size_t sizeWithCacheFlush = 0;
    size_t expectedDiff = sizeof(typename FamilyType::PIPE_CONTROL);
    if constexpr (FamilyType::isUsingL3Control) {
        expectedDiff += sizeof(typename FamilyType::L3_CONTROL) + sizeof(typename FamilyType::L3_FLUSH_ADDRESS_RANGE);
    }
    // Baseline estimate: flush not yet required.
    {
        EXPECT_FALSE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ));
        initialSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false);
    }
    // Enable every condition that makes the flush required, then re-estimate.
    {
        DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
        void *allocPtr = reinterpret_cast<void *>(static_cast<uintptr_t>(6 * MemoryConstants::pageSize));
        MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2};
        mockKernel->mockProgram->setGlobalSurface(&globalAllocation);
        cmdQ->requiresCacheFlushAfterWalker = true;
        auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
        ultCsr.multiOsContextCapable = false;
        EXPECT_TRUE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ));
        sizeWithCacheFlush = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false);
    }
    EXPECT_EQ(initialSize + expectedDiff, sizeWithCacheFlush);
    mockKernel->mockProgram->setGlobalSurface(nullptr);
    clearPlatform();
}
// With svmAllocationsRequireCacheFlush set on the kernel, a flush must be
// required even without a global surface or flagged kernel args.
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithAllocationsRequireCacheFlushFlagOnWhenCheckIfKernelRequireFlushThenReturnedTrue) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    mockKernel->mockKernel->svmAllocationsRequireCacheFlush = true;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
    EXPECT_TRUE(flushRequired);
    clearPlatform();
}
// A single non-null entry anywhere in kernelArgRequiresCacheFlush (here at
// index 1, with index 0 left null) must be enough to require a flush.
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithAllocationsWhichRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedTrue) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);
    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    mockKernel->mockKernel->svmAllocationsRequireCacheFlush = false;
    mockKernel->mockKernel->kernelArgRequiresCacheFlush.resize(2);
    MockGraphicsAllocation cacheRequiringAllocation;
    mockKernel->mockKernel->kernelArgRequiresCacheFlush[1] = &cacheRequiringAllocation;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
    EXPECT_TRUE(flushRequired);
    clearPlatform();
}
// Exhaustive sweep: EnableCacheFlushAfterWalkerForAllQueues = 1 must force the
// flush regardless of engine, device count, context type, SVM flag, or kernel
// args — 32 combinations, each with a freshly created platform.
// NOTE(review): "RequierFlush" in the test name is a typo for "RequireFlush";
// renaming would change the test identifier, so it is only flagged here.
HWTEST_F(KernelWithCacheFlushTests,
         givenEnableCacheFlushAfterWalkerForAllQueuesFlagSetWhenCheckIfKernelRequierFlushThenTrueIsAlwaysReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1);
    MockGraphicsAllocation cacheRequiringAllocation;
    for (auto isMultiEngine : ::testing::Bool()) {
        for (auto isMultiDevice : ::testing::Bool()) {
            for (auto isDefaultContext : ::testing::Bool()) {
                for (auto svmAllocationRequiresCacheFlush : ::testing::Bool()) {
                    for (auto kernelArgRequiresCacheFlush : ::testing::Bool()) {
                        auto deviceCount = (isMultiDevice ? 2 : 0);
                        auto contextType =
                            (isDefaultContext ? ContextType::CONTEXT_TYPE_DEFAULT : ContextType::CONTEXT_TYPE_SPECIALIZED);
                        GraphicsAllocation *kernelArg = (kernelArgRequiresCacheFlush ? &cacheRequiringAllocation : nullptr);
                        // Sub-device count must be set before the platform is (re)created.
                        DebugManager.flags.CreateMultipleSubDevices.set(deviceCount);
                        initializePlatform();
                        auto device = pPlatform->getClDevice(0);
                        MockContext mockContext(device);
                        mockContext.contextType = contextType;
                        auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
                        cmdQ->requiresCacheFlushAfterWalker = true;
                        auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
                        ultCsr.multiOsContextCapable = isMultiEngine;
                        auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
                        mockKernel->mockKernel->svmAllocationsRequireCacheFlush = svmAllocationRequiresCacheFlush;
                        mockKernel->mockKernel->kernelArgRequiresCacheFlush.resize(1);
                        mockKernel->mockKernel->kernelArgRequiresCacheFlush[0] = kernelArg;
                        auto flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
                        EXPECT_TRUE(flushRequired);
                        clearPlatform();
                    }
                }
            }
        }
    }
}
} // namespace NEO

View File

@@ -3171,3 +3171,43 @@ TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSi
EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize);
EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG);
}
// Fixture providing a program and kernel info for large-GRF workgroup-size tests.
struct KernelLargeGrfTests : Test<ClDeviceFixture> {
    void SetUp() override {
        ClDeviceFixture::SetUp();
        program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));
        pKernelInfo = std::make_unique<KernelInfo>();
        pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 64;
    }
    void TearDown() override {
        ClDeviceFixture::TearDown();
    }
    std::unique_ptr<MockProgram> program;
    std::unique_ptr<KernelInfo> pKernelInfo;
    // NOTE(review): appears unused within this fixture and its visible tests —
    // candidate for removal if no other test in the file references it.
    SPatchExecutionEnvironment executionEnvironment = {};
};
// A kernel requiring the large GRF config must report half the device's max
// workgroup size after initialize(); below that GRF count, the full size.
HWTEST_F(KernelLargeGrfTests, GivenLargeGrfWhenGettingMaxWorkGroupSizeThenCorrectValueReturned) {
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
    pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4;
    pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize = 0;
    // Just below the large-GRF threshold: full workgroup size.
    {
        MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
        pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber - 1;
        EXPECT_EQ(CL_SUCCESS, kernel.initialize());
        EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData);
        EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize);
    }
    // Exactly the large-GRF config: workgroup size halved.
    {
        MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
        pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
        EXPECT_EQ(CL_SUCCESS, kernel.initialize());
        EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize >> 1, *kernel.maxWorkGroupSizeForCrossThreadData);
        EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize >> 1, kernel.maxKernelWorkGroupSize);
    }
}

View File

@@ -299,5 +299,6 @@ EnableUserFenceUseCtxId = -1
EnableResourceTags = 0
SetKmdWaitTimeout = -1
OverrideNotifyEnableForTagUpdatePostSync = -1
EnableCacheFlushAfterWalkerForAllQueues = -1
Force32BitDriverSupport = -1
OverrideCmdQueueSynchronousMode = -1

View File

@@ -206,6 +206,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then max
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushEveryEnqueueCount, -1, "If greater than 0, driver performs implicit flush every N submissions.")
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForNewResource, -1, "-1: platform specific, 0: force disable, 1: force enable")
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForIdleGpu, -1, "-1: platform specific, 0: force disable, 1: force enable")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalkerForAllQueues, -1, "Enable cache flush after walker even if queue doesn't require it")
/*DIRECT SUBMISSION FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")