Mirror of https://github.com/intel/compute-runtime.git

Add Kernel restrictions

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>

Committed by: Compute-Runtime-Automation
Parent: aed3fada28
Commit: 64eb82efac
@@ -2670,4 +2670,87 @@ bool Kernel::areMultipleSubDevicesInContext() const {
    return context ? context->containsMultipleSubDevices(clDevice.getRootDeviceIndex()) : false;
}

void Kernel::reconfigureKernel() {
    auto &kernelDescriptor = kernelInfo.kernelDescriptor;
    if (kernelDescriptor.kernelAttributes.numGrfRequired == GrfConfig::LargeGrfNumber) {
        maxKernelWorkGroupSize >>= 1;
    }
    this->containsStatelessWrites = kernelDescriptor.kernelAttributes.flags.usesStatelessWrites;
    this->specialPipelineSelectMode = kernelDescriptor.extendedInfo.get() ? kernelDescriptor.extendedInfo->specialPipelineSelectModeRequired() : false;
}
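Note on the halving above: GrfConfig::LargeGrfNumber selects the large register file (256 GRFs rather than the default 128), which halves how many hardware threads an EU can keep in flight, so the dispatchable workgroup size shrinks by the same factor. A minimal sketch of the rule, assuming a device limit of 1024 work items; the helper name is hypothetical, not driver code:

    // Illustrative only: mirrors the LargeGrf halving in reconfigureKernel().
    uint32_t adjustedMaxWorkGroupSize(uint32_t deviceMax, uint32_t numGrfRequired) {
        return (numGrfRequired == GrfConfig::LargeGrfNumber) ? (deviceMax >> 1) : deviceMax;
    }
    // adjustedMaxWorkGroupSize(1024, GrfConfig::LargeGrfNumber) == 512

The KernelLargeGrfTests added at the bottom of this commit assert exactly this pairing.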

bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
    if (false == HwHelper::cacheFlushAfterWalkerSupported(commandQueue.getDevice().getHardwareInfo())) {
        return false;
    }

    if (DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get() != -1) {
        return !!DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get();
    }

    bool cmdQueueRequiresCacheFlush = commandQueue.getRequiresCacheFlushAfterWalker();
    if (false == cmdQueueRequiresCacheFlush) {
        return false;
    }
    if (commandQueue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()) {
        return false;
    }
    bool isMultiDevice = commandQueue.getContext().containsMultipleSubDevices(commandQueue.getDevice().getRootDeviceIndex());
    if (false == isMultiDevice) {
        return false;
    }
    bool isDefaultContext = (commandQueue.getContext().peekContextType() == ContextType::CONTEXT_TYPE_DEFAULT);
    if (true == isDefaultContext) {
        return false;
    }

    if (getProgram()->getGlobalSurface(commandQueue.getDevice().getRootDeviceIndex()) != nullptr) {
        return true;
    }
    if (svmAllocationsRequireCacheFlush) {
        return true;
    }
    size_t args = kernelArgRequiresCacheFlush.size();
    for (size_t i = 0; i < args; i++) {
        if (kernelArgRequiresCacheFlush[i] != nullptr) {
            return true;
        }
    }
    return false;
}
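The predicate is a chain of early-outs: hardware support for cache flush after walker is mandatory; the EnableCacheFlushAfterWalkerForAllQueues debug flag, when set to anything but -1, overrides every remaining heuristic; otherwise a flush is required only for a queue that requests it, on a command stream receiver that is not multi-OS-context capable, in a multi-subdevice non-default context, and only when the kernel actually touches a flushable surface (program global surface, SVM allocations, or a marked kernel argument). A hedged sketch of the override path, with queue and kernel assumed to exist:

    // Sketch only: with the flag forced on, the answer collapses to platform support.
    DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1);
    bool expected = HwHelper::cacheFlushAfterWalkerSupported(queue.getDevice().getHardwareInfo());
    EXPECT_EQ(expected, kernel.requiresCacheFlushCommand(queue));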

bool Kernel::requiresLimitedWorkgroupSize() const {
    if (!this->isBuiltIn) {
        return false;
    }
    if (this->auxTranslationDirection != AuxTranslationDirection::None) {
        return false;
    }

    // if the source is a buffer in local memory, there is no need for a limited workgroup
    if (this->kernelInfo.getArgDescriptorAt(0).is<ArgDescriptor::ArgTPointer>()) {
        if (this->getKernelArgInfo(0).object) {
            auto rootDeviceIndex = getDevice().getRootDeviceIndex();
            auto buffer = castToObject<Buffer>(this->getKernelArgInfo(0u).object);
            if (buffer && buffer->getGraphicsAllocation(rootDeviceIndex)->getMemoryPool() == MemoryPool::LocalMemory) {
                return false;
            }
        }
    }

    // if we are reading from an image, there is no need for a limited workgroup
    if (this->kernelInfo.getArgDescriptorAt(0).is<ArgDescriptor::ArgTImage>()) {
        return false;
    }

    return true;
}
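In effect, the workgroup-size limit now applies only to built-in kernels that are not part of aux translation and whose first argument is neither a local-memory buffer nor an image. A caller-side sketch; the helper name is illustrative, not the driver's actual call site:

    // Hypothetical caller: clamp the workgroup size only for qualifying builtins.
    size_t chooseWorkGroupSize(const Kernel &kernel, size_t preferred, size_t limited) {
        return kernel.requiresLimitedWorkgroupSize() ? limited : preferred;
    }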

void Kernel::updateAuxTranslationRequired() {
    if (DebugManager.flags.EnableStatelessCompression.get()) {
        if (hasDirectStatelessAccessToHostMemory() || hasIndirectStatelessAccessToHostMemory()) {
            setAuxTranslationRequired(true);
        }
    }
}
} // namespace NEO
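With EnableStatelessCompression active, regular buffers may be allocated compressed while host-memory allocations stay uncompressed, so a kernel whose stateless accesses can reach host memory (directly through its arguments, or indirectly) is flagged for aux translation. A hedged usage sketch mirroring the KernelArgBufferTest cases below; hostBackedBuffer is a hypothetical BUFFER_HOST_MEMORY allocation:

    DebugManager.flags.EnableStatelessCompression.set(1);
    kernel->setArg(0, sizeof(cl_mem), &hostBackedBuffer); // direct stateless access to host memory
    kernel->updateAuxTranslationRequired();               // isAuxTranslationRequired() now reports true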
@@ -11,11 +11,7 @@
#include "opencl/source/kernel/kernel.h"

namespace NEO {
bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
    return false;
}
void Kernel::reconfigureKernel() {
}

int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
    auto hwInfo = clDevice.getHardwareInfo();
    auto &hwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
@@ -39,14 +35,8 @@ bool Kernel::requiresPerDssBackedBuffer() const {
    return DebugManager.flags.ForcePerDssBackedBufferProgramming.get();
}

bool Kernel::requiresLimitedWorkgroupSize() const {
    return this->isBuiltIn;
}

int32_t Kernel::setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) {
    return CL_INVALID_VALUE;
}

void Kernel::updateAuxTranslationRequired() {
}
} // namespace NEO

@@ -16,7 +16,7 @@ set(IGDRCL_SRCS_tests_kernel
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_info_tests.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_pipe_tests.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_svm_tests.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_cache_flush_requirements_tests.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_cache_flush_requirements_tests.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl_tests.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_arg_tests.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel_immediate_arg_tests.cpp

@@ -461,6 +461,150 @@ TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslatio
    }
}

TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}

TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer buffer;
    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    auto val = (cl_mem)&buffer;
    auto pVal = &val;

    auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}

TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}

TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    char data[128];
    void *ptr = &data;
    MockGraphicsAllocation gfxAllocation(ptr, 128);
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}

TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = false;

    MockGraphicsAllocation gfxAllocation;
    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);

    EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}

TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrueForHostMemoryAllocation) {
    DebugManagerStateRestore debugRestorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = true;

    const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER,
                                  GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,
                                  GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};

    MockGraphicsAllocation gfxAllocation;

    for (const auto type : allocationTypes) {
        gfxAllocation.setAllocationType(type);

        pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);

        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(pKernel->hasIndirectStatelessAccessToHostMemory());
        } else {
            EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());
        }

        EXPECT_FALSE(pKernel->isAuxTranslationRequired());

        pKernel->updateAuxTranslationRequired();

        if (type == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
            EXPECT_TRUE(pKernel->isAuxTranslationRequired());
        } else {
            EXPECT_FALSE(pKernel->isAuxTranslationRequired());
        }

        pKernel->clearUnifiedMemoryExecInfo();
        pKernel->setAuxTranslationRequired(false);
    }
}
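Taken together, these tests cover the three routes by which host memory can reach a kernel: a buffer argument and an SVM-allocation argument count as direct stateless access, while allocations attached through exec info count as indirect access and additionally require the kernel to be built with indirect stateless access. A condensed sketch, object names chosen for illustration:

    kernel->setArg(0, sizeof(cl_mem), &buf);      // direct: regular buffer argument
    kernel->setArgSvmAlloc(0, ptr, &svmAlloc);    // direct: SVM allocation argument
    kernel->setUnifiedMemoryExecInfo(&usmAlloc);  // indirect: also needs hasIndirectStatelessAccess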

class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
  public:
    void SetUp() {

@@ -6,23 +6,321 @@
 */

#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"

#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "test.h"

using namespace NEO;
namespace NEO {

TEST(KernelWithCacheFlushTests, givenDeviceWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
class KernelWithCacheFlushTests : public PlatformFixture, public testing::TestWithParam<std::tuple<const char *, const char *>> {
  public:
    void SetUp() override {
    }
    void TearDown() override {
    }
    void initializePlatform() {
        PlatformFixture::SetUp();
    }
    void clearPlatform() {
        PlatformFixture::TearDown();
    }
};
TEST_F(KernelWithCacheFlushTests, givenDeviceWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    initializePlatform();
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device.get());
    MockCommandQueue queue;
    MockContext mockContext(device);
    MockCommandQueue queue(mockContext);
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue);
    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
TEST_F(KernelWithCacheFlushTests, givenQueueWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    initializePlatform();
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    MockCommandQueue queue(mockContext);

    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue);
    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
TEST_F(KernelWithCacheFlushTests, givenCacheFlushForAllQueuesDisabledWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    initializePlatform();
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(0);
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    MockCommandQueue queue(mockContext);
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue);

    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushForMultiEngineEnabledWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    initializePlatform();
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = true;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());

    EXPECT_FALSE(flushRequired);
    clearPlatform();
}

HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushForSingleDeviceProgramWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.CreateMultipleSubDevices.set(1);
    initializePlatform();
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    cmdQ->requiresCacheFlushAfterWalker = true;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());

    EXPECT_FALSE(flushRequired);
    clearPlatform();
}

HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushForDefaultTypeContextWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());

    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithNullGlobalSurfaceWhenCheckIfKernelRequireFlushThenReturnedFalse) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());

    EXPECT_FALSE(flushRequired);
    clearPlatform();
}
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithGlobalSurfaceWhenCheckIfKernelRequireFlushThenReturnedTrue) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;

    void *allocPtr = reinterpret_cast<void *>(static_cast<uintptr_t>(6 * MemoryConstants::pageSize));
    MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2};
    mockKernel->mockProgram->setGlobalSurface(&globalAllocation);

    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());

    EXPECT_TRUE(flushRequired);
    mockKernel->mockProgram->setGlobalSurface(nullptr);
    clearPlatform();
}

HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAddRequiredCommands, IsAtLeastXeHpCore) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.CreateMultipleSubDevices.set(2);

    initializePlatform();

    if (!pPlatform->getClDevice(0)->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker) {
        clearPlatform();
        GTEST_SKIP();
    }

    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);

    CsrDependencies csrDeps;
    DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo(mockKernel->mockKernel);
    dispatchInfo.setKernel(mockKernel->mockKernel);
    dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
    dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
    multiDispatchInfo.push(dispatchInfo);

    size_t initialSize = 0;
    size_t sizeWithCacheFlush = 0;
    size_t expectedDiff = sizeof(typename FamilyType::PIPE_CONTROL);
    if constexpr (FamilyType::isUsingL3Control) {
        expectedDiff += sizeof(typename FamilyType::L3_CONTROL) + sizeof(typename FamilyType::L3_FLUSH_ADDRESS_RANGE);
    }

    {
        EXPECT_FALSE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ));

        initialSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false);
    }

    {
        DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
        void *allocPtr = reinterpret_cast<void *>(static_cast<uintptr_t>(6 * MemoryConstants::pageSize));
        MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2};
        mockKernel->mockProgram->setGlobalSurface(&globalAllocation);

        cmdQ->requiresCacheFlushAfterWalker = true;
        auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
        ultCsr.multiOsContextCapable = false;
        EXPECT_TRUE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ));

        sizeWithCacheFlush = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false);
    }

    EXPECT_EQ(initialSize + expectedDiff, sizeWithCacheFlush);

    mockKernel->mockProgram->setGlobalSurface(nullptr);
    clearPlatform();
}

HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithAllocationsRequireCacheFlushFlagOnWhenCheckIfKernelRequireFlushThenReturnedTrue) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    mockKernel->mockKernel->svmAllocationsRequireCacheFlush = true;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());

    EXPECT_TRUE(flushRequired);
    clearPlatform();
}
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithAllocationsWhichRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedTrue) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    uint32_t numDevices = 2;
    DebugManager.flags.CreateMultipleSubDevices.set(numDevices);
    initializePlatform();
    auto device = pPlatform->getClDevice(0);

    auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
    MockContext mockContext(device);
    mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
    cmdQ->requiresCacheFlushAfterWalker = true;
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
    ultCsr.multiOsContextCapable = false;
    mockKernel->mockKernel->svmAllocationsRequireCacheFlush = false;
    mockKernel->mockKernel->kernelArgRequiresCacheFlush.resize(2);
    MockGraphicsAllocation cacheRequiringAllocation;
    mockKernel->mockKernel->kernelArgRequiresCacheFlush[1] = &cacheRequiringAllocation;
    bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());

    EXPECT_TRUE(flushRequired);
    clearPlatform();
}

HWTEST_F(KernelWithCacheFlushTests,
         givenEnableCacheFlushAfterWalkerForAllQueuesFlagSetWhenCheckIfKernelRequierFlushThenTrueIsAlwaysReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
    DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1);
    MockGraphicsAllocation cacheRequiringAllocation;

    for (auto isMultiEngine : ::testing::Bool()) {
        for (auto isMultiDevice : ::testing::Bool()) {
            for (auto isDefaultContext : ::testing::Bool()) {
                for (auto svmAllocationRequiresCacheFlush : ::testing::Bool()) {
                    for (auto kernelArgRequiresCacheFlush : ::testing::Bool()) {
                        auto deviceCount = (isMultiDevice ? 2 : 0);
                        auto contextType = (isDefaultContext ? ContextType::CONTEXT_TYPE_DEFAULT : ContextType::CONTEXT_TYPE_SPECIALIZED);
                        GraphicsAllocation *kernelArg = (kernelArgRequiresCacheFlush ? &cacheRequiringAllocation : nullptr);

                        DebugManager.flags.CreateMultipleSubDevices.set(deviceCount);
                        initializePlatform();

                        auto device = pPlatform->getClDevice(0);
                        MockContext mockContext(device);
                        mockContext.contextType = contextType;
                        auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&mockContext, device, nullptr);
                        cmdQ->requiresCacheFlushAfterWalker = true;
                        auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(cmdQ->getGpgpuCommandStreamReceiver());
                        ultCsr.multiOsContextCapable = isMultiEngine;

                        auto mockKernel = std::make_unique<MockKernelWithInternals>(*device);
                        mockKernel->mockKernel->svmAllocationsRequireCacheFlush = svmAllocationRequiresCacheFlush;
                        mockKernel->mockKernel->kernelArgRequiresCacheFlush.resize(1);
                        mockKernel->mockKernel->kernelArgRequiresCacheFlush[0] = kernelArg;

                        auto flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get());
                        EXPECT_TRUE(flushRequired);
                        clearPlatform();
                    }
                }
            }
        }
    }
}
} // namespace NEO

@@ -3171,3 +3171,43 @@ TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSi
    EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize);
    EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG);
}

struct KernelLargeGrfTests : Test<ClDeviceFixture> {
    void SetUp() override {
        ClDeviceFixture::SetUp();
        program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));
        pKernelInfo = std::make_unique<KernelInfo>();
        pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 64;
    }

    void TearDown() override {
        ClDeviceFixture::TearDown();
    }

    std::unique_ptr<MockProgram> program;
    std::unique_ptr<KernelInfo> pKernelInfo;
    SPatchExecutionEnvironment executionEnvironment = {};
};

HWTEST_F(KernelLargeGrfTests, GivenLargeGrfWhenGettingMaxWorkGroupSizeThenCorrectValueReturned) {
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
    pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4;
    pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize = 0;
    {
        MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);

        pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber - 1;
        EXPECT_EQ(CL_SUCCESS, kernel.initialize());
        EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData);
        EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize);
    }

    {
        MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);

        pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
        EXPECT_EQ(CL_SUCCESS, kernel.initialize());
        EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize >> 1, *kernel.maxWorkGroupSizeForCrossThreadData);
        EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize >> 1, kernel.maxKernelWorkGroupSize);
    }
}

@@ -299,5 +299,6 @@ EnableUserFenceUseCtxId = -1
EnableResourceTags = 0
SetKmdWaitTimeout = -1
OverrideNotifyEnableForTagUpdatePostSync = -1
EnableCacheFlushAfterWalkerForAllQueues = -1
Force32BitDriverSupport = -1
OverrideCmdQueueSynchronousMode = -1

@@ -206,6 +206,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then max
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushEveryEnqueueCount, -1, "If greater than 0, driver performs implicit flush every N submissions.")
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForNewResource, -1, "-1: platform specific, 0: force disable, 1: force enable")
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForIdleGpu, -1, "-1: platform specific, 0: force disable, 1: force enable")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalkerForAllQueues, -1, "Enable cache flush after walker even if queue doesn't require it")

/*DIRECT SUBMISSION FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")
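Like the neighboring entries, the -1 default in both the config listing above and this declaration means the per-queue heuristics stay in force; 0 forces the flush off, and 1 forces it on wherever the platform supports cache flush after walker. In builds that read debug variables, the flag can typically be supplied through the usual settings mechanisms under the same name, for example a settings-file line such as EnableCacheFlushAfterWalkerForAllQueues = 1, which requiresCacheFlushCommand() then treats as an unconditional override.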