Use kernel info to determine if AUX resolves are required

This commit decouples the logic for AUX resolves from set kernel arg handlers.

Related-To: NEO-5107

Change-Id: I4c2912dc18633bcaefddb03cc6966e859d95262c
Signed-off-by: Slawomir Milczarek <slawomir.milczarek@intel.com>
This commit is contained in:
Slawomir Milczarek
2020-10-03 23:11:39 +02:00
committed by sys_ocldev
parent 5826d3e105
commit 5f7b763ce5
10 changed files with 85 additions and 24 deletions

View File

@ -367,8 +367,6 @@ cl_int Kernel::initialize() {
kernelArguments[i].type = BUFFER_OBJ;
usingBuffers = true;
allBufferArgsStateful &= static_cast<uint32_t>(argInfo.pureStatefulBufferAccess);
this->auxTranslationRequired |= !kernelInfo.kernelArgInfo[i].pureStatefulBufferAccess &&
HwHelper::renderCompressedBuffersSupported(hwInfo);
} else if (argInfo.isDeviceQueue) {
kernelArgHandlers[i] = &Kernel::setArgDevQueue;
kernelArguments[i].type = DEVICE_QUEUE_OBJ;
@ -377,10 +375,9 @@ cl_int Kernel::initialize() {
}
}
auxTranslationRequired = HwHelper::renderCompressedBuffersSupported(hwInfo) && hwHelper.requiresAuxResolves(kernelInfo);
if (DebugManager.flags.ForceAuxTranslationEnabled.get() != -1) {
auxTranslationRequired &= !!DebugManager.flags.ForceAuxTranslationEnabled.get();
} else {
auxTranslationRequired &= hwHelper.requiresAuxResolves();
}
if (auxTranslationRequired) {
program->getContextPtr()->setResolvesRequiredInKernels(true);

View File

@ -197,6 +197,7 @@ struct KernelInfo {
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
bool usesSsh = false;
bool requiresSshForBuffers = false;
bool hasStatelessAccessToHostMemory = false;
bool isVmeWorkload = false;
char *crossThreadData = nullptr;
size_t reqdWorkGroupSize[3] = {WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset};

View File

@ -20,6 +20,7 @@
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_timestamp_container.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "test.h"
namespace NEO {
@ -66,13 +67,15 @@ struct BlitEnqueueTests : public ::testing::Test {
template <typename FamilyType>
void SetUpT() {
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
if (is32bit || !hwHelper.requiresAuxResolves()) {
if (is32bit) {
GTEST_SKIP();
}
REQUIRE_AUX_RESOLVES();
DebugManager.flags.EnableTimestampPacket.set(timestampPacketEnabled);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1);
DebugManager.flags.ForceAuxTranslationMode.set(1);
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
DebugManager.flags.CsrDispatchMode.set(static_cast<int32_t>(DispatchMode::ImmediateDispatch));
DebugManager.flags.EnableLocalMemory.set(1);
@ -92,7 +95,6 @@ struct BlitEnqueueTests : public ::testing::Test {
auto mockCmdQueue = new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr);
commandQueue.reset(mockCmdQueue);
mockKernel = std::make_unique<MockKernelWithInternals>(*device, bcsMockContext.get());
mockKernel->mockKernel->auxTranslationRequired = true;
auto mockProgram = mockKernel->mockProgram;
mockProgram->setAllowNonUniform(true);
@ -108,13 +110,18 @@ struct BlitEnqueueTests : public ::testing::Test {
if (mockKernel->kernelInfo.kernelArgInfo.size() < buffers.size()) {
mockKernel->kernelInfo.kernelArgInfo.resize(buffers.size());
}
for (uint32_t i = 0; i < buffers.size(); i++) {
mockKernel->kernelInfo.kernelArgInfo.at(i).kernelArgPatchInfoVector.resize(1);
mockKernel->kernelInfo.kernelArgInfo.at(i).isBuffer = true;
mockKernel->kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess = false;
}
mockKernel->mockKernel->initialize();
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
for (uint32_t i = 0; i < buffers.size(); i++) {
cl_mem clMem = buffers[i];
mockKernel->kernelInfo.kernelArgInfo.at(i).kernelArgPatchInfoVector.resize(1);
mockKernel->kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess = false;
mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem);
}
}

View File

@ -814,9 +814,10 @@ HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationAndWithoutA
}
HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
if (!HwHelper::get(this->pDevice->getHardwareInfo().platform.eRenderCoreFamily).requiresAuxResolves()) {
GTEST_SKIP();
}
REQUIRE_AUX_RESOLVES();
DebugManagerStateRestore dbgRestore;
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
MyCmdQ<FamilyType> cmdQ(context, pClDevice);
size_t gws[3] = {1, 0, 0};
@ -831,20 +832,27 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe
buffer3.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
MockKernelWithInternals mockKernel(*pClDevice, context);
mockKernel.mockKernel->auxTranslationRequired = true;
mockKernel.kernelInfo.kernelArgInfo.resize(6);
for (auto &kernelInfo : mockKernel.kernelInfo.kernelArgInfo) {
kernelInfo.kernelArgPatchInfoVector.resize(1);
}
mockKernel.mockKernel->initialize();
mockKernel.kernelInfo.kernelArgInfo.at(0).isBuffer = true;
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
mockKernel.kernelInfo.kernelArgInfo.at(1).isBuffer = true;
mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true;
mockKernel.kernelInfo.kernelArgInfo.at(2).isBuffer = true;
mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false;
mockKernel.kernelInfo.kernelArgInfo.at(3).isBuffer = true;
mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true;
mockKernel.kernelInfo.kernelArgInfo.at(4).isBuffer = true;
mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false;
mockKernel.kernelInfo.kernelArgInfo.at(5).isBuffer = true;
mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false;
mockKernel.mockKernel->initialize();
EXPECT_TRUE(mockKernel.mockKernel->auxTranslationRequired);
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem0); // stateless on regular buffer - dont insert
mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem *), &clMem1); // stateful on regular buffer - dont insert
mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); // stateless on BUFFER_COMPRESSED - insert

View File

@ -19,7 +19,16 @@ using HwHelperTestGen12Lp = HwHelperTest;
GEN12LPTEST_F(HwHelperTestGen12Lp, givenTglLpThenAuxTranslationIsRequired) {
auto &helper = HwHelper::get(renderCoreFamily);
EXPECT_TRUE(helper.requiresAuxResolves());
for (auto isPureStateful : {false, true}) {
KernelInfo kernelInfo{};
KernelArgInfo argInfo{};
argInfo.isBuffer = true;
argInfo.pureStatefulBufferAccess = isPureStateful;
kernelInfo.kernelArgInfo.push_back(std::move(argInfo));
EXPECT_EQ(!isPureStateful, helper.requiresAuxResolves(kernelInfo));
}
}
GEN12LPTEST_F(HwHelperTestGen12Lp, getMaxBarriersPerSliceReturnsCorrectSize) {

View File

@ -2744,11 +2744,11 @@ TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonState
capabilityTable.ftrRenderCompressedBuffers = true;
kernel.mockKernel->initialize();
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo), kernel.mockKernel->isAuxTranslationRequired());
DebugManager.flags.ForceAuxTranslationEnabled.set(-1);
kernel.mockKernel->initialize();
EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(), kernel.mockKernel->isAuxTranslationRequired());
EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo), kernel.mockKernel->isAuxTranslationRequired());
DebugManager.flags.ForceAuxTranslationEnabled.set(0);
kernel.mockKernel->initialize();
@ -2772,7 +2772,12 @@ TEST(KernelTest, WhenAuxTranslationIsRequiredThenKernelSetsRequiredResolvesInCon
kernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = false;
kernel.mockKernel->initialize();
EXPECT_TRUE(context->getResolvesRequiredInKernels());
if (HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo)) {
EXPECT_TRUE(context->getResolvesRequiredInKernels());
} else {
EXPECT_FALSE(context->getResolvesRequiredInKernels());
}
}
TEST(KernelTest, WhenAuxTranslationIsNotRequiredThenKernelDoesNotSetRequiredResolvesInContext) {
@ -2814,7 +2819,7 @@ TEST(KernelTest, givenDebugVariableSetWhenKernelHasStatefulBufferAccessThenMarkK
kernel.mockKernel->initialize();
if (HwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves()) {
if (HwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo)) {
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
} else {
EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());

View File

@ -8,10 +8,13 @@
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "shared/source/device/device_info.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/test/unit_test/helpers/default_hw_info.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/kernel/kernel.h"
using namespace NEO;
@ -40,3 +43,14 @@ bool TestChecks::supportsDeviceEnqueue(const Context *pContext) {
// Reads the device-enqueue support flag from the capability table of the given hardware info.
bool TestChecks::supportsDeviceEnqueue(const std::unique_ptr<HardwareInfo> &pHardwareInfo) {
    const auto &capabilities = pHardwareInfo->capabilityTable;
    return capabilities.supportsDeviceEnqueue;
}
bool TestChecks::supportsAuxResolves() {
KernelInfo kernelInfo{};
KernelArgInfo argInfo{};
argInfo.isBuffer = true;
argInfo.pureStatefulBufferAccess = false;
kernelInfo.kernelArgInfo.push_back(std::move(argInfo));
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
return hwHelper.requiresAuxResolves(kernelInfo);
}

View File

@ -23,6 +23,7 @@ bool supportsDeviceEnqueue(const ClDevice *pClDevice);
bool supportsDeviceEnqueue(const Context *pContext);
bool supportsDeviceEnqueue(const std::unique_ptr<HardwareInfo> &pHardwareInfo);
bool supportsPipes(const ClDevice *pClDevice);
bool supportsAuxResolves();
} // namespace TestChecks
} // namespace NEO
@ -49,3 +50,8 @@ bool supportsPipes(const ClDevice *pClDevice);
if (NEO::TestChecks::supportsDeviceEnqueue(param) == false) { \
GTEST_SKIP(); \
}
// Skips the current test (via GTEST_SKIP) when NEO::TestChecks::supportsAuxResolves() returns false.
#define REQUIRE_AUX_RESOLVES() \
if (NEO::TestChecks::supportsAuxResolves() == false) { \
GTEST_SKIP(); \
}

View File

@ -27,6 +27,7 @@ class GraphicsAllocation;
struct AllocationData;
struct AllocationProperties;
struct HardwareCapabilities;
struct KernelInfo;
struct RootDeviceEnvironment;
struct PipeControlArgs;
@ -99,7 +100,7 @@ class HwHelper {
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
virtual uint32_t getMetricsLibraryGenId() const = 0;
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
virtual bool requiresAuxResolves() const = 0;
virtual bool requiresAuxResolves(const KernelInfo &kernelInfo) const = 0;
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
@ -133,6 +134,7 @@ class HwHelper {
protected:
virtual LocalMemoryAccessMode getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const = 0;
virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0;
HwHelper() = default;
};
@ -250,7 +252,7 @@ class HwHelperHw : public HwHelper {
uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override;
bool requiresAuxResolves() const override;
bool requiresAuxResolves(const KernelInfo &kernelInfo) const override;
bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
@ -312,6 +314,7 @@ class HwHelperHw : public HwHelper {
protected:
LocalMemoryAccessMode getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const override;
bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override;
static const AuxTranslationMode defaultAuxTranslationMode;
HwHelperHw() = default;

View File

@ -317,8 +317,8 @@ uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::requiresAuxResolves() const {
return true;
inline bool HwHelperHw<GfxFamily>::requiresAuxResolves(const KernelInfo &kernelInfo) const {
return hasStatelessAccessToBuffer(kernelInfo);
}
template <typename GfxFamily>
@ -447,6 +447,17 @@ inline LocalMemoryAccessMode HwHelperHw<GfxFamily>::getDefaultLocalMemoryAccessM
return LocalMemoryAccessMode::Default;
}
// Returns true when at least one buffer argument of the kernel is not flagged
// as pure stateful buffer access; arguments that are not buffers are ignored.
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const {
    for (const auto &argInfo : kernelInfo.kernelArgInfo) {
        if (argInfo.isBuffer && !argInfo.pureStatefulBufferAccess) {
            return true;
        }
    }
    return false;
}
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
return sizeof(typename GfxFamily::PIPE_CONTROL);