mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Use kernel info to determine if AUX resolves required
This commit decouples the logic for AUX resolves from the set kernel arg handlers.

Related-To: NEO-5107
Change-Id: I4c2912dc18633bcaefddb03cc6966e859d95262c
Signed-off-by: Slawomir Milczarek <slawomir.milczarek@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
5826d3e105
commit
5f7b763ce5
@ -367,8 +367,6 @@ cl_int Kernel::initialize() {
|
||||
kernelArguments[i].type = BUFFER_OBJ;
|
||||
usingBuffers = true;
|
||||
allBufferArgsStateful &= static_cast<uint32_t>(argInfo.pureStatefulBufferAccess);
|
||||
this->auxTranslationRequired |= !kernelInfo.kernelArgInfo[i].pureStatefulBufferAccess &&
|
||||
HwHelper::renderCompressedBuffersSupported(hwInfo);
|
||||
} else if (argInfo.isDeviceQueue) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgDevQueue;
|
||||
kernelArguments[i].type = DEVICE_QUEUE_OBJ;
|
||||
@ -377,10 +375,9 @@ cl_int Kernel::initialize() {
|
||||
}
|
||||
}
|
||||
|
||||
auxTranslationRequired = HwHelper::renderCompressedBuffersSupported(hwInfo) && hwHelper.requiresAuxResolves(kernelInfo);
|
||||
if (DebugManager.flags.ForceAuxTranslationEnabled.get() != -1) {
|
||||
auxTranslationRequired &= !!DebugManager.flags.ForceAuxTranslationEnabled.get();
|
||||
} else {
|
||||
auxTranslationRequired &= hwHelper.requiresAuxResolves();
|
||||
}
|
||||
if (auxTranslationRequired) {
|
||||
program->getContextPtr()->setResolvesRequiredInKernels(true);
|
||||
|
@ -197,6 +197,7 @@ struct KernelInfo {
|
||||
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
|
||||
bool usesSsh = false;
|
||||
bool requiresSshForBuffers = false;
|
||||
bool hasStatelessAccessToHostMemory = false;
|
||||
bool isVmeWorkload = false;
|
||||
char *crossThreadData = nullptr;
|
||||
size_t reqdWorkGroupSize[3] = {WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset};
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_timestamp_container.h"
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
#include "test.h"
|
||||
|
||||
namespace NEO {
|
||||
@ -66,13 +67,15 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||
|
||||
template <typename FamilyType>
|
||||
void SetUpT() {
|
||||
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
if (is32bit || !hwHelper.requiresAuxResolves()) {
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
REQUIRE_AUX_RESOLVES();
|
||||
|
||||
DebugManager.flags.EnableTimestampPacket.set(timestampPacketEnabled);
|
||||
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1);
|
||||
DebugManager.flags.ForceAuxTranslationMode.set(1);
|
||||
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
|
||||
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
|
||||
DebugManager.flags.CsrDispatchMode.set(static_cast<int32_t>(DispatchMode::ImmediateDispatch));
|
||||
DebugManager.flags.EnableLocalMemory.set(1);
|
||||
@ -92,7 +95,6 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||
auto mockCmdQueue = new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr);
|
||||
commandQueue.reset(mockCmdQueue);
|
||||
mockKernel = std::make_unique<MockKernelWithInternals>(*device, bcsMockContext.get());
|
||||
mockKernel->mockKernel->auxTranslationRequired = true;
|
||||
auto mockProgram = mockKernel->mockProgram;
|
||||
mockProgram->setAllowNonUniform(true);
|
||||
|
||||
@ -108,13 +110,18 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||
if (mockKernel->kernelInfo.kernelArgInfo.size() < buffers.size()) {
|
||||
mockKernel->kernelInfo.kernelArgInfo.resize(buffers.size());
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < buffers.size(); i++) {
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).kernelArgPatchInfoVector.resize(1);
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).isBuffer = true;
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess = false;
|
||||
}
|
||||
|
||||
mockKernel->mockKernel->initialize();
|
||||
EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired);
|
||||
|
||||
for (uint32_t i = 0; i < buffers.size(); i++) {
|
||||
cl_mem clMem = buffers[i];
|
||||
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).kernelArgPatchInfoVector.resize(1);
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess = false;
|
||||
mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem);
|
||||
}
|
||||
}
|
||||
|
@ -814,9 +814,10 @@ HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationAndWithoutA
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
|
||||
if (!HwHelper::get(this->pDevice->getHardwareInfo().platform.eRenderCoreFamily).requiresAuxResolves()) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
REQUIRE_AUX_RESOLVES();
|
||||
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
|
||||
|
||||
MyCmdQ<FamilyType> cmdQ(context, pClDevice);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
@ -831,20 +832,27 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe
|
||||
buffer3.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice, context);
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.resize(6);
|
||||
for (auto &kernelInfo : mockKernel.kernelInfo.kernelArgInfo) {
|
||||
kernelInfo.kernelArgPatchInfoVector.resize(1);
|
||||
}
|
||||
|
||||
mockKernel.mockKernel->initialize();
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(1).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(2).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(3).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(4).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(5).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false;
|
||||
|
||||
mockKernel.mockKernel->initialize();
|
||||
EXPECT_TRUE(mockKernel.mockKernel->auxTranslationRequired);
|
||||
|
||||
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem0); // stateless on regular buffer - dont insert
|
||||
mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem *), &clMem1); // stateful on regular buffer - dont insert
|
||||
mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); // stateless on BUFFER_COMPRESSED - insert
|
||||
|
@ -19,7 +19,16 @@ using HwHelperTestGen12Lp = HwHelperTest;
|
||||
|
||||
GEN12LPTEST_F(HwHelperTestGen12Lp, givenTglLpThenAuxTranslationIsRequired) {
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
EXPECT_TRUE(helper.requiresAuxResolves());
|
||||
|
||||
for (auto isPureStateful : {false, true}) {
|
||||
KernelInfo kernelInfo{};
|
||||
KernelArgInfo argInfo{};
|
||||
argInfo.isBuffer = true;
|
||||
argInfo.pureStatefulBufferAccess = isPureStateful;
|
||||
kernelInfo.kernelArgInfo.push_back(std::move(argInfo));
|
||||
|
||||
EXPECT_EQ(!isPureStateful, helper.requiresAuxResolves(kernelInfo));
|
||||
}
|
||||
}
|
||||
|
||||
GEN12LPTEST_F(HwHelperTestGen12Lp, getMaxBarriersPerSliceReturnsCorrectSize) {
|
||||
|
@ -2744,11 +2744,11 @@ TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonState
|
||||
|
||||
capabilityTable.ftrRenderCompressedBuffers = true;
|
||||
kernel.mockKernel->initialize();
|
||||
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
|
||||
EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo), kernel.mockKernel->isAuxTranslationRequired());
|
||||
|
||||
DebugManager.flags.ForceAuxTranslationEnabled.set(-1);
|
||||
kernel.mockKernel->initialize();
|
||||
EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(), kernel.mockKernel->isAuxTranslationRequired());
|
||||
EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo), kernel.mockKernel->isAuxTranslationRequired());
|
||||
|
||||
DebugManager.flags.ForceAuxTranslationEnabled.set(0);
|
||||
kernel.mockKernel->initialize();
|
||||
@ -2772,7 +2772,12 @@ TEST(KernelTest, WhenAuxTranslationIsRequiredThenKernelSetsRequiredResolvesInCon
|
||||
kernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = false;
|
||||
|
||||
kernel.mockKernel->initialize();
|
||||
EXPECT_TRUE(context->getResolvesRequiredInKernels());
|
||||
|
||||
if (HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo)) {
|
||||
EXPECT_TRUE(context->getResolvesRequiredInKernels());
|
||||
} else {
|
||||
EXPECT_FALSE(context->getResolvesRequiredInKernels());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(KernelTest, WhenAuxTranslationIsNotRequiredThenKernelDoesNotSetRequiredResolvesInContext) {
|
||||
@ -2814,7 +2819,7 @@ TEST(KernelTest, givenDebugVariableSetWhenKernelHasStatefulBufferAccessThenMarkK
|
||||
|
||||
kernel.mockKernel->initialize();
|
||||
|
||||
if (HwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves()) {
|
||||
if (HwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo)) {
|
||||
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
|
||||
} else {
|
||||
EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());
|
||||
|
@ -8,10 +8,13 @@
|
||||
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
|
||||
|
||||
#include "shared/source/device/device_info.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/test/unit_test/helpers/default_hw_info.h"
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
@ -40,3 +43,14 @@ bool TestChecks::supportsDeviceEnqueue(const Context *pContext) {
|
||||
bool TestChecks::supportsDeviceEnqueue(const std::unique_ptr<HardwareInfo> &pHardwareInfo) {
|
||||
return pHardwareInfo->capabilityTable.supportsDeviceEnqueue;
|
||||
}
|
||||
|
||||
bool TestChecks::supportsAuxResolves() {
|
||||
KernelInfo kernelInfo{};
|
||||
KernelArgInfo argInfo{};
|
||||
argInfo.isBuffer = true;
|
||||
argInfo.pureStatefulBufferAccess = false;
|
||||
kernelInfo.kernelArgInfo.push_back(std::move(argInfo));
|
||||
|
||||
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
return hwHelper.requiresAuxResolves(kernelInfo);
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ bool supportsDeviceEnqueue(const ClDevice *pClDevice);
|
||||
bool supportsDeviceEnqueue(const Context *pContext);
|
||||
bool supportsDeviceEnqueue(const std::unique_ptr<HardwareInfo> &pHardwareInfo);
|
||||
bool supportsPipes(const ClDevice *pClDevice);
|
||||
bool supportsAuxResolves();
|
||||
} // namespace TestChecks
|
||||
|
||||
} // namespace NEO
|
||||
@ -49,3 +50,8 @@ bool supportsPipes(const ClDevice *pClDevice);
|
||||
if (NEO::TestChecks::supportsDeviceEnqueue(param) == false) { \
|
||||
GTEST_SKIP(); \
|
||||
}
|
||||
|
||||
// Skips the current test (via GTEST_SKIP) when
// NEO::TestChecks::supportsAuxResolves() returns false.
#define REQUIRE_AUX_RESOLVES()                     \
    if (!NEO::TestChecks::supportsAuxResolves()) { \
        GTEST_SKIP();                              \
    }
|
||||
|
@ -27,6 +27,7 @@ class GraphicsAllocation;
|
||||
struct AllocationData;
|
||||
struct AllocationProperties;
|
||||
struct HardwareCapabilities;
|
||||
struct KernelInfo;
|
||||
struct RootDeviceEnvironment;
|
||||
struct PipeControlArgs;
|
||||
|
||||
@ -99,7 +100,7 @@ class HwHelper {
|
||||
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
|
||||
virtual uint32_t getMetricsLibraryGenId() const = 0;
|
||||
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
|
||||
virtual bool requiresAuxResolves() const = 0;
|
||||
virtual bool requiresAuxResolves(const KernelInfo &kernelInfo) const = 0;
|
||||
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
|
||||
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
|
||||
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
@ -133,6 +134,7 @@ class HwHelper {
|
||||
|
||||
protected:
|
||||
virtual LocalMemoryAccessMode getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0;
|
||||
|
||||
HwHelper() = default;
|
||||
};
|
||||
@ -250,7 +252,7 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override;
|
||||
|
||||
bool requiresAuxResolves() const override;
|
||||
bool requiresAuxResolves(const KernelInfo &kernelInfo) const override;
|
||||
|
||||
bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
|
||||
|
||||
@ -312,6 +314,7 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
protected:
|
||||
LocalMemoryAccessMode getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const override;
|
||||
bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override;
|
||||
|
||||
static const AuxTranslationMode defaultAuxTranslationMode;
|
||||
HwHelperHw() = default;
|
||||
|
@ -317,8 +317,8 @@ uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool HwHelperHw<GfxFamily>::requiresAuxResolves() const {
|
||||
return true;
|
||||
inline bool HwHelperHw<GfxFamily>::requiresAuxResolves(const KernelInfo &kernelInfo) const {
|
||||
return hasStatelessAccessToBuffer(kernelInfo);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@ -447,6 +447,17 @@ inline LocalMemoryAccessMode HwHelperHw<GfxFamily>::getDefaultLocalMemoryAccessM
|
||||
return LocalMemoryAccessMode::Default;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool HwHelperHw<GfxFamily>::hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const {
|
||||
bool hasStatelessAccessToBuffer = false;
|
||||
for (uint32_t i = 0; i < kernelInfo.kernelArgInfo.size(); ++i) {
|
||||
if (kernelInfo.kernelArgInfo[i].isBuffer) {
|
||||
hasStatelessAccessToBuffer |= !kernelInfo.kernelArgInfo[i].pureStatefulBufferAccess;
|
||||
}
|
||||
}
|
||||
return hasStatelessAccessToBuffer;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
|
||||
return sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
|
Reference in New Issue
Block a user