mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-26 07:00:17 +08:00
W/A for disabling RCC RHWO for compressed media surfaces on gen12lp
Whenever a media-compressed surface is used, the RCC Read-Hit-Write optimization disable bit (bit 14) has to be set in the Common Slice Chicken1 register (7010h). Related-To: NEO-4982 Change-Id: I71b91b52692252459da05b737838eb4854575974 Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
This commit is contained in:
@@ -101,6 +101,7 @@ class GpgpuWalkerHelper {
|
||||
bool disablePerfMode);
|
||||
|
||||
static size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel);
|
||||
static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel);
|
||||
|
||||
static size_t setGpgpuWalkerThreadData(
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
|
||||
@@ -171,6 +171,11 @@ size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
|
||||
return (size_t)0;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
|
||||
size_t expectedSizeCS = 0;
|
||||
|
||||
@@ -211,6 +211,7 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilin
|
||||
}
|
||||
size += PerformanceCounters::getGpuCommandsSize(commandQueue, reservePerfCounters);
|
||||
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(pKernel);
|
||||
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(pKernel);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/gen12lp/hw_info.h"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/gmm_helper/resource_info.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_plus.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_plus.inl"
|
||||
@@ -51,6 +54,39 @@ void GpgpuWalkerHelper<TGLLPFamily>::dispatchProfilingCommandsEnd(
|
||||
args);
|
||||
}
|
||||
|
||||
template <>
|
||||
void HardwareInterface<TGLLPFamily>::dispatchWorkarounds(
|
||||
LinearStream *commandStream,
|
||||
CommandQueue &commandQueue,
|
||||
Kernel &kernel,
|
||||
const bool &enable) {
|
||||
|
||||
using MI_LOAD_REGISTER_IMM = typename TGLLPFamily::MI_LOAD_REGISTER_IMM;
|
||||
using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL;
|
||||
|
||||
if (kernel.requiresWaDisableRccRhwoOptimization()) {
|
||||
|
||||
PIPE_CONTROL cmdPipeControl = TGLLPFamily::cmdInitPipeControl;
|
||||
cmdPipeControl.setCommandStreamerStallEnable(true);
|
||||
auto pCmdPipeControl = commandStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pCmdPipeControl = cmdPipeControl;
|
||||
|
||||
uint32_t value = enable ? 0x40004000 : 0x40000000;
|
||||
NEO::LriHelper<TGLLPFamily>::program(commandStream,
|
||||
0x7010,
|
||||
value,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t GpgpuWalkerHelper<TGLLPFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) {
|
||||
if (pKernel->requiresWaDisableRccRhwoOptimization()) {
|
||||
return (2 * (sizeof(TGLLP::PIPE_CONTROL) + sizeof(TGLLPFamily::MI_LOAD_REGISTER_IMM)));
|
||||
}
|
||||
return 0u;
|
||||
}
|
||||
|
||||
template class HardwareInterface<TGLLPFamily>;
|
||||
|
||||
template class GpgpuWalkerHelper<TGLLPFamily>;
|
||||
|
||||
@@ -2487,4 +2487,25 @@ uint32_t Kernel::getAdditionalKernelExecInfo() const {
|
||||
return this->additionalKernelExecInfo;
|
||||
}
|
||||
|
||||
bool Kernel::requiresWaDisableRccRhwoOptimization() const {
|
||||
auto &hardwareInfo = getDevice().getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (hwHelper.isWaDisableRccRhwoOptimizationRequired() && isUsingSharedObjArgs()) {
|
||||
for (auto &arg : getKernelArguments()) {
|
||||
auto clMemObj = static_cast<cl_mem>(arg.object);
|
||||
auto memObj = castToObject<MemObj>(clMemObj);
|
||||
if (memObj && memObj->peekSharingHandler()) {
|
||||
auto allocation = memObj->getGraphicsAllocation(getDevice().getRootDeviceIndex());
|
||||
for (uint32_t handleId = 0u; handleId < allocation->getNumGmms(); handleId++) {
|
||||
if (allocation->getGmm(handleId)->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -421,6 +421,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName);
|
||||
void setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo);
|
||||
uint32_t getAdditionalKernelExecInfo() const;
|
||||
MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization() const;
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_gmm.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "test.h"
|
||||
|
||||
@@ -375,7 +376,12 @@ TEST_F(EnqueueMapBufferTest, givenNonReadOnlyBufferWhenMappedOnGpuThenSetValidEv
|
||||
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, retVal));
|
||||
buffer->setSharingHandler(new SharingHandler());
|
||||
auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
|
||||
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
|
||||
gfxAllocation->setGmm(new MockGmm(), handleId);
|
||||
}
|
||||
buffer->forceDisallowCPUCopy = true;
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_NE(nullptr, buffer.get());
|
||||
|
||||
@@ -418,6 +424,10 @@ TEST_F(EnqueueMapBufferTest, givenReadOnlyBufferWhenMappedOnGpuThenSetValidEvent
|
||||
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, retVal));
|
||||
buffer->setSharingHandler(new SharingHandler());
|
||||
auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
|
||||
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
|
||||
gfxAllocation->setGmm(new MockGmm(), handleId);
|
||||
}
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_NE(nullptr, buffer.get());
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_csr.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_gmm.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h"
|
||||
|
||||
using namespace NEO;
|
||||
@@ -26,9 +27,11 @@ typedef HelloWorldFixture<HelloWorldFixtureFactory> EnqueueKernelFixture;
|
||||
typedef Test<EnqueueKernelFixture> EnqueueKernelTest;
|
||||
|
||||
TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenResetPatchAddress) {
|
||||
|
||||
auto nonSharedBuffer = new MockBuffer;
|
||||
MockGlSharing glSharing;
|
||||
glSharing.uploadDataToBufferInfo(1, 0);
|
||||
MockGmm mockGmm;
|
||||
glSharing.uploadDataToBufferInfo(1, 0, mockGmm.gmmResourceInfo->peekHandle());
|
||||
pContext->setSharingFunctions(glSharing.sharingFunctions.release());
|
||||
auto retVal = CL_SUCCESS;
|
||||
auto sharedBuffer = GlBuffer::createSharedGlBuffer(pContext, CL_MEM_READ_WRITE, 1, &retVal);
|
||||
@@ -51,7 +54,7 @@ TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenRes
|
||||
EXPECT_EQ(sharedBufferGpuAddress, address1);
|
||||
|
||||
// update address
|
||||
glSharing.uploadDataToBufferInfo(1, 1);
|
||||
glSharing.uploadDataToBufferInfo(1, 1, mockGmm.gmmResourceInfo->peekHandle());
|
||||
pCmdQ->enqueueAcquireSharedObjects(1, &sharedMem, 0, nullptr, nullptr, CL_COMMAND_ACQUIRE_GL_OBJECTS);
|
||||
|
||||
callOneWorkItemNDRKernel();
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_allocation_properties.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_gmm.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
@@ -97,6 +98,10 @@ struct MultipleMapBufferTest : public ClDeviceFixture, public ::testing::Test {
|
||||
GraphicsAllocationHelper::toMultiGraphicsAllocation(mockAlloc), false, false, false);
|
||||
if (mapOnGpu) {
|
||||
buffer->setSharingHandler(new SharingHandler());
|
||||
auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
|
||||
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
|
||||
gfxAllocation->setGmm(new MockGmm(), handleId);
|
||||
}
|
||||
}
|
||||
return std::unique_ptr<MockBuffer<FamilyType>>(buffer);
|
||||
}
|
||||
|
||||
@@ -6,9 +6,14 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/gen12lp/hw_info.h"
|
||||
#include "shared/test/unit_test/cmd_parse/hw_parse.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
#include "opencl/test/unit_test/command_stream/linear_stream_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "test.h"
|
||||
|
||||
namespace NEO {
|
||||
@@ -29,4 +34,116 @@ GEN12LPTEST_F(GpgpuWalkerTests, givenMiStoreRegMemWhenAdjustMiStoreRegMemModeThe
|
||||
|
||||
EXPECT_EQ(true, cmd.getMmioRemapEnable());
|
||||
}
|
||||
|
||||
class MockKernelWithApplicableWa : public MockKernel {
|
||||
public:
|
||||
MockKernelWithApplicableWa(Program *program, KernelInfo &kernelInfo, ClDevice &device) : MockKernel(program, kernelInfo, device) {}
|
||||
bool requiresWaDisableRccRhwoOptimization() const override {
|
||||
return waApplicable;
|
||||
}
|
||||
bool waApplicable = false;
|
||||
};
|
||||
|
||||
struct HardwareInterfaceTests : public ClDeviceFixture, public LinearStreamFixture, public ::testing::Test {
|
||||
void SetUp() override {
|
||||
ClDeviceFixture::SetUp();
|
||||
LinearStreamFixture::SetUp();
|
||||
|
||||
pContext = new NEO::MockContext(pClDevice);
|
||||
pCommandQueue = new MockCommandQueue(pContext, pClDevice, nullptr);
|
||||
pProgram = new MockProgram(*pClDevice->getExecutionEnvironment(), pContext, false, &pClDevice->getDevice());
|
||||
pKernel = new MockKernelWithApplicableWa(static_cast<Program *>(pProgram), pProgram->mockKernelInfo, *pClDevice);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
pKernel->release();
|
||||
pProgram->release();
|
||||
pCommandQueue->release();
|
||||
pContext->release();
|
||||
|
||||
LinearStreamFixture::TearDown();
|
||||
ClDeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
CommandQueue *pCommandQueue = nullptr;
|
||||
Context *pContext = nullptr;
|
||||
MockProgram *pProgram = nullptr;
|
||||
MockKernelWithApplicableWa *pKernel = nullptr;
|
||||
};
|
||||
|
||||
// Dispatches the workaround twice (enable, then disable) and checks that each
// call appends one stalling PIPE_CONTROL and one MI_LOAD_REGISTER_IMM to 0x7010
// carrying the expected value.
GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoOptimizationWhenDispatchWorkaroundsIsCalledThenWorkaroundIsApplied) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    const size_t singleSequenceSize = sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM);
    pKernel->waApplicable = true;
    HardwareParse hwParse;

    // Phase 1: enabling the workaround.
    {
        const bool enableWa = true;
        HardwareInterface<FamilyType>::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa);
        ASSERT_EQ(singleSequenceSize, linearStream.getUsed());

        hwParse.parseCommands<FamilyType>(linearStream);

        auto pipeControlIt = find<PIPE_CONTROL *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
        ASSERT_NE(hwParse.cmdList.end(), pipeControlIt);
        auto stallCmd = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
        ASSERT_NE(nullptr, stallCmd);
        EXPECT_TRUE(stallCmd->getCommandStreamerStallEnable());

        auto lriIt = find<MI_LOAD_REGISTER_IMM *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
        ASSERT_NE(hwParse.cmdList.end(), lriIt);
        auto registerWrite = genCmdCast<MI_LOAD_REGISTER_IMM *>(*lriIt);
        ASSERT_NE(nullptr, registerWrite);
        EXPECT_EQ(0x7010u, registerWrite->getRegisterOffset());
        EXPECT_EQ(0x40004000u, registerWrite->getDataDword());
    }

    // Phase 2: disabling the workaround; only the newly appended commands are parsed.
    {
        const bool enableWa = false;
        HardwareInterface<FamilyType>::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa);
        ASSERT_EQ(2 * singleSequenceSize, linearStream.getUsed());

        hwParse.TearDown();
        hwParse.parseCommands<FamilyType>(linearStream, singleSequenceSize);

        auto pipeControlIt = find<PIPE_CONTROL *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
        ASSERT_NE(hwParse.cmdList.end(), pipeControlIt);
        auto stallCmd = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
        ASSERT_NE(nullptr, stallCmd);
        EXPECT_TRUE(stallCmd->getCommandStreamerStallEnable());

        auto lriIt = find<MI_LOAD_REGISTER_IMM *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
        ASSERT_NE(hwParse.cmdList.end(), lriIt);
        auto registerWrite = genCmdCast<MI_LOAD_REGISTER_IMM *>(*lriIt);
        ASSERT_NE(nullptr, registerWrite);
        EXPECT_EQ(0x7010u, registerWrite->getRegisterOffset());
        EXPECT_EQ(0x40000000u, registerWrite->getDataDword());
    }
}
|
||||
|
||||
// With the workaround not applicable, neither the enabling nor the disabling
// dispatch may write anything to the command stream.
// NOTE(review): the test name ends in "ThenWorkaroundIsApplied", but the body
// verifies that nothing is emitted — consider renaming to "...IsNotApplied".
GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenDispatchWorkaroundsIsCalledThenWorkaroundIsApplied) {
    pKernel->waApplicable = false;

    const bool enableWa = true;
    HardwareInterface<FamilyType>::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa);
    EXPECT_EQ(0u, linearStream.getUsed());

    const bool disableWa = false;
    HardwareInterface<FamilyType>::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, disableWa);
    EXPECT_EQ(0u, linearStream.getUsed());
}
|
||||
|
||||
// When the workaround applies, the reserved size must cover two
// PIPE_CONTROL + MI_LOAD_REGISTER_IMM sequences.
GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenAppropriateSizeIsReturned) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    const size_t singleSequenceSize = sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM);
    const size_t expectedSize = 2 * singleSequenceSize;

    pKernel->waApplicable = true;
    const auto reportedSize = GpgpuWalkerHelper<FamilyType>::getSizeForWaDisableRccRhwoOptimization(pKernel);

    EXPECT_EQ(expectedSize, reportedSize);
}
|
||||
|
||||
// When the workaround does not apply, no command-stream space is reserved.
GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenZeroIsReturned) {
    pKernel->waApplicable = false;

    const auto reportedSize = GpgpuWalkerHelper<FamilyType>::getSizeForWaDisableRccRhwoOptimization(pKernel);

    EXPECT_EQ(0u, reportedSize);
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_kernel.h"
|
||||
#include "test.h"
|
||||
|
||||
@@ -18,3 +19,55 @@ GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenRe
|
||||
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
|
||||
EXPECT_FALSE(retVal);
|
||||
}
|
||||
|
||||
// Recreates the fixture devices with the render core family overridden to
// Gen11 and checks that the kernel does not request the workaround.
GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenNotRunningOnGen12lpThenWaDisableRccRhwoOptimizationIsNotRequired) {
    HardwareInfo gen11HwInfo = hardwareInfo;
    gen11HwInfo.platform.eRenderCoreFamily = IGFX_GEN11_CORE;

    // Replace the fixture's devices with ones built from the modified hardware info.
    delete pClDevice;
    pDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&gen11HwInfo, rootDeviceIndex);
    ASSERT_NE(nullptr, pDevice);
    pClDevice = new MockClDevice{pDevice};
    ASSERT_NE(nullptr, pClDevice);

    MockKernelWithInternals kernelWithInternals(*pClDevice);
    const bool waRequired = kernelWithInternals.mockKernel->requiresWaDisableRccRhwoOptimization();
    EXPECT_FALSE(waRequired);
}
|
||||
|
||||
// A freshly created mock kernel with no arguments set must not request the workaround.
GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenNotUsingSharedObjArgsThenWaDisableRccRhwoOptimizationIsNotRequired) {
    MockKernelWithInternals kernelWithInternals(*pClDevice);

    const bool waRequired = kernelWithInternals.mockKernel->requiresWaDisableRccRhwoOptimization();

    EXPECT_FALSE(waRequired);
}
|
||||
|
||||
// Sets three kernel arguments — a plain buffer, an immediate, and a buffer with
// a sharing handler whose Gmm is flagged MediaCompressed — and expects the
// kernel to request the workaround.
GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedThenWaDisableRccRhwoOptimizationIsRequired) {
    MockKernelWithInternals kernel(*pClDevice);

    // Argument layout: [0] buffer, [1] non-buffer, [2] buffer.
    auto &argInfos = kernel.kernelInfo.kernelArgInfo;
    argInfos.resize(3);
    argInfos.at(0).isBuffer = true;
    argInfos.at(1).isBuffer = false;
    argInfos.at(2).isBuffer = true;
    for (auto &argInfo : argInfos) {
        argInfo.kernelArgPatchInfoVector.resize(1);
    }
    kernel.mockKernel->initialize();

    const auto rootDeviceIndex = pClDevice->getRootDeviceIndex();

    // Argument 0: buffer without a sharing handler.
    MockBuffer buffer;
    auto plainAllocation = buffer.getGraphicsAllocation(rootDeviceIndex);
    MockGmm gmm1;
    plainAllocation->setGmm(&gmm1, 0);
    cl_mem clMem = &buffer;
    kernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);

    // Argument 1: immediate value.
    uint32_t immediateArg = 0;
    kernel.mockKernel->setArgImmediate(1, sizeof(uint32_t), &immediateArg);

    // Argument 2: shared buffer whose Gmm reports media compression.
    MockBuffer bufferMediaCompressed;
    bufferMediaCompressed.setSharingHandler(new SharingHandler());
    auto compressedAllocation = bufferMediaCompressed.getGraphicsAllocation(rootDeviceIndex);
    MockGmm gmm2;
    compressedAllocation->setGmm(&gmm2, 0);
    compressedAllocation->getGmm(0)->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = 1;
    cl_mem clMem2 = &bufferMediaCompressed;
    kernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2);

    EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization());
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_buffer.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_execution_environment.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_gmm.h"
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
@@ -1724,6 +1725,11 @@ HWTEST_F(BufferUnmapTest, givenBufferWithSharingHandlerWhenUnmappingThenUseNonBl
|
||||
buffer->setSharingHandler(new SharingHandler());
|
||||
EXPECT_NE(nullptr, buffer->peekSharingHandler());
|
||||
|
||||
auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
|
||||
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
|
||||
gfxAllocation->setGmm(new MockGmm(), handleId);
|
||||
}
|
||||
|
||||
auto mappedPtr = clEnqueueMapBuffer(&cmdQ, buffer.get(), CL_TRUE, CL_MAP_WRITE, 0, 1, 0, nullptr, nullptr, &retVal);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
|
||||
@@ -140,9 +140,10 @@ class MockGlSharing {
|
||||
// Hands the currently stored buffer info (m_bufferInfoOutput) to dllParam.
void uploadDataToBufferInfo() {
    auto &bufferInfo = m_bufferInfoOutput;
    dllParam->loadBuffer(bufferInfo);
}
|
||||
void uploadDataToBufferInfo(unsigned int sharedHandle, int bufferOffset) {
|
||||
void uploadDataToBufferInfo(unsigned int sharedHandle, int bufferOffset, GMM_RESOURCE_INFO *gmmResInfo) {
|
||||
m_bufferInfoOutput.globalShareHandle = sharedHandle;
|
||||
m_bufferInfoOutput.bufferOffset = bufferOffset;
|
||||
m_bufferInfoOutput.pGmmResInfo = gmmResInfo;
|
||||
dllParam->loadBuffer(m_bufferInfoOutput);
|
||||
}
|
||||
void uploadDataToTextureInfo() {
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_event.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_gmm.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
|
||||
#include "test.h"
|
||||
@@ -898,6 +899,10 @@ TEST_F(glSharingTests, givenClGLBufferWhenMapAndUnmapBufferIsCalledThenCopyOnGpu
|
||||
auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
|
||||
auto buffer = castToObject<Buffer>(glBuffer);
|
||||
EXPECT_EQ(buffer->getCpuAddressForMemoryTransfer(), nullptr); // no cpu ptr
|
||||
auto gfxAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);
|
||||
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
|
||||
gfxAllocation->setGmm(new MockGmm(), handleId);
|
||||
}
|
||||
|
||||
auto commandQueue = CommandQueue::create(&context, context.getDevice(0), 0, false, retVal);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
@@ -937,6 +942,10 @@ TEST_F(glSharingTests, givenClGLBufferWhenMapAndUnmapBufferIsCalledTwiceThenReus
|
||||
auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
|
||||
auto buffer = castToObject<Buffer>(glBuffer);
|
||||
EXPECT_EQ(buffer->getCpuAddressForMemoryTransfer(), nullptr); // no cpu ptr
|
||||
auto gfxAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);
|
||||
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
|
||||
gfxAllocation->setGmm(new MockGmm(), handleId);
|
||||
}
|
||||
|
||||
auto commandQueue = CommandQueue::create(&context, context.getDevice(0), 0, false, retVal);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
@@ -45,6 +45,11 @@ bool HwHelperHw<Family>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &h
|
||||
return Gen12LPHelpers::isForceEmuInt32DivRemSPWARequired(hwInfo);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isWaDisableRccRhwoOptimizationRequired() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
|
||||
/* For ICL+ maxThreadCount equals (EUCount * 8).
|
||||
|
||||
@@ -109,6 +109,7 @@ class HwHelper {
|
||||
virtual uint32_t computeSlmValues(uint32_t slmSize) = 0;
|
||||
|
||||
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
|
||||
virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0;
|
||||
virtual uint32_t getMinimalSIMDSize() = 0;
|
||||
virtual uint32_t getHwRevIdFromStepping(uint32_t stepping, const HardwareInfo &hwInfo) const = 0;
|
||||
virtual uint32_t getSteppingFromHwRevId(uint32_t hwRevId, const HardwareInfo &hwInfo) const = 0;
|
||||
@@ -284,6 +285,8 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) override;
|
||||
|
||||
bool isWaDisableRccRhwoOptimizationRequired() const override;
|
||||
|
||||
uint32_t getMinimalSIMDSize() override;
|
||||
|
||||
uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const override;
|
||||
|
||||
@@ -394,6 +394,11 @@ bool HwHelperHw<GfxFamily>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isWaDisableRccRhwoOptimizationRequired() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline uint32_t HwHelperHw<GfxFamily>::getMinimalSIMDSize() {
|
||||
return 8u;
|
||||
|
||||
Reference in New Issue
Block a user