W/A for disabling RCC RHWO for compressed media surfaces on gen12lp

Whenever a media-compressed surface is used, the RCC Read-Hit-Write optimization
disable bit (bit 14) has to be set in the Common Slice Chicken1 register (7010h).

Related-To: NEO-4982

Change-Id: I71b91b52692252459da05b737838eb4854575974
Signed-off-by: Pawel Wilma <pawel.wilma@intel.com>
This commit is contained in:
Pawel Wilma
2020-10-09 12:27:32 +02:00
committed by sys_ocldev
parent ca023fa532
commit 0c3d430f50
17 changed files with 285 additions and 3 deletions

View File

@@ -101,6 +101,7 @@ class GpgpuWalkerHelper {
bool disablePerfMode);
static size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel);
static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel);
static size_t setGpgpuWalkerThreadData(
WALKER_TYPE<GfxFamily> *walkerCmd,

View File

@@ -171,6 +171,11 @@ size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(const K
return (size_t)0;
}
// Generic implementation: reports that no command-stream space is needed for
// the RCC RHWO workaround, regardless of the kernel passed in.
template <typename GfxFamily>
size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) {
    constexpr size_t noWorkaroundSize = 0u;
    return noWorkaroundSize;
}
template <typename GfxFamily>
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
size_t expectedSizeCS = 0;

View File

@@ -211,6 +211,7 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilin
}
size += PerformanceCounters::getGpuCommandsSize(commandQueue, reservePerfCounters);
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(pKernel);
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(pKernel);
return size;
}

View File

@@ -6,6 +6,9 @@
*/
#include "shared/source/gen12lp/hw_info.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_plus.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_plus.inl"
@@ -51,6 +54,39 @@ void GpgpuWalkerHelper<TGLLPFamily>::dispatchProfilingCommandsEnd(
args);
}
// Emits the commands for the "disable RCC RHWO optimization" workaround.
//
// If the kernel reports that the workaround is required, a PIPE_CONTROL with
// command-streamer stall enabled is appended to the stream, followed by a
// register write (via LriHelper) to offset 0x7010. If the kernel does not
// require the workaround, nothing is written to the stream.
//
// commandStream - stream the commands are appended to
// commandQueue  - not used by this specialization
// kernel        - queried through requiresWaDisableRccRhwoOptimization()
// enable        - true writes 0x40004000, false writes 0x40000000
template <>
void HardwareInterface<TGLLPFamily>::dispatchWorkarounds(
    LinearStream *commandStream,
    CommandQueue &commandQueue,
    Kernel &kernel,
    const bool &enable) {
    using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL;

    // Register 0x7010; bit 14 is the RHWO-disable bit.
    constexpr uint32_t registerOffset = 0x7010;
    constexpr uint32_t rhwoDisableBit = 1u << 14;
    // NOTE(review): bit 30 is presumably the write-enable mask for bit 14
    // (masked-register layout) - confirm against the register specification.
    constexpr uint32_t rhwoDisableMask = rhwoDisableBit << 16;

    if (!kernel.requiresWaDisableRccRhwoOptimization()) {
        return;
    }

    // Stall the command streamer before touching the register.
    PIPE_CONTROL cmdPipeControl = TGLLPFamily::cmdInitPipeControl;
    cmdPipeControl.setCommandStreamerStallEnable(true);
    auto pCmdPipeControl = commandStream->getSpaceForCmd<PIPE_CONTROL>();
    *pCmdPipeControl = cmdPipeControl;

    // The mask bit is always written; the disable bit only when enabling.
    const uint32_t value = enable ? (rhwoDisableMask | rhwoDisableBit) : rhwoDisableMask;
    NEO::LriHelper<TGLLPFamily>::program(commandStream,
                                         registerOffset,
                                         value,
                                         false);
}
// Returns the command-stream space to reserve for the RCC RHWO workaround.
//
// When the kernel requires the workaround, the result is twice the size of
// one PIPE_CONTROL plus one MI_LOAD_REGISTER_IMM, i.e. room for two
// dispatchWorkarounds() calls (presumably one to enable and one to restore).
// Returns 0 when the kernel does not require the workaround.
//
// pKernel - kernel to query; must not be null (it is dereferenced directly).
template <>
size_t GpgpuWalkerHelper<TGLLPFamily>::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) {
    // Take both command types from TGLLPFamily, matching dispatchWorkarounds().
    using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL;
    using MI_LOAD_REGISTER_IMM = typename TGLLPFamily::MI_LOAD_REGISTER_IMM;

    if (!pKernel->requiresWaDisableRccRhwoOptimization()) {
        return 0u;
    }

    constexpr size_t singleDispatchSize = sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM);
    return 2 * singleDispatchSize;
}
// Explicit instantiations of the helper templates for TGLLPFamily.
template class HardwareInterface<TGLLPFamily>;
template class GpgpuWalkerHelper<TGLLPFamily>;

View File

@@ -2487,4 +2487,25 @@ uint32_t Kernel::getAdditionalKernelExecInfo() const {
return this->additionalKernelExecInfo;
}
// Reports whether this kernel needs the "disable RCC RHWO optimization"
// workaround.
//
// Returns true only when all of the following hold:
//  - the hardware helper for the device's render core family says the
//    workaround is required,
//  - the kernel uses shared-object arguments, and
//  - at least one argument is a MemObj with a sharing handler whose allocation
//    has a GMM flagged as MediaCompressed.
// Returns false otherwise.
bool Kernel::requiresWaDisableRccRhwoOptimization() const {
    auto &hardwareInfo = getDevice().getHardwareInfo();
    auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    if (!hwHelper.isWaDisableRccRhwoOptimizationRequired() || !isUsingSharedObjArgs()) {
        return false;
    }

    // Loop-invariant, so look it up once.
    const auto rootDeviceIndex = getDevice().getRootDeviceIndex();

    for (auto &arg : getKernelArguments()) {
        auto clMemObj = static_cast<cl_mem>(arg.object);
        auto memObj = castToObject<MemObj>(clMemObj);
        if (!memObj || !memObj->peekSharingHandler()) {
            continue;
        }

        auto allocation = memObj->getGraphicsAllocation(rootDeviceIndex);
        if (!allocation) {
            continue;
        }

        for (uint32_t handleId = 0u; handleId < allocation->getNumGmms(); handleId++) {
            // A GMM slot may be unset on a shared allocation; skip it instead
            // of dereferencing a null pointer.
            auto gmm = allocation->getGmm(handleId);
            if (gmm && gmm->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed) {
                return true;
            }
        }
    }
    return false;
}
} // namespace NEO

View File

@@ -421,6 +421,7 @@ class Kernel : public BaseObject<_cl_kernel> {
int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName);
void setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo);
uint32_t getAdditionalKernelExecInfo() const;
MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization() const;
protected:
struct ObjectCounts {

View File

@@ -16,6 +16,7 @@
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_gmm.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "test.h"
@@ -375,7 +376,12 @@ TEST_F(EnqueueMapBufferTest, givenNonReadOnlyBufferWhenMappedOnGpuThenSetValidEv
std::unique_ptr<Buffer> buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, retVal));
buffer->setSharingHandler(new SharingHandler());
auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
gfxAllocation->setGmm(new MockGmm(), handleId);
}
buffer->forceDisallowCPUCopy = true;
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, buffer.get());
@@ -418,6 +424,10 @@ TEST_F(EnqueueMapBufferTest, givenReadOnlyBufferWhenMappedOnGpuThenSetValidEvent
std::unique_ptr<Buffer> buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, retVal));
buffer->setSharingHandler(new SharingHandler());
auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
gfxAllocation->setGmm(new MockGmm(), handleId);
}
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, buffer.get());

View File

@@ -18,6 +18,7 @@
#include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_csr.h"
#include "opencl/test/unit_test/mocks/mock_gmm.h"
#include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h"
using namespace NEO;
@@ -26,9 +27,11 @@ typedef HelloWorldFixture<HelloWorldFixtureFactory> EnqueueKernelFixture;
typedef Test<EnqueueKernelFixture> EnqueueKernelTest;
TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenResetPatchAddress) {
auto nonSharedBuffer = new MockBuffer;
MockGlSharing glSharing;
glSharing.uploadDataToBufferInfo(1, 0);
MockGmm mockGmm;
glSharing.uploadDataToBufferInfo(1, 0, mockGmm.gmmResourceInfo->peekHandle());
pContext->setSharingFunctions(glSharing.sharingFunctions.release());
auto retVal = CL_SUCCESS;
auto sharedBuffer = GlBuffer::createSharedGlBuffer(pContext, CL_MEM_READ_WRITE, 1, &retVal);
@@ -51,7 +54,7 @@ TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenRes
EXPECT_EQ(sharedBufferGpuAddress, address1);
// update address
glSharing.uploadDataToBufferInfo(1, 1);
glSharing.uploadDataToBufferInfo(1, 1, mockGmm.gmmResourceInfo->peekHandle());
pCmdQ->enqueueAcquireSharedObjects(1, &sharedMem, 0, nullptr, nullptr, CL_COMMAND_ACQUIRE_GL_OBJECTS);
callOneWorkItemNDRKernel();

View File

@@ -11,6 +11,7 @@
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_allocation_properties.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_gmm.h"
#include "test.h"
using namespace NEO;
@@ -97,6 +98,10 @@ struct MultipleMapBufferTest : public ClDeviceFixture, public ::testing::Test {
GraphicsAllocationHelper::toMultiGraphicsAllocation(mockAlloc), false, false, false);
if (mapOnGpu) {
buffer->setSharingHandler(new SharingHandler());
auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
gfxAllocation->setGmm(new MockGmm(), handleId);
}
}
return std::unique_ptr<MockBuffer<FamilyType>>(buffer);
}

View File

@@ -6,9 +6,14 @@
*/
#include "shared/source/gen12lp/hw_info.h"
#include "shared/test/unit_test/cmd_parse/hw_parse.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/command_queue/hardware_interface.h"
#include "opencl/test/unit_test/command_stream/linear_stream_fixture.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "test.h"
namespace NEO {
@@ -29,4 +34,116 @@ GEN12LPTEST_F(GpgpuWalkerTests, givenMiStoreRegMemWhenAdjustMiStoreRegMemModeThe
EXPECT_EQ(true, cmd.getMmioRemapEnable());
}
// Kernel mock whose requiresWaDisableRccRhwoOptimization() answer is driven
// directly by the public waApplicable flag (false by default).
class MockKernelWithApplicableWa : public MockKernel {
  public:
    bool waApplicable = false;

    MockKernelWithApplicableWa(Program *program, KernelInfo &kernelInfo, ClDevice &device)
        : MockKernel(program, kernelInfo, device) {}

    // Returns whatever the test stored in waApplicable.
    bool requiresWaDisableRccRhwoOptimization() const override {
        return waApplicable;
    }
};
struct HardwareInterfaceTests : public ClDeviceFixture, public LinearStreamFixture, public ::testing::Test {
void SetUp() override {
ClDeviceFixture::SetUp();
LinearStreamFixture::SetUp();
pContext = new NEO::MockContext(pClDevice);
pCommandQueue = new MockCommandQueue(pContext, pClDevice, nullptr);
pProgram = new MockProgram(*pClDevice->getExecutionEnvironment(), pContext, false, &pClDevice->getDevice());
pKernel = new MockKernelWithApplicableWa(static_cast<Program *>(pProgram), pProgram->mockKernelInfo, *pClDevice);
}
void TearDown() override {
pKernel->release();
pProgram->release();
pCommandQueue->release();
pContext->release();
LinearStreamFixture::TearDown();
ClDeviceFixture::TearDown();
}
CommandQueue *pCommandQueue = nullptr;
Context *pContext = nullptr;
MockProgram *pProgram = nullptr;
MockKernelWithApplicableWa *pKernel = nullptr;
};
// Verifies that, for a kernel reporting the workaround as applicable,
// dispatchWorkarounds() emits a stalling PIPE_CONTROL plus a register write to
// 0x7010 - with value 0x40004000 when called with enable == true and
// 0x40000000 when called with enable == false.
GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoOptimizationWhenDispatchWorkaroundsIsCalledThenWorkaroundIsApplied) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
// First call: request the workaround to be enabled.
bool enableWa = true;
pKernel->waApplicable = true;
HardwareInterface<FamilyType>::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa);
// Exactly one PIPE_CONTROL and one MI_LOAD_REGISTER_IMM must have been written.
size_t expectedUsedForEnableWa = (sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM));
ASSERT_EQ(expectedUsedForEnableWa, linearStream.getUsed());
HardwareParse hwParse;
hwParse.parseCommands<FamilyType>(linearStream);
auto itorPipeCtrl = find<PIPE_CONTROL *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
ASSERT_NE(hwParse.cmdList.end(), itorPipeCtrl);
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*itorPipeCtrl);
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
ASSERT_NE(hwParse.cmdList.end(), itorLri);
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLri);
ASSERT_NE(nullptr, lriCmd);
EXPECT_EQ(0x7010u, lriCmd->getRegisterOffset());
EXPECT_EQ(0x40004000u, lriCmd->getDataDword());
// Second call: request the workaround to be disabled; the stream now holds
// two PIPE_CONTROL + MI_LOAD_REGISTER_IMM pairs.
enableWa = false;
HardwareInterface<FamilyType>::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa);
size_t expectedUsedForDisableWa = 2 * (sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM));
ASSERT_EQ(expectedUsedForDisableWa, linearStream.getUsed());
// NOTE(review): TearDown() presumably clears the previously parsed list and
// the second parseCommands argument presumably is the start offset, so only
// the commands from the second call are inspected below - confirm in HardwareParse.
hwParse.TearDown();
hwParse.parseCommands<FamilyType>(linearStream, expectedUsedForEnableWa);
itorPipeCtrl = find<PIPE_CONTROL *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
ASSERT_NE(hwParse.cmdList.end(), itorPipeCtrl);
pipeControl = genCmdCast<PIPE_CONTROL *>(*itorPipeCtrl);
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
itorLri = find<MI_LOAD_REGISTER_IMM *>(hwParse.cmdList.begin(), hwParse.cmdList.end());
ASSERT_NE(hwParse.cmdList.end(), itorLri);
lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLri);
ASSERT_NE(nullptr, lriCmd);
EXPECT_EQ(0x7010u, lriCmd->getRegisterOffset());
EXPECT_EQ(0x40000000u, lriCmd->getDataDword());
}
// Checks that dispatchWorkarounds() leaves the stream empty, for both
// enable == true and enable == false, when the kernel does not need the workaround.
// NOTE(review): the test name ends in "ThenWorkaroundIsApplied" although the
// expectations verify that nothing is emitted - consider renaming.
GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenDispatchWorkaroundsIsCalledThenWorkaroundIsApplied) {
    pKernel->waApplicable = false;

    const bool requestEnable = true;
    HardwareInterface<FamilyType>::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, requestEnable);
    EXPECT_EQ(0u, linearStream.getUsed());

    const bool requestDisable = false;
    HardwareInterface<FamilyType>::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, requestDisable);
    EXPECT_EQ(0u, linearStream.getUsed());
}
// Checks that the reserved size for an applicable workaround equals two
// PIPE_CONTROL + MI_LOAD_REGISTER_IMM pairs.
GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenAppropriateSizeIsReturned) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    const size_t sizePerDispatch = sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM);
    const size_t expectedSize = 2 * sizePerDispatch;

    pKernel->waApplicable = true;
    const auto reportedSize = GpgpuWalkerHelper<FamilyType>::getSizeForWaDisableRccRhwoOptimization(pKernel);

    EXPECT_EQ(expectedSize, reportedSize);
}
// Checks that no space is reserved when the kernel does not need the workaround.
GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenZeroIsReturned) {
    pKernel->waApplicable = false;

    const auto reportedSize = GpgpuWalkerHelper<FamilyType>::getSizeForWaDisableRccRhwoOptimization(pKernel);

    EXPECT_EQ(0u, reportedSize);
}
} // namespace NEO

View File

@@ -7,6 +7,7 @@
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "test.h"
@@ -18,3 +19,55 @@ GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenRe
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
EXPECT_FALSE(retVal);
}
// Rebuilds the fixture's device with the render core family forced to
// IGFX_GEN11_CORE and checks that a kernel on it does not require the workaround.
GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenNotRunningOnGen12lpThenWaDisableRccRhwoOptimizationIsNotRequired) {
    HardwareInfo gen11HwInfo = hardwareInfo;
    gen11HwInfo.platform.eRenderCoreFamily = IGFX_GEN11_CORE;

    // Replace the fixture's device pair with one created from the modified info.
    delete pClDevice;
    pDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&gen11HwInfo, rootDeviceIndex);
    ASSERT_NE(nullptr, pDevice);
    pClDevice = new MockClDevice{pDevice};
    ASSERT_NE(nullptr, pClDevice);

    MockKernelWithInternals kernel(*pClDevice);
    const bool waRequired = kernel.mockKernel->requiresWaDisableRccRhwoOptimization();
    EXPECT_FALSE(waRequired);
}
// A freshly created kernel with no arguments set must not require the workaround.
GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenNotUsingSharedObjArgsThenWaDisableRccRhwoOptimizationIsNotRequired) {
    MockKernelWithInternals kernel(*pClDevice);

    const bool waRequired = kernel.mockKernel->requiresWaDisableRccRhwoOptimization();

    EXPECT_FALSE(waRequired);
}
// Builds a kernel with three arguments (buffer, immediate, buffer) where only
// the last buffer has a sharing handler and a GMM flagged MediaCompressed, and
// expects the workaround to be reported as required.
GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedThenWaDisableRccRhwoOptimizationIsRequired) {
MockKernelWithInternals kernel(*pClDevice);
// Describe three arguments: 0 and 2 are buffers, 1 is not.
kernel.kernelInfo.kernelArgInfo.resize(3);
kernel.kernelInfo.kernelArgInfo.at(0).isBuffer = true;
kernel.kernelInfo.kernelArgInfo.at(1).isBuffer = false;
kernel.kernelInfo.kernelArgInfo.at(2).isBuffer = true;
for (auto &kernelInfo : kernel.kernelInfo.kernelArgInfo) {
kernelInfo.kernelArgPatchInfoVector.resize(1);
}
kernel.mockKernel->initialize();
// Argument 0: buffer with a MockGmm installed; no sharing handler is attached
// to it in this test.
MockBuffer buffer;
auto allocation = buffer.getGraphicsAllocation(pClDevice->getRootDeviceIndex());
MockGmm gmm1;
allocation->setGmm(&gmm1, 0);
cl_mem clMem = &buffer;
kernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
// Argument 1: immediate value.
uint32_t immediateArg = 0;
kernel.mockKernel->setArgImmediate(1, sizeof(uint32_t), &immediateArg);
// Argument 2: buffer with a sharing handler whose GMM is marked MediaCompressed.
MockBuffer bufferMediaCompressed;
bufferMediaCompressed.setSharingHandler(new SharingHandler());
allocation = bufferMediaCompressed.getGraphicsAllocation(pClDevice->getRootDeviceIndex());
MockGmm gmm2;
allocation->setGmm(&gmm2, 0);
allocation->getGmm(0)->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = 1;
cl_mem clMem2 = &bufferMediaCompressed;
kernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2);
EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization());
}

View File

@@ -24,6 +24,7 @@
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_execution_environment.h"
#include "opencl/test/unit_test/mocks/mock_gmm.h"
#include "test.h"
using namespace NEO;
@@ -1724,6 +1725,11 @@ HWTEST_F(BufferUnmapTest, givenBufferWithSharingHandlerWhenUnmappingThenUseNonBl
buffer->setSharingHandler(new SharingHandler());
EXPECT_NE(nullptr, buffer->peekSharingHandler());
auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
gfxAllocation->setGmm(new MockGmm(), handleId);
}
auto mappedPtr = clEnqueueMapBuffer(&cmdQ, buffer.get(), CL_TRUE, CL_MAP_WRITE, 0, 1, 0, nullptr, nullptr, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);

View File

@@ -140,9 +140,10 @@ class MockGlSharing {
// Hands the current m_bufferInfoOutput to dllParam unchanged.
void uploadDataToBufferInfo() {
    auto &bufferInfo = m_bufferInfoOutput;
    dllParam->loadBuffer(bufferInfo);
}
void uploadDataToBufferInfo(unsigned int sharedHandle, int bufferOffset) {
// Stores the share handle, buffer offset and GMM resource info in
// m_bufferInfoOutput, then hands the updated structure to dllParam.
void uploadDataToBufferInfo(unsigned int sharedHandle, int bufferOffset, GMM_RESOURCE_INFO *gmmResInfo) {
    auto &bufferInfo = m_bufferInfoOutput;
    bufferInfo.pGmmResInfo = gmmResInfo;
    bufferInfo.bufferOffset = bufferOffset;
    bufferInfo.globalShareHandle = sharedHandle;
    dllParam->loadBuffer(bufferInfo);
}
void uploadDataToTextureInfo() {

View File

@@ -32,6 +32,7 @@
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_gmm.h"
#include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h"
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
#include "test.h"
@@ -898,6 +899,10 @@ TEST_F(glSharingTests, givenClGLBufferWhenMapAndUnmapBufferIsCalledThenCopyOnGpu
auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
auto buffer = castToObject<Buffer>(glBuffer);
EXPECT_EQ(buffer->getCpuAddressForMemoryTransfer(), nullptr); // no cpu ptr
auto gfxAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
gfxAllocation->setGmm(new MockGmm(), handleId);
}
auto commandQueue = CommandQueue::create(&context, context.getDevice(0), 0, false, retVal);
ASSERT_EQ(CL_SUCCESS, retVal);
@@ -937,6 +942,10 @@ TEST_F(glSharingTests, givenClGLBufferWhenMapAndUnmapBufferIsCalledTwiceThenReus
auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
auto buffer = castToObject<Buffer>(glBuffer);
EXPECT_EQ(buffer->getCpuAddressForMemoryTransfer(), nullptr); // no cpu ptr
auto gfxAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);
for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
gfxAllocation->setGmm(new MockGmm(), handleId);
}
auto commandQueue = CommandQueue::create(&context, context.getDevice(0), 0, false, retVal);
ASSERT_EQ(CL_SUCCESS, retVal);

View File

@@ -45,6 +45,11 @@ bool HwHelperHw<Family>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &h
return Gen12LPHelpers::isForceEmuInt32DivRemSPWARequired(hwInfo);
}
// Specialization for `Family`: the RCC RHWO-disable workaround is always required.
template <>
bool HwHelperHw<Family>::isWaDisableRccRhwoOptimizationRequired() const {
    constexpr bool waRequired = true;
    return waRequired;
}
template <>
uint32_t HwHelperHw<Family>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
/* For ICL+ maxThreadCount equals (EUCount * 8).

View File

@@ -109,6 +109,7 @@ class HwHelper {
virtual uint32_t computeSlmValues(uint32_t slmSize) = 0;
virtual bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) = 0;
virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0;
virtual uint32_t getMinimalSIMDSize() = 0;
virtual uint32_t getHwRevIdFromStepping(uint32_t stepping, const HardwareInfo &hwInfo) const = 0;
virtual uint32_t getSteppingFromHwRevId(uint32_t hwRevId, const HardwareInfo &hwInfo) const = 0;
@@ -284,6 +285,8 @@ class HwHelperHw : public HwHelper {
bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) override;
bool isWaDisableRccRhwoOptimizationRequired() const override;
uint32_t getMinimalSIMDSize() override;
uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const override;

View File

@@ -394,6 +394,11 @@ bool HwHelperHw<GfxFamily>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo
return false;
}
// Generic implementation: the RCC RHWO-disable workaround is not required
// unless a family provides its own specialization.
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isWaDisableRccRhwoOptimizationRequired() const {
    constexpr bool waRequired = false;
    return waRequired;
}
template <typename GfxFamily>
inline uint32_t HwHelperHw<GfxFamily>::getMinimalSIMDSize() {
return 8u;