mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 05:56:36 +08:00
Remove KernelArgInfo and use KernelDescriptor's args instead
Related-To: NEO-4729 Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
215051c48f
commit
ef71915c71
@@ -109,14 +109,8 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||
|
||||
template <size_t N>
|
||||
void setMockKernelArgs(std::array<Buffer *, N> buffers) {
|
||||
if (mockKernel->kernelInfo.kernelArgInfo.size() < buffers.size()) {
|
||||
mockKernel->kernelInfo.kernelArgInfo.resize(buffers.size());
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < buffers.size(); i++) {
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).kernelArgPatchInfoVector.resize(1);
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).isBuffer = true;
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess = false;
|
||||
mockKernel->kernelInfo.addArgBuffer(i, 0);
|
||||
}
|
||||
|
||||
mockKernel->mockKernel->initialize();
|
||||
@@ -130,14 +124,8 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||
|
||||
template <size_t N>
|
||||
void setMockKernelArgs(std::array<GraphicsAllocation *, N> allocs) {
|
||||
if (mockKernel->kernelInfo.kernelArgInfo.size() < allocs.size()) {
|
||||
mockKernel->kernelInfo.kernelArgInfo.resize(allocs.size());
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < allocs.size(); i++) {
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).kernelArgPatchInfoVector.resize(1);
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).isBuffer = true;
|
||||
mockKernel->kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess = false;
|
||||
mockKernel->kernelInfo.addArgBuffer(i, 0);
|
||||
}
|
||||
|
||||
mockKernel->mockKernel->initialize();
|
||||
|
||||
@@ -102,7 +102,9 @@ struct CommandQueueStateless : public CommandQueueHw<FamilyType> {
|
||||
auto kernel = dispatchInfo.begin()->getKernel();
|
||||
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
if (kernel->getKernelInfo().getArgDescriptorAt(0).is<ArgDescriptor::ArgTPointer>()) {
|
||||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -120,7 +122,7 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
|
||||
}
|
||||
} else {
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1044,7 +1044,6 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC
|
||||
MockCommandQueue cmdQ(context.get(), pClDevice, 0);
|
||||
|
||||
const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful;
|
||||
kernel->getAllocatedKernelInfo()->usesSsh = true;
|
||||
kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress);
|
||||
auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();
|
||||
|
||||
@@ -1065,7 +1064,6 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet
|
||||
MockCommandQueue cmdQ(context.get(), pClDevice, 0);
|
||||
|
||||
const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful;
|
||||
kernel->getAllocatedKernelInfo()->usesSsh = true;
|
||||
kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress);
|
||||
auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();
|
||||
commandStreamReceiver.allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
|
||||
|
||||
@@ -43,39 +43,20 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
|
||||
|
||||
program = std::make_unique<MockProgram>(toClDeviceVector(*pClDevice));
|
||||
|
||||
memset(&kernelHeader, 0, sizeof(kernelHeader));
|
||||
kernelHeader.KernelHeapSize = sizeof(kernelIsa);
|
||||
|
||||
SPatchDataParameterStream dataParameterStream = {};
|
||||
memset(&dataParameterStream, 0, sizeof(dataParameterStream));
|
||||
dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData);
|
||||
populateKernelDescriptor(kernelInfo.kernelDescriptor, dataParameterStream);
|
||||
populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, dataParameterStream);
|
||||
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
memset(&threadPayload, 0, sizeof(threadPayload));
|
||||
threadPayload.LocalIDXPresent = 1;
|
||||
threadPayload.LocalIDYPresent = 1;
|
||||
threadPayload.LocalIDZPresent = 1;
|
||||
populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload);
|
||||
populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, threadPayload);
|
||||
|
||||
SPatchSamplerStateArray samplerArray = {};
|
||||
samplerArray.BorderColorOffset = 0;
|
||||
samplerArray.Count = 1;
|
||||
samplerArray.Offset = 4;
|
||||
samplerArray.Size = 2;
|
||||
samplerArray.Token = 0;
|
||||
populateKernelDescriptor(kernelInfoWithSampler.kernelDescriptor, samplerArray);
|
||||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
kernelInfo.setCrossThreadDataSize(64);
|
||||
kernelInfo.setLocalIds({1, 1, 1});
|
||||
kernelInfo.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
|
||||
kernelInfoWithSampler.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
kernelInfoWithSampler.setCrossThreadDataSize(64);
|
||||
kernelInfoWithSampler.setLocalIds({1, 1, 1});
|
||||
kernelInfoWithSampler.setSamplerTable(0, 1, 4);
|
||||
kernelInfoWithSampler.heapInfo.pKernelHeap = kernelIsa;
|
||||
kernelInfoWithSampler.heapInfo.KernelHeapSize = sizeof(kernelIsa);
|
||||
kernelInfoWithSampler.kernelDescriptor.kernelAttributes.simdSize = 32;
|
||||
kernelInfoWithSampler.heapInfo.pDsh = static_cast<const void *>(dsh);
|
||||
kernelInfoWithSampler.heapInfo.DynamicStateHeapSize = sizeof(dsh);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
@@ -96,13 +77,10 @@ struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture,
|
||||
std::unique_ptr<MockContext> context;
|
||||
std::unique_ptr<MockProgram> program;
|
||||
|
||||
SKernelBinaryHeaderCommon kernelHeader = {};
|
||||
|
||||
KernelInfo kernelInfo;
|
||||
KernelInfo kernelInfoWithSampler;
|
||||
MockKernelInfo kernelInfo;
|
||||
MockKernelInfo kernelInfoWithSampler;
|
||||
|
||||
uint32_t kernelIsa[32];
|
||||
uint32_t crossThreadData[32];
|
||||
uint32_t dsh[32];
|
||||
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
@@ -197,12 +175,8 @@ HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntCha
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDispatched) {
|
||||
SPatchThreadPayload threadPayload = {};
|
||||
threadPayload.LocalIDXPresent = 0;
|
||||
threadPayload.LocalIDYPresent = 0;
|
||||
threadPayload.LocalIDZPresent = 0;
|
||||
threadPayload.UnusedPerThreadConstantPresent = 1;
|
||||
populateKernelDescriptor(kernelInfo.kernelDescriptor, threadPayload);
|
||||
kernelInfo.setLocalIds({0, 0, 0});
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = true;
|
||||
|
||||
MockKernel kernel(program.get(), kernelInfo, *pClDevice);
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
|
||||
@@ -184,7 +184,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DTh
|
||||
|
||||
EXPECT_NE(dshBefore, pDSH->getUsed());
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->usesBindfulAddressingForBuffers()) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
@@ -213,7 +213,7 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) {
|
||||
|
||||
@@ -206,7 +206,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenIndirectDataGetsAdded) {
|
||||
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernelDescriptor, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->usesBindfulAddressingForBuffers()) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
@@ -233,7 +233,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) {
|
||||
|
||||
@@ -170,9 +170,10 @@ HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenSurfaceStateI
|
||||
mockCmdQ->storeMultiDispatchInfo = true;
|
||||
enqueueCopyBufferToImage<FamilyType>();
|
||||
|
||||
auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo();
|
||||
uint32_t index = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(1).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast<uint32_t>(index));
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), index);
|
||||
const auto &imageDesc = dstImage->getImageDesc();
|
||||
// EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
|
||||
@@ -172,9 +172,10 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) {
|
||||
|
||||
enqueueCopyImage<FamilyType>();
|
||||
|
||||
const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo();
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[i].offsetHeap / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast<uint32_t>(index));
|
||||
uint32_t index = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(i).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), index);
|
||||
const auto &imageDesc = dstImage->getImageDesc();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight());
|
||||
@@ -192,12 +193,12 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) {
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment());
|
||||
}
|
||||
|
||||
auto srcIndex = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[0].offsetHeap / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &srcSurfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast<uint32_t>(srcIndex));
|
||||
uint32_t srcIndex = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(0).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &srcSurfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), srcIndex);
|
||||
EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), srcSurfaceState.getSurfaceBaseAddress());
|
||||
|
||||
auto dstIndex = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &dstSurfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast<uint32_t>(dstIndex));
|
||||
uint32_t dstIndex = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(1).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &dstSurfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), dstIndex);
|
||||
EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), dstSurfaceState.getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
|
||||
@@ -119,7 +119,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenIndirectDataGetsAdded)
|
||||
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernelDescriptor, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->usesBindfulAddressingForBuffers()) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
|
||||
@@ -366,7 +366,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKerne
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
|
||||
|
||||
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
||||
}
|
||||
|
||||
@@ -177,9 +177,10 @@ HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenSurfaceStateIsCorrect) {
|
||||
mockCmdQ->storeMultiDispatchInfo = true;
|
||||
enqueueFillImage<FamilyType>();
|
||||
|
||||
auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[0].offsetHeap / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo();
|
||||
uint32_t index = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(0).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast<uint32_t>(index));
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), index);
|
||||
const auto &imageDesc = image->getImageDesc();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight());
|
||||
|
||||
@@ -578,16 +578,6 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
|
||||
using SyncBufferHandler::graphicsAllocation;
|
||||
};
|
||||
|
||||
SPatchAllocateSyncBuffer sPatchAllocateSyncBuffer{};
|
||||
sPatchAllocateSyncBuffer.SurfaceStateHeapOffset = 0;
|
||||
sPatchAllocateSyncBuffer.DataParamOffset = 0;
|
||||
sPatchAllocateSyncBuffer.DataParamSize = sizeof(uint8_t);
|
||||
|
||||
SPatchBindingTableState sPatchBindingTableState{};
|
||||
sPatchBindingTableState.Offset = sizeof(RENDER_SURFACE_STATE);
|
||||
sPatchBindingTableState.Count = 1;
|
||||
sPatchBindingTableState.SurfaceStateOffset = 0;
|
||||
|
||||
pDevice->allocateSyncBufferHandler();
|
||||
|
||||
size_t offset = 0;
|
||||
@@ -597,8 +587,6 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
|
||||
|
||||
{
|
||||
MockKernelWithInternals kernelInternals{*pClDevice, context};
|
||||
kernelInternals.kernelInfo.usesSsh = true;
|
||||
kernelInternals.kernelInfo.requiresSshForBuffers = true;
|
||||
auto kernel = kernelInternals.mockKernel;
|
||||
kernel->initialize();
|
||||
|
||||
@@ -610,16 +598,15 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
|
||||
|
||||
{
|
||||
MockKernelWithInternals kernelInternals{*pClDevice, context};
|
||||
kernelInternals.kernelInfo.usesSsh = true;
|
||||
kernelInternals.kernelInfo.requiresSshForBuffers = true;
|
||||
populateKernelDescriptor(kernelInternals.kernelInfo.kernelDescriptor, sPatchAllocateSyncBuffer);
|
||||
populateKernelDescriptor(kernelInternals.kernelInfo.kernelDescriptor, sPatchBindingTableState);
|
||||
kernelInternals.kernelInfo.setSyncBuffer(sizeof(uint8_t), 0, 0);
|
||||
constexpr auto bindingTableOffset = sizeof(RENDER_SURFACE_STATE);
|
||||
kernelInternals.kernelInfo.setBindingTable(bindingTableOffset, 1);
|
||||
kernelInternals.kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(RENDER_SURFACE_STATE) + sizeof(BINDING_TABLE_STATE);
|
||||
auto kernel = kernelInternals.mockKernel;
|
||||
kernel->initialize();
|
||||
|
||||
auto bindingTableState = reinterpret_cast<BINDING_TABLE_STATE *>(
|
||||
ptrOffset(kernel->getSurfaceStateHeap(), sPatchBindingTableState.Offset));
|
||||
ptrOffset(kernel->getSurfaceStateHeap(), bindingTableOffset));
|
||||
bindingTableState->setSurfaceStatePointer(0);
|
||||
|
||||
auto mockCmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
|
||||
|
||||
@@ -433,7 +433,7 @@ HWTEST_F(EnqueueKernelTest, WhenEnqueingKernelThenIndirectDataIsAdded) {
|
||||
callOneWorkItemNDRKernel();
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), &pKernel->getKernelInfo().kernelDescriptor, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (pKernel->requiresSshForBuffers() || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) {
|
||||
if (pKernel->usesBindfulAddressingForBuffers() || pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesImages) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
@@ -514,10 +514,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra
|
||||
uint32_t scratchSize = 4096u;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
|
||||
SPatchMediaVFEState mediaVFEstate;
|
||||
mediaVFEstate.PerThreadScratchSpace = scratchSize;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||
|
||||
auto sizeToProgram = PreambleHelper<FamilyType>::getScratchSizeValueToProgramMediaVfeState(scratchSize);
|
||||
|
||||
@@ -554,10 +551,7 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPri
|
||||
uint32_t privateScratchSize = 4096u;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
|
||||
SPatchMediaVFEState mediaVFEstate;
|
||||
mediaVFEstate.PerThreadScratchSpace = privateScratchSize;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 1);
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(privateScratchSize, 1);
|
||||
|
||||
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
@@ -1321,7 +1315,7 @@ HWTEST_F(EnqueueKernelTest, givenVMEKernelWhenEnqueueKernelThenDispatchFlagsHave
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice, context);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
mockKernel.kernelInfo.isVmeWorkload = true;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesVme = true;
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_TRUE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
|
||||
}
|
||||
@@ -1373,7 +1367,7 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice, context);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
mockKernel.kernelInfo.isVmeWorkload = false;
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesVme = false;
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
|
||||
}
|
||||
|
||||
@@ -311,10 +311,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc
|
||||
auto scratchSize = GetParam().scratchSize;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
|
||||
SPatchMediaVFEState mediaVFEstate;
|
||||
mediaVFEstate.PerThreadScratchSpace = scratchSize;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||
|
||||
uint32_t sizeToProgram = (scratchSize / static_cast<uint32_t>(MemoryConstants::kiloByte));
|
||||
uint32_t bitValue = 0u;
|
||||
@@ -363,14 +360,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc
|
||||
// Generically validate this command
|
||||
PARSE::template validateCommand<MEDIA_VFE_STATE *>(cmdList.begin(), itorCmd);
|
||||
|
||||
scratchSize *= 2;
|
||||
//skip if size to big 4MB, no point in stressing memory allocator.
|
||||
if (allocationSize > 4194304) {
|
||||
return;
|
||||
}
|
||||
|
||||
mediaVFEstate.PerThreadScratchSpace = scratchSize;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);
|
||||
scratchSize *= 2;
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||
|
||||
auto itorfirstBBEnd = find<typename FamilyType::MI_BATCH_BUFFER_END *>(itorWalker, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itorfirstBBEnd);
|
||||
@@ -447,10 +443,7 @@ HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWi
|
||||
uint32_t scratchSize = 1024u;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
|
||||
SPatchMediaVFEState mediaVFEstate;
|
||||
mediaVFEstate.PerThreadScratchSpace = scratchSize;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||
|
||||
uint32_t sizeToProgram = (scratchSize / static_cast<uint32_t>(MemoryConstants::kiloByte));
|
||||
uint32_t bitValue = 0u;
|
||||
@@ -469,7 +462,7 @@ HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWi
|
||||
|
||||
// Enqueue With ScratchSize bigger than previous
|
||||
scratchSize = 8196;
|
||||
mediaVFEstate.PerThreadScratchSpace = scratchSize;
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||
|
||||
enqueueKernel<FamilyType, false>(mockKernel);
|
||||
|
||||
@@ -490,10 +483,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelWithScratch, givenDeviceForcing32bitAll
|
||||
auto scratchSize = 1024;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
|
||||
SPatchMediaVFEState mediaVFEstate;
|
||||
mediaVFEstate.PerThreadScratchSpace = scratchSize;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, mediaVFEstate, 0);
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||
|
||||
enqueueKernel<FamilyType>(mockKernel);
|
||||
auto graphicsAllocation = csr->getScratchAllocation();
|
||||
@@ -522,8 +512,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelWithScratch, givenDeviceForcing32bitAll
|
||||
//now re-try to see if SBA is not programmed
|
||||
|
||||
scratchSize *= 2;
|
||||
|
||||
mediaVFEstate.PerThreadScratchSpace = scratchSize;
|
||||
mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0);
|
||||
|
||||
enqueueKernel<FamilyType>(mockKernel);
|
||||
|
||||
@@ -544,13 +533,8 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfThenPatchCrossThreadData)
|
||||
typedef typename FamilyType::PARSE PARSE;
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
|
||||
mockKernel.crossThreadData[64] = 0;
|
||||
|
||||
SPatchAllocateStatelessPrintfSurface patchData;
|
||||
patchData.Size = 256;
|
||||
patchData.DataParamOffset = 64;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, patchData);
|
||||
mockKernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 64);
|
||||
|
||||
enqueueKernel<FamilyType, false>(mockKernel);
|
||||
|
||||
@@ -561,14 +545,10 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWhenBeingDispatchedThenL3
|
||||
typedef typename FamilyType::PARSE PARSE;
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
|
||||
mockKernel.crossThreadData[64] = 0;
|
||||
|
||||
SPatchAllocateStatelessPrintfSurface patchData;
|
||||
patchData.Size = 256;
|
||||
patchData.DataParamOffset = 64;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, patchData);
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
mockKernel.crossThreadData[64] = 0;
|
||||
mockKernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 64);
|
||||
|
||||
auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver();
|
||||
auto latestSentTaskCount = csr.peekTaskCount();
|
||||
@@ -607,16 +587,12 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlocke
|
||||
typedef typename FamilyType::PARSE PARSE;
|
||||
|
||||
UserEvent userEvent(context);
|
||||
|
||||
MockCommandQueueHw<FamilyType> mockCommandQueue(context, pClDevice, nullptr);
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
|
||||
mockKernel.crossThreadData[64] = 0;
|
||||
|
||||
SPatchAllocateStatelessPrintfSurface patchData;
|
||||
patchData.Size = 256;
|
||||
patchData.DataParamOffset = 64;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, patchData);
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
mockKernel.crossThreadData[64] = 0;
|
||||
mockKernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 64);
|
||||
|
||||
auto &csr = mockCommandQueue.getGpgpuCommandStreamReceiver();
|
||||
auto latestSentDcFlushTaskCount = csr.peekTaskCount();
|
||||
|
||||
@@ -658,20 +634,9 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUn
|
||||
auto userEvent = make_releaseable<UserEvent>(context);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice);
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
|
||||
|
||||
SPatchAllocateStatelessPrintfSurface patchData;
|
||||
patchData.SurfaceStateHeapOffset = undefined<uint32_t>;
|
||||
patchData.Size = 256;
|
||||
patchData.DataParamSize = 8;
|
||||
patchData.DataParamOffset = 0;
|
||||
populateKernelDescriptor(mockKernel.kernelInfo.kernelDescriptor, patchData);
|
||||
|
||||
auto crossThreadData = reinterpret_cast<uint64_t *>(mockKernel.mockKernel->getCrossThreadData());
|
||||
|
||||
mockKernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 0);
|
||||
std::string testString = "test";
|
||||
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelMetadata.printfStringsMap.insert(std::make_pair(0, testString));
|
||||
mockKernel.kernelInfo.addToPrintfStringsMap(0, testString);
|
||||
|
||||
cl_uint workDim = 1;
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
@@ -691,6 +656,7 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUn
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto crossThreadData = reinterpret_cast<uint64_t *>(mockKernel.mockKernel->getCrossThreadData());
|
||||
auto printfAllocation = reinterpret_cast<uint32_t *>(*crossThreadData);
|
||||
printfAllocation[0] = 8;
|
||||
printfAllocation[1] = 0;
|
||||
@@ -841,23 +807,15 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe
|
||||
buffer3.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice, context);
|
||||
mockKernel.kernelInfo.kernelArgInfo.resize(6);
|
||||
for (auto &kernelInfo : mockKernel.kernelInfo.kernelArgInfo) {
|
||||
kernelInfo.kernelArgPatchInfoVector.resize(1);
|
||||
}
|
||||
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(1).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(2).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(3).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(4).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(5).isBuffer = true;
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false;
|
||||
auto &args = mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs;
|
||||
args.resize(6);
|
||||
args[0].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = true;
|
||||
args[1].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = false;
|
||||
args[2].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = true;
|
||||
args[3].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = false;
|
||||
args[4].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = true;
|
||||
args[5].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = true;
|
||||
|
||||
mockKernel.mockKernel->initialize();
|
||||
EXPECT_TRUE(mockKernel.mockKernel->auxTranslationRequired);
|
||||
@@ -905,9 +863,8 @@ HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueue
|
||||
cl_mem clMem = &buffer;
|
||||
|
||||
buffer.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||
mockKernel.kernelInfo.kernelArgInfo.resize(1);
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1);
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
|
||||
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
|
||||
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = true;
|
||||
mockKernel.mockKernel->initialize();
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
|
||||
@@ -945,9 +902,8 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe
|
||||
cl_mem clMem = &buffer;
|
||||
|
||||
buffer.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||
mockKernel.kernelInfo.kernelArgInfo.resize(1);
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1);
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
|
||||
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
|
||||
mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as<ArgDescPointer>(true).accessedUsingStatelessAddressingMode = true;
|
||||
mockKernel.mockKernel->initialize();
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
|
||||
|
||||
@@ -197,7 +197,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenIndi
|
||||
|
||||
EXPECT_NE(dshBefore, pDSH->getUsed());
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->usesBindfulAddressingForBuffers()) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
@@ -575,23 +575,23 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr
|
||||
if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) {
|
||||
const auto &surfaceStateDst = getSurfaceState<FamilyType>(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1);
|
||||
|
||||
if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) {
|
||||
if (kernelInfo.getArgDescriptorAt(1).as<ArgDescPointer>().pointerSize == sizeof(uint64_t)) {
|
||||
auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
kernelInfo.getArgDescriptorAt(1).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedDstPtr, 4)), *pKernelArg);
|
||||
EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress());
|
||||
|
||||
} else if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) {
|
||||
} else if (kernelInfo.getArgDescriptorAt(1).as<ArgDescPointer>().pointerSize == sizeof(uint32_t)) {
|
||||
auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
kernelInfo.getArgDescriptorAt(1).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedDstPtr, 4)), static_cast<uint64_t>(*pKernelArg));
|
||||
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress());
|
||||
}
|
||||
}
|
||||
|
||||
if (kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 DstOrigin
|
||||
if (kernelInfo.getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size == 4 * sizeof(uint32_t)) { // size of uint4 DstOrigin
|
||||
auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
kernelInfo.getArgDescriptorAt(3).as<ArgDescValue>().elements[0].offset);
|
||||
EXPECT_EQ(hostOffset.x + ptrDiff(misalignedDstPtr, alignDown(misalignedDstPtr, 4)), *dstOffset);
|
||||
} else {
|
||||
// DstOrigin arg should be 16 bytes in size, if that changes, above if path should be modified
|
||||
|
||||
@@ -160,7 +160,7 @@ HWTEST_F(EnqueueReadBufferTypeTest, WhenReadingBufferThenIndirectDataIsAdded) {
|
||||
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernelDescriptor, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->usesBindfulAddressingForBuffers()) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -171,7 +171,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenInd
|
||||
|
||||
EXPECT_NE(dshBefore, pDSH->getUsed());
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->usesBindfulAddressingForBuffers()) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
@@ -573,23 +573,23 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr
|
||||
if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) {
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0);
|
||||
|
||||
if (kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) {
|
||||
if (kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().pointerSize == sizeof(uint64_t)) {
|
||||
auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedHostPtr, 4)), *pKernelArg);
|
||||
EXPECT_EQ(*pKernelArg, surfaceState.getSurfaceBaseAddress());
|
||||
|
||||
} else if (kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) {
|
||||
} else if (kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().pointerSize == sizeof(uint32_t)) {
|
||||
auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedHostPtr, 4)), static_cast<uint64_t>(*pKernelArg));
|
||||
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceState.getSurfaceBaseAddress());
|
||||
}
|
||||
}
|
||||
|
||||
if (kernelInfo.kernelArgInfo[2].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 SrcOrigin
|
||||
if (kernelInfo.getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size == 4 * sizeof(uint32_t)) { // size of uint4 SrcOrigin
|
||||
auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
kernelInfo.getArgDescriptorAt(2).as<ArgDescValue>().elements[0].offset);
|
||||
EXPECT_EQ(hostOffset.x + ptrDiff(misalignedHostPtr, alignDown(misalignedHostPtr, 4)), *dstOffset);
|
||||
} else {
|
||||
// SrcOrigin arg should be 16 bytes in size, if that changes, above if path should be modified
|
||||
|
||||
@@ -158,7 +158,7 @@ HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenIndirectDataIsAdded) {
|
||||
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernelDescriptor, rootDeviceIndex));
|
||||
EXPECT_NE(iohBefore, pIOH->getUsed());
|
||||
if (kernel->requiresSshForBuffers()) {
|
||||
if (kernel->usesBindfulAddressingForBuffers()) {
|
||||
EXPECT_NE(sshBefore, pSSH->getUsed());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,7 +180,7 @@ HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenSurfaceStateIsProgrammedCorr
|
||||
mockCmdQ->storeMultiDispatchInfo = true;
|
||||
enqueueWriteImage<FamilyType>();
|
||||
|
||||
auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().kernelArgInfo[1].offsetHeap / sizeof(RENDER_SURFACE_STATE);
|
||||
auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().getArgDescriptorAt(1).template as<ArgDescImage>().bindful / sizeof(RENDER_SURFACE_STATE);
|
||||
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), static_cast<uint32_t>(index));
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenRes
|
||||
auto &kernelInfo = pKernel->getKernelInfo();
|
||||
|
||||
auto pKernelArg =
|
||||
(uint32_t *)(pKernel->getCrossThreadData() + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
|
||||
(uint32_t *)(pKernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
|
||||
auto address1 = static_cast<uint64_t>(*pKernelArg);
|
||||
auto sharedBufferGpuAddress =
|
||||
|
||||
@@ -79,10 +79,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
|
||||
}
|
||||
|
||||
void patchAllocateSyncBuffer() {
|
||||
sPatchAllocateSyncBuffer.SurfaceStateHeapOffset = 0;
|
||||
sPatchAllocateSyncBuffer.DataParamOffset = 0;
|
||||
sPatchAllocateSyncBuffer.DataParamSize = sizeof(uint8_t);
|
||||
populateKernelDescriptor(kernelInternals->kernelInfo.kernelDescriptor, sPatchAllocateSyncBuffer);
|
||||
kernelInternals->kernelInfo.setSyncBuffer(sizeof(uint8_t), 0, 0);
|
||||
}
|
||||
|
||||
MockSyncBufferHandler *getSyncBufferHandler() {
|
||||
@@ -107,7 +104,6 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
|
||||
std::unique_ptr<MockKernelWithInternals> kernelInternals;
|
||||
MockKernel *kernel;
|
||||
MockCommandQueue *commandQueue;
|
||||
SPatchAllocateSyncBuffer sPatchAllocateSyncBuffer;
|
||||
HwHelper *hwHelper;
|
||||
};
|
||||
|
||||
@@ -176,15 +172,14 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernel
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBufferThenSshIsProperlyPatched) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
kernelInternals->kernelInfo.usesSsh = true;
|
||||
kernelInternals->kernelInfo.requiresSshForBuffers = true;
|
||||
kernelInternals->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindfulAndStateless;
|
||||
kernelInternals->kernelInfo.setBufferAddressingMode(KernelDescriptor::BindfulAndStateless);
|
||||
|
||||
patchAllocateSyncBuffer();
|
||||
|
||||
pDevice->allocateSyncBufferHandler();
|
||||
auto syncBufferHandler = getSyncBufferHandler();
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(kernel->getSurfaceStateHeap(),
|
||||
sPatchAllocateSyncBuffer.SurfaceStateHeapOffset));
|
||||
kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful));
|
||||
auto bufferAddress = syncBufferHandler->graphicsAllocation->getGpuAddress();
|
||||
surfaceState->setSurfaceBaseAddress(bufferAddress + 1);
|
||||
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
|
||||
|
||||
Reference in New Issue
Block a user