Refactor SBA handling + fix unit tests
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
parent
8351fc9f14
commit
f2bbd63d37
|
@ -40,7 +40,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
|
|||
NEO::EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(commandStream, hwInfo, isRcs);
|
||||
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(commandStream, {}, true, hwInfo, isRcs);
|
||||
|
||||
auto pSbaCmd = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
|
||||
auto sbaCmdBuf = static_cast<STATE_BASE_ADDRESS *>(NEO::StateBaseAddressHelper<GfxFamily>::getSpaceForSbaCmd(commandStream));
|
||||
STATE_BASE_ADDRESS sbaCmd;
|
||||
|
||||
bool useGlobalSshAndDsh = NEO::ApiSpecificConfig::getBindlessConfiguration();
|
||||
|
@ -69,7 +69,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
|
|||
NEO::MemoryCompressionState::NotApplicable,
|
||||
false,
|
||||
1u);
|
||||
*pSbaCmd = sbaCmd;
|
||||
*sbaCmdBuf = sbaCmd;
|
||||
csr->setGSBAStateDirty(false);
|
||||
|
||||
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
|
||||
|
|
|
@ -31,7 +31,8 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
|
|||
bool isRcs = this->getCsr()->isRcs();
|
||||
|
||||
NEO::EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(commandStream, hwInfo, isRcs);
|
||||
auto pSbaCmd = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
|
||||
auto sbaCmdBuf = static_cast<STATE_BASE_ADDRESS *>(NEO::StateBaseAddressHelper<GfxFamily>::getSpaceForSbaCmd(commandStream));
|
||||
|
||||
STATE_BASE_ADDRESS sbaCmd;
|
||||
bool multiOsContextCapable = device->isImplicitScalingCapable();
|
||||
NEO::StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(&sbaCmd,
|
||||
|
@ -51,12 +52,12 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
|
|||
NEO::MemoryCompressionState::NotApplicable,
|
||||
false,
|
||||
1u);
|
||||
*pSbaCmd = sbaCmd;
|
||||
*sbaCmdBuf = sbaCmd;
|
||||
|
||||
auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) {
|
||||
pSbaCmd = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
|
||||
*pSbaCmd = sbaCmd;
|
||||
sbaCmdBuf = static_cast<STATE_BASE_ADDRESS *>(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS)));
|
||||
*sbaCmdBuf = sbaCmd;
|
||||
}
|
||||
|
||||
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
|
||||
|
|
|
@ -861,8 +861,14 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS
|
|||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
auto pc = genCmdCast<PIPE_CONTROL *>(*cmdList.rbegin());
|
||||
|
||||
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo())) {
|
||||
EXPECT_NE(nullptr, pc);
|
||||
EXPECT_TRUE(pc->getDcFlushEnable());
|
||||
} else {
|
||||
EXPECT_EQ(nullptr, pc);
|
||||
}
|
||||
}
|
||||
|
||||
using SupportedPlatformsSklIcllp = IsWithinProducts<IGFX_SKYLAKE, IGFX_ICELAKE>;
|
||||
|
|
|
@ -594,6 +594,8 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith
|
|||
|
||||
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammed) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
|
@ -605,9 +607,19 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen
|
|||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
auto itor = find<SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
|
||||
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo())) {
|
||||
itor--;
|
||||
EXPECT_NE(nullptr, genCmdCast<PIPE_CONTROL *>(*itor));
|
||||
} else {
|
||||
if (cmdList.begin() != itor) {
|
||||
itor--;
|
||||
EXPECT_EQ(nullptr, genCmdCast<PIPE_CONTROL *>(*itor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammedOnlyOnce) {
|
||||
|
@ -627,11 +639,18 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP
|
|||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
auto itor = find<SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
itor++;
|
||||
auto itor2 = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor2);
|
||||
|
||||
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo())) {
|
||||
itor--;
|
||||
EXPECT_NE(nullptr, genCmdCast<PIPE_CONTROL *>(*itor));
|
||||
} else {
|
||||
if (cmdList.begin() != itor) {
|
||||
itor--;
|
||||
EXPECT_EQ(nullptr, genCmdCast<PIPE_CONTROL *>(*itor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendWaitEventsWithHostScopeThenPipeControlAndSemWaitAreAddedFromCommandList) {
|
||||
|
|
|
@ -452,14 +452,15 @@ HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyCalled
|
|||
GenCmdList genCmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
auto itor = find<PIPE_CONTROL *>(genCmdList.begin(), genCmdList.end());
|
||||
ASSERT_NE(genCmdList.end(), itor);
|
||||
PIPE_CONTROL *cmd = nullptr;
|
||||
while (itor != genCmdList.end()) {
|
||||
cmd = genCmdCast<PIPE_CONTROL *>(*itor);
|
||||
itor = find<PIPE_CONTROL *>(++itor, genCmdList.end());
|
||||
|
||||
auto pc = genCmdCast<PIPE_CONTROL *>(*genCmdList.rbegin());
|
||||
|
||||
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo())) {
|
||||
EXPECT_NE(nullptr, pc);
|
||||
EXPECT_TRUE(pc->getDcFlushEnable());
|
||||
} else {
|
||||
EXPECT_EQ(nullptr, pc);
|
||||
}
|
||||
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); // NOLINT(clang-analyzer-core.CallAndMessage)
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
|
|
|
@ -106,14 +106,15 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionC
|
|||
GenCmdList genCmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
auto itor = find<PIPE_CONTROL *>(genCmdList.begin(), genCmdList.end());
|
||||
ASSERT_NE(genCmdList.end(), itor);
|
||||
PIPE_CONTROL *cmd = nullptr;
|
||||
while (itor != genCmdList.end()) {
|
||||
cmd = genCmdCast<PIPE_CONTROL *>(*itor);
|
||||
itor = find<PIPE_CONTROL *>(++itor, genCmdList.end());
|
||||
|
||||
auto pc = genCmdCast<PIPE_CONTROL *>(*genCmdList.rbegin());
|
||||
|
||||
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo())) {
|
||||
EXPECT_NE(nullptr, pc);
|
||||
EXPECT_TRUE(pc->getDcFlushEnable());
|
||||
} else {
|
||||
EXPECT_EQ(nullptr, pc);
|
||||
}
|
||||
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); // NOLINT(clang-analyzer-core.CallAndMessage)
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsAtLeastSkl) {
|
||||
|
|
|
@ -112,6 +112,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphic
|
|||
|
||||
HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppendingWaitOnEventThenPCWithDcFlushIsGenerated) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
const ze_event_desc_t eventDesc = {
|
||||
|
@ -134,14 +135,21 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe
|
|||
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
|
||||
usedSpaceAfter));
|
||||
|
||||
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end()).back();
|
||||
ASSERT_NE(cmdList.end(), itorPC);
|
||||
{
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
|
||||
ASSERT_NE(cmd, nullptr);
|
||||
auto itor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
|
||||
if (NEO::MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, device->getHwInfo())) {
|
||||
itor--;
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
|
||||
|
||||
ASSERT_NE(nullptr, cmd);
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
|
||||
EXPECT_TRUE(cmd->getDcFlushEnable());
|
||||
} else {
|
||||
if (cmdList.begin() != itor) {
|
||||
itor--;
|
||||
EXPECT_EQ(nullptr, genCmdCast<PIPE_CONTROL *>(*itor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -175,14 +175,14 @@ HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenSurfaceStateI
|
|||
const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo();
|
||||
uint32_t index = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(1).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index);
|
||||
const auto surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index);
|
||||
const auto &imageDesc = dstImage->getImageDesc();
|
||||
// EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight());
|
||||
EXPECT_NE(0u, surfaceState.getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState.getSurfaceType());
|
||||
auto surfaceFormat = surfaceState.getSurfaceFormat();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight());
|
||||
EXPECT_NE(0u, surfaceState->getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState->getSurfaceType());
|
||||
auto surfaceFormat = surfaceState->getSurfaceFormat();
|
||||
bool isRedescribedFormat =
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT ||
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT ||
|
||||
|
@ -190,9 +190,9 @@ HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenSurfaceStateI
|
|||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT ||
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT;
|
||||
EXPECT_TRUE(isRedescribedFormat);
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenNumberOfPipelineSelectsIsOne, IsAtMostXeHpcCore) {
|
||||
|
|
|
@ -193,13 +193,13 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) {
|
|||
const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo();
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
uint32_t index = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(i).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index);
|
||||
const auto surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index);
|
||||
const auto &imageDesc = dstImage->getImageDesc();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight());
|
||||
EXPECT_NE(0u, surfaceState.getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState.getSurfaceType());
|
||||
auto surfaceFormat = surfaceState.getSurfaceFormat();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight());
|
||||
EXPECT_NE(0u, surfaceState->getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState->getSurfaceType());
|
||||
auto surfaceFormat = surfaceState->getSurfaceFormat();
|
||||
bool isRedescribedFormat =
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT ||
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT ||
|
||||
|
@ -207,17 +207,17 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) {
|
|||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT ||
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT;
|
||||
EXPECT_TRUE(isRedescribedFormat);
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment());
|
||||
}
|
||||
|
||||
uint32_t srcIndex = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(0).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &srcSurfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), srcIndex);
|
||||
EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), srcSurfaceState.getSurfaceBaseAddress());
|
||||
const auto srcSurfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), srcIndex);
|
||||
EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), srcSurfaceState->getSurfaceBaseAddress());
|
||||
|
||||
uint32_t dstIndex = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(1).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
const auto &dstSurfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), dstIndex);
|
||||
EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), dstSurfaceState.getSurfaceBaseAddress());
|
||||
const auto dstSurfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), dstIndex);
|
||||
EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), dstSurfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(EnqueueCopyImageTest, WhenCopyingImageThenNumberOfPipelineSelectsIsOne, IsAtMostXeHpcCore) {
|
||||
|
|
|
@ -169,14 +169,14 @@ HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenSurfaceStateI
|
|||
|
||||
enqueueCopyImageToBuffer<FamilyType>();
|
||||
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0);
|
||||
const auto surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0);
|
||||
const auto &imageDesc = srcImage->getImageDesc();
|
||||
// EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight());
|
||||
EXPECT_NE(0u, surfaceState.getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState.getSurfaceType());
|
||||
auto surfaceFormat = surfaceState.getSurfaceFormat();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight());
|
||||
EXPECT_NE(0u, surfaceState->getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState->getSurfaceType());
|
||||
auto surfaceFormat = surfaceState->getSurfaceFormat();
|
||||
bool isRedescribedFormat =
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT ||
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT ||
|
||||
|
@ -184,9 +184,9 @@ HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenSurfaceStateI
|
|||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT ||
|
||||
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT;
|
||||
EXPECT_TRUE(isRedescribedFormat);
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenNumberOfPipelineSelectsIsOne, IsAtMostXeHpcCore) {
|
||||
|
|
|
@ -200,15 +200,15 @@ HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenSurfaceStateIsCorrect) {
|
|||
const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo();
|
||||
uint32_t index = static_cast<uint32_t>(kernelInfo.getArgDescriptorAt(0).template as<ArgDescImage>().bindful) / sizeof(RENDER_SURFACE_STATE);
|
||||
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index);
|
||||
const auto surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index);
|
||||
const auto &imageDesc = image->getImageDesc();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight());
|
||||
EXPECT_NE(0u, surfaceState.getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState.getSurfaceType());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(image->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight());
|
||||
EXPECT_NE(0u, surfaceState->getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState->getSurfaceType());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(image->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(EnqueueFillImageTest, WhenFillingImageThenNumberOfPipelineSelectsIsOne, IsAtMostXeHpcCore) {
|
||||
|
|
|
@ -708,9 +708,9 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
|
|||
ClHardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(*mockCmdQ);
|
||||
|
||||
auto &surfaceState = hwParser.getSurfaceState<FamilyType>(&surfaceStateHeap, 0);
|
||||
auto surfaceState = hwParser.getSurfaceState<FamilyType>(&surfaceStateHeap, 0);
|
||||
auto pSyncBufferHandler = static_cast<MockSyncBufferHandler *>(pDevice->syncBufferHandler.get());
|
||||
EXPECT_EQ(pSyncBufferHandler->graphicsAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(pSyncBufferHandler->graphicsAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -612,13 +612,13 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr
|
|||
auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.getArgDescriptorAt(1).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedDstPtr, 4)), *pKernelArg);
|
||||
EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(*pKernelArg, surfaceStateDst->getSurfaceBaseAddress());
|
||||
|
||||
} else if (kernelInfo.getArgDescriptorAt(1).as<ArgDescPointer>().pointerSize == sizeof(uint32_t)) {
|
||||
auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.getArgDescriptorAt(1).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedDstPtr, 4)), static_cast<uint64_t>(*pKernelArg));
|
||||
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceStateDst->getSurfaceBaseAddress());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -640,18 +640,18 @@ HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenSurfaceStateIsCorrect) {
|
|||
|
||||
// BufferToImage kernel uses BTI=1 for destSurface
|
||||
uint32_t bindingTableIndex = 0;
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), bindingTableIndex);
|
||||
const auto surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), bindingTableIndex);
|
||||
|
||||
// EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes
|
||||
const auto &imageDesc = srcImage->getImageDesc();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight());
|
||||
EXPECT_NE(0u, surfaceState.getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState.getSurfaceType());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState.getSurfaceFormat());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight());
|
||||
EXPECT_NE(0u, surfaceState->getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState->getSurfaceType());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState->getSurfaceFormat());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(EnqueueReadImageTest, WhenReadingImageThenPipelineSelectIsProgrammed, IsAtMostXeHpcCore) {
|
||||
|
|
|
@ -605,19 +605,19 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr
|
|||
auto &kernelInfo = kernel->getKernelInfo();
|
||||
|
||||
if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) {
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&cmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0);
|
||||
const auto surfaceState = getSurfaceState<FamilyType>(&cmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0);
|
||||
|
||||
if (kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().pointerSize == sizeof(uint64_t)) {
|
||||
auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedHostPtr, 4)), *pKernelArg);
|
||||
EXPECT_EQ(*pKernelArg, surfaceState.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(*pKernelArg, surfaceState->getSurfaceBaseAddress());
|
||||
|
||||
} else if (kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().pointerSize == sizeof(uint32_t)) {
|
||||
auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() +
|
||||
kernelInfo.getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
EXPECT_EQ(reinterpret_cast<uint64_t>(alignDown(misalignedHostPtr, 4)), static_cast<uint64_t>(*pKernelArg));
|
||||
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceState.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -198,18 +198,18 @@ HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenSurfaceStateIsProgrammedCorr
|
|||
|
||||
auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().getArgDescriptorAt(1).template as<ArgDescImage>().bindful / sizeof(RENDER_SURFACE_STATE);
|
||||
|
||||
const auto &surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), static_cast<uint32_t>(index));
|
||||
const auto surfaceState = getSurfaceState<FamilyType>(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), static_cast<uint32_t>(index));
|
||||
|
||||
// EnqueueWriteImage uses multi-byte copies depending on per-pixel-size-in-bytes
|
||||
const auto &imageDesc = dstImage->getImageDesc();
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight());
|
||||
EXPECT_NE(0u, surfaceState.getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState.getSurfaceType());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState.getSurfaceFormat());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(dstAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(imageDesc.image_width, surfaceState->getWidth());
|
||||
EXPECT_EQ(imageDesc.image_height, surfaceState->getHeight());
|
||||
EXPECT_NE(0u, surfaceState->getSurfacePitch());
|
||||
EXPECT_NE(0u, surfaceState->getSurfaceType());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState->getSurfaceFormat());
|
||||
EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState->getSurfaceHorizontalAlignment());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState->getSurfaceVerticalAlignment());
|
||||
EXPECT_EQ(dstAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(EnqueueWriteImageTest, WhenWritingImageThenOnePipelineSelectIsProgrammed, IsAtMostXeHpcCore) {
|
||||
|
|
|
@ -484,42 +484,6 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) {
|
|||
userEventObj->release();
|
||||
}
|
||||
|
||||
// Enqueues a marker that waits on a user event, completes that user event,
// then verifies that PIPE_CONTROL commands can be found in the parsed command
// list and that profiling data can be calculated for the marker's event.
HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenPipeControlsArePresentInCS) {
|
||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||
|
||||
// `event` is the output event of the marker; `userEvent` is the single entry
// of its wait list. The user event's status is only set to CL_COMPLETE
// further down, after the marker has been enqueued.
cl_event event = nullptr;
|
||||
cl_event userEvent = new UserEvent();
|
||||
static_cast<CommandQueueHw<FamilyType> *>(pCmdQ)->enqueueMarkerWithWaitList(1, &userEvent, &event);
|
||||
|
||||
auto eventObj = static_cast<Event *>(event);
|
||||
// The marker's event is expected not to report the CPU profiling path.
EXPECT_FALSE(eventObj->isCPUProfilingPath());
|
||||
|
||||
auto userEventObj = static_cast<UserEvent *>(userEvent);
|
||||
|
||||
// Flush the queue, mark the user event complete, then wait for the marker's event.
pCmdQ->flush();
|
||||
userEventObj->setStatus(CL_COMPLETE);
|
||||
Event::waitForEvents(1, &event);
|
||||
|
||||
// Presumably fills the fixture's `cmdList` that is searched below - TODO confirm.
parseCommands<FamilyType>(*pCmdQ);
|
||||
|
||||
// Check PIPE_CONTROLs
|
||||
// ASSERT_* is used so the test stops before an end iterator or a null
// command pointer would be dereferenced.
auto itorFirstPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itorFirstPC);
|
||||
auto pFirstPC = genCmdCast<PIPE_CONTROL *>(*itorFirstPC);
|
||||
ASSERT_NE(nullptr, pFirstPC);
|
||||
|
||||
// NOTE(review): this search starts at itorFirstPC itself, not at the element
// after it, so it looks like it can match the same PIPE_CONTROL again and the
// "second" checks may not prove that two distinct commands exist - confirm
// against the semantics of `find`.
auto itorSecondPC = find<PIPE_CONTROL *>(itorFirstPC, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itorSecondPC);
|
||||
auto pSecondPC = genCmdCast<PIPE_CONTROL *>(*itorSecondPC);
|
||||
ASSERT_NE(nullptr, pSecondPC);
|
||||
|
||||
// Profiling data calculation on the marker's event is expected to succeed.
EXPECT_TRUE(static_cast<MockEvent<Event> *>(event)->calcProfilingData());
|
||||
|
||||
// Release both event objects obtained/created above.
eventObj->release();
|
||||
userEventObj->release();
|
||||
// NOTE(review): the return value is discarded; presumably called for its
// side effects on the queue - confirm.
pCmdQ->isQueueBlocked();
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
struct MockTagNode : public TagNode<TagType> {
|
||||
public:
|
||||
|
|
|
@ -118,12 +118,12 @@ HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCa
|
|||
if (arg1AsPtr.pointerSize == sizeof(uint64_t)) {
|
||||
auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + arg1AsPtr.stateless);
|
||||
EXPECT_EQ(alignDown(gpuVa, 4), static_cast<uint64_t>(*pKernelArg));
|
||||
EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(*pKernelArg, surfaceStateDst->getSurfaceBaseAddress());
|
||||
|
||||
} else if (arg1AsPtr.pointerSize == sizeof(uint32_t)) {
|
||||
auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + arg1AsPtr.stateless);
|
||||
EXPECT_EQ(alignDown(gpuVa, 4), static_cast<uint64_t>(*pKernelArg));
|
||||
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress());
|
||||
EXPECT_EQ(static_cast<uint64_t>(*pKernelArg), surfaceStateDst->getSurfaceBaseAddress());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -205,10 +205,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
|
||||
if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) {
|
||||
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
|
||||
//for ImmediateDispatch we will send this right away, therefore this pipe control will close the level
|
||||
//for BatchedSubmissions it will be nooped and only last ppc in batch will be emitted.
|
||||
// for ImmediateDispatch we will send this right away, therefore this pipe control will close the level
|
||||
// for BatchedSubmissions it will be nooped and only last ppc in batch will be emitted.
|
||||
levelClosed = true;
|
||||
//if we guard with ppc, flush dc as well to speed up completion latency
|
||||
// if we guard with ppc, flush dc as well to speed up completion latency
|
||||
if (dispatchFlags.guardCommandBufferWithPipeControl) {
|
||||
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
if (hwInfoConfig.isDcFlushAllowed()) {
|
||||
|
@ -385,7 +385,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
lastMemoryCompressionState = memoryCompressionState;
|
||||
}
|
||||
|
||||
//Reprogram state base address if required
|
||||
// Reprogram state base address if required
|
||||
if (isStateBaseAddressDirty || sourceLevelDebuggerActive) {
|
||||
EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(commandStreamCSR, hwInfo, isRcs());
|
||||
EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, true, hwInfo, isRcs());
|
||||
|
@ -401,7 +401,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
}
|
||||
|
||||
auto stateBaseAddressCmdOffset = commandStreamCSR.getUsed();
|
||||
auto pCmd = static_cast<STATE_BASE_ADDRESS *>(commandStreamCSR.getSpace(sizeof(STATE_BASE_ADDRESS)));
|
||||
auto pCmd = static_cast<STATE_BASE_ADDRESS *>(StateBaseAddressHelper<GfxFamily>::getSpaceForSbaCmd(commandStreamCSR));
|
||||
STATE_BASE_ADDRESS cmd;
|
||||
auto instructionHeapBaseAddress = getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, getMemoryManager()->isLocalMemoryUsedForIsa(rootDeviceIndex));
|
||||
StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
|
@ -422,7 +422,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
memoryCompressionState,
|
||||
dispatchFlags.useGlobalAtomics,
|
||||
dispatchFlags.areMultipleSubDevicesInContext);
|
||||
*pCmd = cmd;
|
||||
|
||||
if (pCmd) {
|
||||
*pCmd = cmd;
|
||||
}
|
||||
|
||||
programAdditionalStateBaseAddress(commandStreamCSR, cmd, device);
|
||||
|
||||
|
@ -618,7 +621,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
|
||||
this->wasSubmittedToSingleSubdevice = dispatchFlags.useSingleSubdevice;
|
||||
|
||||
//check if we are not over the budget, if we are do implicit flush
|
||||
// check if we are not over the budget, if we are do implicit flush
|
||||
if (getMemoryManager()->isMemoryBudgetExhausted()) {
|
||||
if (this->totalMemoryUsed >= device.getDeviceInfo().globalMemSize / 4) {
|
||||
implicitFlush = true;
|
||||
|
@ -726,16 +729,16 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
|
|||
|
||||
while (nextCommandBuffer && nextCommandBuffer->inspectionId == primaryCmdBuffer->inspectionId) {
|
||||
|
||||
//noop pipe control
|
||||
// noop pipe control
|
||||
if (currentPipeControlForNooping) {
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
flatBatchBufferHelper->removePipeControlData(pipeControlLocationSize, currentPipeControlForNooping, hwInfo);
|
||||
}
|
||||
memset(currentPipeControlForNooping, 0, pipeControlLocationSize);
|
||||
}
|
||||
//obtain next candidate for nooping
|
||||
// obtain next candidate for nooping
|
||||
currentPipeControlForNooping = nextCommandBuffer->pipeControlThatMayBeErasedLocation;
|
||||
//track epilogue pipe control
|
||||
// track epilogue pipe control
|
||||
epiloguePipeControlLocation = nextCommandBuffer->epiloguePipeControlLocation;
|
||||
|
||||
flushStampUpdateHelper.insert(nextCommandBuffer->flushStamp->getStampReference());
|
||||
|
@ -744,7 +747,7 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
|
|||
auto cpuAddressForCommandBufferDestination = ptrOffset(nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), nextCommandBuffer->batchBuffer.startOffset);
|
||||
auto cpuAddressForCurrentCommandBufferEndingSection = alignUp(ptrOffset(currentBBendLocation, sizeof(MI_BATCH_BUFFER_START)), MemoryConstants::cacheLineSize);
|
||||
|
||||
//if we point to exact same command buffer, then batch buffer start is not needed at all
|
||||
// if we point to exact same command buffer, then batch buffer start is not needed at all
|
||||
if (cpuAddressForCurrentCommandBufferEndingSection == cpuAddressForCommandBufferDestination) {
|
||||
memset(currentBBendLocation, 0u, ptrDiff(cpuAddressForCurrentCommandBufferEndingSection, currentBBendLocation));
|
||||
} else {
|
||||
|
@ -766,7 +769,7 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
|
|||
surfacesForSubmit.push_back(surface);
|
||||
}
|
||||
|
||||
//make sure we flush DC if needed
|
||||
// make sure we flush DC if needed
|
||||
if (epiloguePipeControlLocation && MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo)) {
|
||||
|
||||
auto emitDcFlush = true;
|
||||
|
@ -784,7 +787,7 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
|
|||
break;
|
||||
}
|
||||
|
||||
//after flush task level is closed
|
||||
// after flush task level is closed
|
||||
this->taskLevel++;
|
||||
|
||||
flushStampUpdateHelper.updateAll(flushStamp->peekStamp());
|
||||
|
@ -885,7 +888,7 @@ inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNot
|
|||
auto status = waitForCompletionWithTimeout(params, taskCountToWait);
|
||||
if (status == WaitStatus::NotReady) {
|
||||
waitForFlushStamp(flushStampToWait);
|
||||
//now call blocking wait, this is to ensure that task count is reached
|
||||
// now call blocking wait, this is to ensure that task count is reached
|
||||
status = waitForCompletionWithTimeout(WaitParams{false, false, 0}, taskCountToWait);
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,8 @@ template <typename GfxFamily>
|
|||
struct StateBaseAddressHelper {
|
||||
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
|
||||
|
||||
static void *getSpaceForSbaCmd(LinearStream &cmdStream);
|
||||
|
||||
static void programStateBaseAddress(
|
||||
STATE_BASE_ADDRESS *stateBaseAddress,
|
||||
const IndirectHeap *dsh,
|
||||
|
|
|
@ -103,4 +103,9 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
|||
isMultiOsContextCapable, memoryCompressionState, overrideBindlessSurfaceStateBase, useGlobalAtomics, areMultipleSubDevicesInContext);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void *StateBaseAddressHelper<GfxFamily>::getSpaceForSbaCmd(LinearStream &cmdStream) {
|
||||
return cmdStream.getSpace(sizeof(STATE_BASE_ADDRESS));
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -94,28 +94,7 @@ struct HardwareParse {
|
|||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
const typename FamilyType::RENDER_SURFACE_STATE &getSurfaceState(IndirectHeap *ssh, uint32_t index) {
|
||||
typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE;
|
||||
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
|
||||
|
||||
const auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;
|
||||
|
||||
auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress;
|
||||
auto surfaceStateHeap = cmdSBA->getSurfaceStateBaseAddress();
|
||||
if (ssh && (ssh->getHeapGpuBase() == surfaceStateHeap)) {
|
||||
surfaceStateHeap = reinterpret_cast<uint64_t>(ssh->getCpuBase());
|
||||
}
|
||||
EXPECT_NE(0u, surfaceStateHeap);
|
||||
|
||||
auto bindingTablePointer = interfaceDescriptorData.getBindingTablePointer();
|
||||
|
||||
const auto &bindingTableState = reinterpret_cast<BINDING_TABLE_STATE *>(surfaceStateHeap + bindingTablePointer)[index];
|
||||
auto surfaceStatePointer = bindingTableState.getSurfaceStatePointer();
|
||||
|
||||
return *(RENDER_SURFACE_STATE *)(surfaceStateHeap + surfaceStatePointer);
|
||||
}
|
||||
const typename FamilyType::RENDER_SURFACE_STATE *getSurfaceState(IndirectHeap *ssh, uint32_t index);
|
||||
|
||||
template <typename FamilyType>
|
||||
const typename FamilyType::SAMPLER_STATE &getSamplerState(uint32_t index) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -123,4 +123,28 @@ const void *HardwareParse::getStatelessArgumentPointer(const KernelInfo &kernelI
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
const typename FamilyType::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState(IndirectHeap *ssh, uint32_t index) {
|
||||
typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE;
|
||||
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
|
||||
|
||||
const auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;
|
||||
|
||||
auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress;
|
||||
auto surfaceStateHeap = cmdSBA->getSurfaceStateBaseAddress();
|
||||
if (ssh && (ssh->getHeapGpuBase() == surfaceStateHeap)) {
|
||||
surfaceStateHeap = reinterpret_cast<uint64_t>(ssh->getCpuBase());
|
||||
}
|
||||
EXPECT_NE(0u, surfaceStateHeap);
|
||||
|
||||
auto bindingTablePointer = interfaceDescriptorData.getBindingTablePointer();
|
||||
|
||||
const auto &bindingTableState = reinterpret_cast<BINDING_TABLE_STATE *>(surfaceStateHeap + bindingTablePointer)[index];
|
||||
auto surfaceStatePointer = bindingTableState.getSurfaceStatePointer();
|
||||
|
||||
return (RENDER_SURFACE_STATE *)(surfaceStateHeap + surfaceStatePointer);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -86,4 +86,5 @@ namespace NEO {
|
|||
template void HardwareParse::findHardwareCommands<ICLFamily>();
|
||||
template void HardwareParse::findHardwareCommands<ICLFamily>(IndirectHeap *);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<ICLFamily>(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
template const typename ICLFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState<ICLFamily>(IndirectHeap *ssh, uint32_t index);
|
||||
} // namespace NEO
|
||||
|
|
|
@ -129,4 +129,5 @@ namespace NEO {
|
|||
template void HardwareParse::findHardwareCommands<TGLLPFamily>();
|
||||
template void HardwareParse::findHardwareCommands<TGLLPFamily>(IndirectHeap *);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<TGLLPFamily>(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
template const typename TGLLPFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState<TGLLPFamily>(IndirectHeap *ssh, uint32_t index);
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -68,4 +68,5 @@ namespace NEO {
|
|||
template void HardwareParse::findHardwareCommands<BDWFamily>();
|
||||
template void HardwareParse::findHardwareCommands<BDWFamily>(IndirectHeap *);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<BDWFamily>(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
template const typename BDWFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState<BDWFamily>(IndirectHeap *ssh, uint32_t index);
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -86,4 +86,5 @@ namespace NEO {
|
|||
template void HardwareParse::findHardwareCommands<SKLFamily>();
|
||||
template void HardwareParse::findHardwareCommands<SKLFamily>(IndirectHeap *);
|
||||
template const void *HardwareParse::getStatelessArgumentPointer<SKLFamily>(const KernelInfo &kernelInfo, uint32_t indexArg, IndirectHeap &ioh, uint32_t rootDeviceIndex);
|
||||
template const typename SKLFamily::RENDER_SURFACE_STATE *HardwareParse::getSurfaceState<SKLFamily>(IndirectHeap *ssh, uint32_t index);
|
||||
} // namespace NEO
|
||||
|
|
|
@ -80,6 +80,8 @@ struct UnitTestHelper {
|
|||
static bool getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem);
|
||||
|
||||
static bool timestampRegisterHighAddress();
|
||||
|
||||
static void validateSbaMocs(uint32_t expectedMocs, CommandStreamReceiver &csr);
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -6,6 +6,10 @@
|
|||
*/
|
||||
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
@ -77,4 +81,20 @@ bool UnitTestHelper<GfxFamily>::timestampRegisterHighAddress() {
|
|||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void UnitTestHelper<GfxFamily>::validateSbaMocs(uint32_t expectedMocs, CommandStreamReceiver &csr) {
|
||||
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<GfxFamily>(csr.getCS(0), 0);
|
||||
auto itorCmd = reverseFind<STATE_BASE_ADDRESS *>(hwParse.cmdList.rbegin(), hwParse.cmdList.rend());
|
||||
EXPECT_NE(hwParse.cmdList.rend(), itorCmd);
|
||||
auto sba = genCmdCast<STATE_BASE_ADDRESS *>(*itorCmd);
|
||||
EXPECT_NE(nullptr, sba);
|
||||
|
||||
auto mocs = sba->getStatelessDataPortAccessMemoryObjectControlState();
|
||||
|
||||
EXPECT_EQ(expectedMocs, mocs);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -9,3 +9,5 @@
|
|||
using GenStruct = NEO::XeHpCore;
|
||||
using GenGfxFamily = NEO::XeHpFamily;
|
||||
#include "shared/test/common/cmd_parse/cmd_parse_xehp_and_later.inl"
|
||||
|
||||
template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState<GenGfxFamily>(IndirectHeap *ssh, uint32_t index);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -110,3 +110,5 @@ const char *CmdParse<GenGfxFamily>::getAdditionalCommandName(void *cmd) {
|
|||
}
|
||||
|
||||
#include "shared/test/common/cmd_parse/cmd_parse_pvc_and_later.inl"
|
||||
|
||||
template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState<GenGfxFamily>(IndirectHeap *ssh, uint32_t index);
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -9,3 +9,5 @@
|
|||
using GenStruct = NEO::XE_HPG_CORE;
|
||||
using GenGfxFamily = NEO::XE_HPG_COREFamily;
|
||||
#include "shared/test/common/cmd_parse/cmd_parse_xehp_and_later.inl"
|
||||
|
||||
template const typename GenGfxFamily::RENDER_SURFACE_STATE *NEO::HardwareParse::getSurfaceState<GenGfxFamily>(IndirectHeap *ssh, uint32_t index);
|
Loading…
Reference in New Issue