From 1fc0826394d4c826385c826609cd5024e5cde4d9 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 29 Jul 2025 19:21:18 +0000 Subject: [PATCH] feature: add encoder to program hw commands in data buffers on gpu Related-To: NEO-15376 Signed-off-by: Zbigniew Zdanowicz --- .../command_container/command_encoder.h | 25 +++ .../command_container/command_encoder.inl | 71 +++++++-- .../command_encoder_xehp_and_later.inl | 48 ++++++ .../gen12lp/command_encoder_gen12lp.cpp | 22 +++ .../command_encoder_tests_xehp_and_later.cpp | 63 ++++++++ .../encoders/test_command_encoder.cpp | 143 +++++++++++++++++- .../unit_test/gen12lp/test_encode_gen12lp.cpp | 21 ++- 7 files changed, 380 insertions(+), 13 deletions(-) diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 69f44d7077..ba9e4dea52 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -46,6 +46,7 @@ struct PipelineSelectArgs; struct RootDeviceEnvironment; struct StateBaseAddressProperties; struct StateComputeModeProperties; +struct StreamProperties; struct ImplicitArgs; struct EncodeKernelArgsExt; @@ -756,6 +757,30 @@ struct EncodeDataMemory { void *srcData, size_t size); static size_t getCommandSizeForEncode(size_t size); + + static void programNoop(LinearStream &commandStream, + uint64_t dstGpuAddress, size_t size); + static void programNoop(void *commandBuffer, + uint64_t dstGpuAddress, size_t size); + static void programBbStart(LinearStream &commandStream, + uint64_t dstGpuAddress, uint64_t address, bool secondLevel, bool indirect, bool predicate); + static void programBbStart(void *commandBuffer, + uint64_t dstGpuAddress, uint64_t address, bool secondLevel, bool indirect, bool predicate); + + static void programFrontEndState(LinearStream &commandStream, + uint64_t dstGpuAddress, + const RootDeviceEnvironment &rootDeviceEnvironment, + uint32_t scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads, + const StreamProperties &streamProperties); + static void programFrontEndState(void *commandBuffer, + uint64_t dstGpuAddress, + const RootDeviceEnvironment &rootDeviceEnvironment, + uint32_t scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads, + const StreamProperties &streamProperties); }; template diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index bb858fd56e..de6525e1af 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -1196,21 +1196,70 @@ inline size_t EncodeDataMemory::getCommandSizeForEncode(size_t size) { auto alignedUpSize = alignUp(size, sizeof(uint32_t)); UNRECOVERABLE_IF(alignedUpSize != size); - size_t commandSize = 0; - - size_t step = 0; - while (size > 0) { - step = sizeof(uint32_t); - if (size >= sizeof(uint64_t)) { - step = sizeof(uint64_t); - } - commandSize += EncodeStoreMemory::getStoreDataImmSize(); - size -= step; - } + size_t steps = (size / sizeof(uint64_t)); + size_t tailSteps = (steps * sizeof(uint64_t) == size) ? 0U : 1U; + size_t commandSize = (steps + tailSteps) * EncodeStoreMemory::getStoreDataImmSize(); return commandSize; } +template +inline void EncodeDataMemory::programNoop(LinearStream &commandStream, + uint64_t dstGpuAddress, size_t size) { + size_t bufferSize = getCommandSizeForEncode(size); + void *commandBuffer = commandStream.getSpace(bufferSize); + programNoop(commandBuffer, dstGpuAddress, size); +} + +template +inline void EncodeDataMemory::programNoop(void *commandBuffer, + uint64_t dstGpuAddress, size_t size) { + using MI_STORE_DATA_IMM = typename Family::MI_STORE_DATA_IMM; + + uint32_t noopValue0 = 0; + uint32_t noopValue1 = 0; + uint32_t i = 0; + + MI_STORE_DATA_IMM *cmdSdi = reinterpret_cast(commandBuffer); + + const size_t alignDownSize = alignDown(size, sizeof(uint64_t)); + size_t unitSize = sizeof(uint64_t); + uint32_t maxIterations = 0; + if (size >= sizeof(uint64_t)) { + maxIterations = static_cast(alignDownSize / sizeof(uint64_t)); + } + for (; i < maxIterations; i++) { + constexpr bool storeQword = true; + EncodeStoreMemory::programStoreDataImm(cmdSdi, (dstGpuAddress + i * unitSize), noopValue0, noopValue1, storeQword, false); + cmdSdi++; + } + if (size > alignDownSize) { + constexpr bool storeQword = false; + EncodeStoreMemory::programStoreDataImm(cmdSdi, (dstGpuAddress + i * unitSize), noopValue0, noopValue1, storeQword, false); + } +} + +template +inline void EncodeDataMemory::programBbStart(LinearStream &commandStream, + uint64_t dstGpuAddress, uint64_t address, bool secondLevel, bool indirect, bool predicate) { + using MI_BATCH_BUFFER_START = typename Family::MI_BATCH_BUFFER_START; + + size_t bufferSize = getCommandSizeForEncode(sizeof(MI_BATCH_BUFFER_START)); + void *commandBuffer = commandStream.getSpace(bufferSize); + EncodeDataMemory::programBbStart(commandBuffer, dstGpuAddress, address, secondLevel, indirect, predicate); +} + +template +inline void EncodeDataMemory::programBbStart(void *commandBuffer, + uint64_t dstGpuAddress, uint64_t address, bool secondLevel, bool indirect, bool predicate) { + using MI_BATCH_BUFFER_START = typename Family::MI_BATCH_BUFFER_START; + + alignas(8) uint8_t bbStartCmdBuffer[sizeof(MI_BATCH_BUFFER_START)]; + EncodeBatchBufferStartOrEnd::programBatchBufferStart(reinterpret_cast(bbStartCmdBuffer), address, secondLevel, indirect, predicate); + + programDataMemory(commandBuffer, dstGpuAddress, bbStartCmdBuffer, sizeof(MI_BATCH_BUFFER_START)); +} + template void EncodeMiPredicate::encode(LinearStream &cmdStream, [[maybe_unused]] MiPredicateType predicateType) { if constexpr (Family::isUsingMiSetPredicate) { diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 167bcb7703..60322d3ac8 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -1221,4 +1221,52 @@ inline void EncodeWA::setAdditionalPipeControlFlagsForNonPipelineStateCo args.unTypedDataPortCacheFlush = true; } +template +void EncodeDataMemory::programFrontEndState( + LinearStream &commandStream, + uint64_t dstGpuAddress, + const RootDeviceEnvironment &rootDeviceEnvironment, + uint32_t scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads, + const StreamProperties &streamProperties) { + if constexpr (Family::isHeaplessRequired() == false) { + using CFE_STATE = typename Family::CFE_STATE; + + size_t bufferSize = getCommandSizeForEncode(sizeof(CFE_STATE)); + void *commandBuffer = commandStream.getSpace(bufferSize); + EncodeDataMemory::programFrontEndState(commandBuffer, + dstGpuAddress, + rootDeviceEnvironment, + scratchSize, + scratchAddress, + maxFrontEndThreads, + streamProperties); + } +} + +template +void EncodeDataMemory::programFrontEndState( + void *commandBuffer, + uint64_t dstGpuAddress, + const RootDeviceEnvironment &rootDeviceEnvironment, + uint32_t scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads, + const StreamProperties &streamProperties) { + if constexpr (Family::isHeaplessRequired() == false) { + using CFE_STATE = typename Family::CFE_STATE; + + alignas(8) uint8_t feInputCmdBuffer[sizeof(CFE_STATE)]; + PreambleHelper::programVfeState(feInputCmdBuffer, + rootDeviceEnvironment, + scratchSize, + scratchAddress, + maxFrontEndThreads, + streamProperties); + + programDataMemory(commandBuffer, dstGpuAddress, feInputCmdBuffer, sizeof(CFE_STATE)); + } +} + } // namespace NEO diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 82c504ea7e..182a92e8c5 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -802,6 +802,28 @@ bool EncodeEnableRayTracing::is48bResourceNeededForRayTracing() { return true; } +template <> +void EncodeDataMemory::programFrontEndState( + LinearStream &commandStream, + uint64_t dstGpuAddress, + const RootDeviceEnvironment &rootDeviceEnvironment, + uint32_t scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads, + const StreamProperties &streamProperties) { +} + +template <> +void EncodeDataMemory::programFrontEndState( + void *commandBuffer, + uint64_t dstGpuAddress, + const RootDeviceEnvironment &rootDeviceEnvironment, + uint32_t scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads, + const StreamProperties &streamProperties) { +} + } // namespace NEO #include "shared/source/command_container/command_encoder_enablers.inl" diff --git a/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp b/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp index e9c8c8d360..a2b58ade8d 100644 --- a/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp @@ -10,6 +10,7 @@ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/hw_test.h" @@ -235,3 +236,65 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpAndLaterSbaTest, givenNonZeroInternalHeapBaseAd StateBaseAddressHelper::appendStateBaseAddressParameters(args); EXPECT_EQ(0ull, sbaCmd.getGeneralStateBaseAddress()); } + +HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenEncodeDataInMemoryWhenProgrammingFeCmdThenExpectFeCmdDataInDispatchedCommand) { + if constexpr (FamilyType::isHeaplessRequired()) { + constexpr size_t bufferSize = 256; + alignas(8) uint8_t buffer[bufferSize] = {0x0}; + alignas(8) uint8_t zeroBuffer[bufferSize] = {0x0}; + LinearStream cmdStream(buffer, bufferSize); + + MockExecutionEnvironment executionEnvironment{}; + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + StreamProperties properties; + + uint64_t dstGpuAddress = 0x1000; + + EncodeDataMemory::programFrontEndState(buffer, dstGpuAddress, rootDeviceEnvironment, 0x0, 0x0, 0x40, properties); + EXPECT_EQ(0, memcmp(buffer, zeroBuffer, bufferSize)); + + EncodeDataMemory::programFrontEndState(cmdStream, dstGpuAddress, rootDeviceEnvironment, 0x0, 0x0, 0x40, properties); + EXPECT_EQ(0u, cmdStream.getUsed()); + } else { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using CFE_STATE = typename FamilyType::CFE_STATE; + + constexpr size_t cfeStateSizeDwordUnits = sizeof(CFE_STATE) / sizeof(uint32_t); + uint32_t cfeStateCmdBuffer[cfeStateSizeDwordUnits]; + memset(cfeStateCmdBuffer, 0x0, sizeof(CFE_STATE)); + + constexpr size_t bufferSize = 256; + alignas(8) uint8_t buffer[bufferSize] = {0x0}; + LinearStream cmdStream(buffer, bufferSize); + + auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); + StreamProperties properties; + + uint64_t dstGpuAddress = 0x1000; + uint32_t scratchAddress = 0x100; + uint32_t maxFrontEndThreads = 0x40; + EncodeDataMemory::programFrontEndState(cmdStream, dstGpuAddress, rootDeviceEnvironment, 0x0, scratchAddress, maxFrontEndThreads, properties); + HardwareParse hwParser; + hwParser.parseCommands(cmdStream); + auto storeDataImmItList = findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); + + size_t i = 0; + for (auto storeDataImmIt : storeDataImmItList) { + auto storeDataImm = reinterpret_cast(*storeDataImmIt); + EXPECT_EQ(dstGpuAddress + i * sizeof(uint64_t), storeDataImm->getAddress()); + + cfeStateCmdBuffer[2 * i] = storeDataImm->getDataDword0(); + if (storeDataImm->getStoreQword()) { + ASSERT_TRUE(cfeStateSizeDwordUnits > (2 * i + 1)); + cfeStateCmdBuffer[2 * i + 1] = storeDataImm->getDataDword1(); + } + i++; + } + + auto cfeStateCmd = genCmdCast(cfeStateCmdBuffer); + ASSERT_NE(nullptr, cfeStateCmd); + + EXPECT_EQ(scratchAddress, cfeStateCmd->getScratchSpaceBuffer()); + EXPECT_EQ(maxFrontEndThreads, cfeStateCmd->getMaximumNumberOfThreads()); + } +} diff --git a/shared/test/unit_test/encoders/test_command_encoder.cpp b/shared/test/unit_test/encoders/test_command_encoder.cpp index cec0be5804..5d7a030e15 100644 --- a/shared/test/unit_test/encoders/test_command_encoder.cpp +++ b/shared/test/unit_test/encoders/test_command_encoder.cpp @@ -266,7 +266,7 @@ HWTEST_F(CommandEncoderTest, givenEncodeDataInMemoryWhenCorrectSizesProvidedThen using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; constexpr size_t bufferSize = 256; - uint8_t buffer[bufferSize] = {}; + alignas(8) uint8_t buffer[bufferSize] = {}; LinearStream cmdStream(buffer, bufferSize); uint32_t data[5] = {0x1, 0x2, 0x3, 0x4, 0x5}; @@ -419,3 +419,144 @@ HWTEST_F(CommandEncoderTest, givenEncodeDataInMemoryWhenCorrectSizesProvidedThen EXPECT_EQ(data[4], storeDataImm->getDataDword0()); EXPECT_EQ(0u, storeDataImm->getDataDword1()); } + +HWTEST_F(CommandEncoderTest, givenEncodeDataInMemoryWhenProgrammingNoopThenExpectNoopDataInDispatchedCommand) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + + uint32_t expectedNoop[8] = {0}; + uint32_t dispatchedData[8]; + memset(dispatchedData, 0xFF, sizeof(dispatchedData)); + + constexpr size_t bufferSize = 256; + alignas(8) uint8_t buffer[bufferSize] = {}; + LinearStream cmdStream(buffer, bufferSize); + + uint64_t dstGpuAddress = 0x1000; + + // noop 1 dword - 1 dword SDI + EncodeDataMemory::programNoop(cmdStream, dstGpuAddress, sizeof(uint32_t)); + + HardwareParse hwParser; + hwParser.parseCommands(cmdStream); + auto storeDataImmItList = findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_EQ(1u, storeDataImmItList.size()); + + auto storeDataImmIt = storeDataImmItList[0]; + MI_STORE_DATA_IMM *storeDataImm = reinterpret_cast(*storeDataImmIt); + EXPECT_EQ(dstGpuAddress, storeDataImm->getAddress()); + EXPECT_FALSE(storeDataImm->getStoreQword()); + EXPECT_EQ(0u, storeDataImm->getDataDword0()); + + memset(buffer, 0x0, bufferSize); + cmdStream.replaceBuffer(buffer, bufferSize); + hwParser.tearDown(); + + // noop 2 dword - 1 qword SDI + EncodeDataMemory::programNoop(cmdStream, dstGpuAddress, 2 * sizeof(uint32_t)); + + hwParser.parseCommands(cmdStream); + storeDataImmItList = findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_EQ(1u, storeDataImmItList.size()); + + storeDataImmIt = storeDataImmItList[0]; + storeDataImm = reinterpret_cast(*storeDataImmIt); + EXPECT_EQ(dstGpuAddress, storeDataImm->getAddress()); + EXPECT_TRUE(storeDataImm->getStoreQword()); + EXPECT_EQ(0u, storeDataImm->getDataDword0()); + EXPECT_EQ(0u, storeDataImm->getDataDword1()); + + memset(buffer, 0x0, bufferSize); + cmdStream.replaceBuffer(buffer, bufferSize); + hwParser.tearDown(); + + // noop 7 dwords - 3x qword SDI + 1 dword SDI + EncodeDataMemory::programNoop(cmdStream, dstGpuAddress, sizeof(expectedNoop) - sizeof(uint32_t)); + + hwParser.parseCommands(cmdStream); + storeDataImmItList = findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_EQ(4u, storeDataImmItList.size()); + + // verify qwords + size_t i = 0; + for (; i < 3; i++) { + storeDataImmIt = storeDataImmItList[i]; + storeDataImm = reinterpret_cast(*storeDataImmIt); + + EXPECT_EQ(dstGpuAddress + i * sizeof(uint64_t), storeDataImm->getAddress()); + EXPECT_TRUE(storeDataImm->getStoreQword()); + dispatchedData[i * 2] = storeDataImm->getDataDword0(); + dispatchedData[i * 2 + 1] = storeDataImm->getDataDword1(); + } + storeDataImmIt = storeDataImmItList[i]; + storeDataImm = reinterpret_cast(*storeDataImmIt); + EXPECT_EQ(dstGpuAddress + i * sizeof(uint64_t), storeDataImm->getAddress()); + EXPECT_FALSE(storeDataImm->getStoreQword()); + dispatchedData[i * 2] = storeDataImm->getDataDword0(); + + EXPECT_EQ(0, memcmp(expectedNoop, dispatchedData, sizeof(expectedNoop) - sizeof(uint32_t))); + + memset(dispatchedData, 0xFF, sizeof(dispatchedData)); + memset(buffer, 0x0, bufferSize); + cmdStream.replaceBuffer(buffer, bufferSize); + i = 0; + hwParser.tearDown(); + + // noop 8 dwords - 4x qword SDI + EncodeDataMemory::programNoop(cmdStream, dstGpuAddress, sizeof(expectedNoop)); + + hwParser.parseCommands(cmdStream); + storeDataImmItList = findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_EQ(4u, storeDataImmItList.size()); + for (; i < 4; i++) { + storeDataImmIt = storeDataImmItList[i]; + storeDataImm = reinterpret_cast(*storeDataImmIt); + + EXPECT_EQ(dstGpuAddress + i * sizeof(uint64_t), storeDataImm->getAddress()); + EXPECT_TRUE(storeDataImm->getStoreQword()); + dispatchedData[i * 2] = storeDataImm->getDataDword0(); + dispatchedData[i * 2 + 1] = storeDataImm->getDataDword1(); + } + EXPECT_EQ(0, memcmp(expectedNoop, dispatchedData, sizeof(expectedNoop))); +} + +HWTEST_F(CommandEncoderTest, givenEncodeDataInMemoryWhenProgrammingBbStartThenExpectBbStartCmdDataInDispatchedCommand) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + + constexpr size_t bbStartSizeDwordUnits = sizeof(MI_BATCH_BUFFER_START) / sizeof(uint32_t); + uint32_t bbStartCmdBuffer[bbStartSizeDwordUnits]; + memset(bbStartCmdBuffer, 0x0, sizeof(MI_BATCH_BUFFER_START)); + + constexpr size_t bufferSize = 256; + alignas(8) uint8_t buffer[bufferSize] = {}; + LinearStream cmdStream(buffer, bufferSize); + + uint64_t dstGpuAddress = 0x1000; + + uint64_t bbStartAddress = 0x2000; + + EncodeDataMemory::programBbStart(cmdStream, dstGpuAddress, bbStartAddress, false, false, false); + + HardwareParse hwParser; + hwParser.parseCommands(cmdStream); + auto storeDataImmItList = findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); + + size_t i = 0; + for (auto storeDataImmIt : storeDataImmItList) { + auto storeDataImm = reinterpret_cast(*storeDataImmIt); + EXPECT_EQ(dstGpuAddress + i * sizeof(uint64_t), storeDataImm->getAddress()); + + bbStartCmdBuffer[2 * i] = storeDataImm->getDataDword0(); + if (storeDataImm->getStoreQword()) { + ASSERT_TRUE(bbStartSizeDwordUnits > (2 * i + 1)); + bbStartCmdBuffer[2 * i + 1] = storeDataImm->getDataDword1(); + } + i++; + } + + auto bbStartCmd = genCmdCast(bbStartCmdBuffer); + ASSERT_NE(nullptr, bbStartCmd); + + EXPECT_EQ(bbStartAddress, bbStartCmd->getBatchBufferStartAddress()); + EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, bbStartCmd->getSecondLevelBatchBuffer()); +} diff --git a/shared/test/unit_test/gen12lp/test_encode_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_encode_gen12lp.cpp index 2e4726937d..27e8003243 100644 --- a/shared/test/unit_test/gen12lp/test_encode_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_encode_gen12lp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -35,3 +35,22 @@ GEN12LPTEST_F(CommandEncodeGen12LpTest, whenProgrammingStateComputeModeThenPrope EXPECT_EQ(FamilyType::stateComputeModeForceNonCoherentMask, pScm->getMaskBits()); EXPECT_EQ(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED, pScm->getForceNonCoherent()); } + +GEN12LPTEST_F(CommandEncodeGen12LpTest, givenEncodeDataInMemoryWhenProgrammingFeCmdThenTakeNoAction) { + constexpr size_t bufferSize = 256; + alignas(8) uint8_t buffer[bufferSize] = {0x0}; + alignas(8) uint8_t zeroBuffer[bufferSize] = {0x0}; + LinearStream cmdStream(buffer, bufferSize); + + MockExecutionEnvironment executionEnvironment{}; + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + StreamProperties properties; + + uint64_t dstGpuAddress = 0x1000; + + EncodeDataMemory::programFrontEndState(buffer, dstGpuAddress, rootDeviceEnvironment, 0x0, 0x0, 0x40, properties); + EXPECT_EQ(0, memcmp(buffer, zeroBuffer, bufferSize)); + + EncodeDataMemory::programFrontEndState(cmdStream, dstGpuAddress, rootDeviceEnvironment, 0x0, 0x0, 0x40, properties); + EXPECT_EQ(0u, cmdStream.getUsed()); +}