From c1f71ea7f7045675fb6112fa6147c187388d8ca6 Mon Sep 17 00:00:00 2001
From: "Dunajski, Bartosz"
Date: Fri, 5 May 2023 10:14:07 +0000
Subject: [PATCH] feature: new conditional bb_start mode + aub tests

Related-To: LOCI-4332
Signed-off-by: Dunajski, Bartosz
---
 .../mi_math_aub_tests_dg2_and_later.cpp       | 228 ++++++++++++++++++
 .../command_container/command_encoder.h       |   3 +-
 .../command_container/command_encoder.inl     |   7 +-
 3 files changed, 234 insertions(+), 4 deletions(-)

diff --git a/opencl/test/unit_test/aub_tests/command_stream/mi_math_aub_tests_dg2_and_later.cpp b/opencl/test/unit_test/aub_tests/command_stream/mi_math_aub_tests_dg2_and_later.cpp
index 35a6834149..6c41b4bd31 100644
--- a/opencl/test/unit_test/aub_tests/command_stream/mi_math_aub_tests_dg2_and_later.cpp
+++ b/opencl/test/unit_test/aub_tests/command_stream/mi_math_aub_tests_dg2_and_later.cpp
@@ -5,6 +5,7 @@
  *
  */
 
+#include "shared/source/command_container/command_encoder.h"
 #include "shared/source/helpers/register_offsets.h"
 #include "shared/source/memory_manager/allocation_properties.h"
 #include "shared/source/memory_manager/memory_manager.h"
@@ -508,4 +509,231 @@ HWTEST2_F(MiMath, givenValueToMakeRightAritmeticShiftWhenUseMiMathThenShiftIsDon
     expectNotEqualMemory<FamilyType>(reinterpret_cast<void *>(allocation->getGpuAddress() + 4), &secondShift, sizeof(uint32_t));
     expectMemory<FamilyType>(reinterpret_cast<void *>(allocation->getGpuAddress() + 4), &executeSecondShift, sizeof(uint32_t));
 }
+
+struct ConditionalBbStartTests : public MiMath {
+    void SetUp() override {
+        MiMath::SetUp();
+
+        std::vector<uint32_t> bufferMemory;
+        bufferMemory.resize(compareBufferSize);
+
+        std::fill(bufferMemory.begin(), bufferMemory.end(), baseCompareValue);
+
+        // bufferMemory[0]; -- Equal. Dont change
+        bufferMemory[1] += 5; // Greater
+        bufferMemory[2] -= 5; // Less
+
+        cl_int retVal = CL_SUCCESS;
+
+        buffer = std::unique_ptr<Buffer>(Buffer::create(context,
+                                                        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                                        compareBufferSize * sizeof(uint32_t), bufferMemory.data(), retVal));
+
+        csr->makeResident(*buffer->getGraphicsAllocation(rootDeviceIndex));
+
+        baseGpuVa = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();
+        baseWriteGpuVa = baseGpuVa + (sizeof(uint32_t) * numCompareModes);
+    }
+
+    uint64_t baseGpuVa = 0;
+    uint64_t baseWriteGpuVa = 0;
+    uint64_t invalidGpuVa = 0x1230000;
+    uint32_t numCompareModes = 3;
+    const size_t compareBufferSize = numCompareModes * 3;
+    const uint32_t baseCompareValue = 10;
+    std::unique_ptr<Buffer> buffer;
+};
+
+HWTEST2_F(ConditionalBbStartTests, whenDispatchingEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
+    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
+
+    // Equal
+    {
+        uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
+
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::Equal, false);
+
+        NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    // Greater
+    {
+
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::Equal, false);
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    // Less
+    {
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::Equal, false);
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    flushStream();
+
+    uint32_t expectedValue = baseCompareValue + 1;
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
+}
+
+HWTEST2_F(ConditionalBbStartTests, whenDispatchingNotEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
+    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
+
+    // Equal
+    {
+
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::NotEqual, false);
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    // Greater
+    {
+
+        uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
+
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::NotEqual, false);
+
+        NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    // Less
+    {
+        uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
+
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::NotEqual, false);
+
+        NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    flushStream();
+
+    uint32_t expectedValue = baseCompareValue + 1;
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
+}
+
+HWTEST2_F(ConditionalBbStartTests, whenDispatchingGreaterOrEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
+    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
+
+    // Equal
+    {
+        uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
+
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
+
+        NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    // Greater
+    {
+
+        uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
+
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
+
+        NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    // Less
+    {
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    flushStream();
+
+    uint32_t expectedValue = baseCompareValue + 1;
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
+}
+
+HWTEST2_F(ConditionalBbStartTests, whenDispatchingLessModeThenResultsAreValid, IsAtLeastXeHpcCore) {
+    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
+
+    // Equal
+    {
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::Less, false);
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    // Greater
+    {
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::Less, false);
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    // Less
+    {
+        uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
+
+        EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::Less, false);
+
+        NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
+
+        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
+                                                  MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
+                                                  MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
+                                                  0, 0, 0, 0);
+    }
+
+    flushStream();
+
+    uint32_t expectedValue = baseCompareValue + 1;
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
+    expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
+}
+
 } // namespace NEO
diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h
index 25373c8600..7b02bbc530 100644
--- a/shared/source/command_container/command_encoder.h
+++ b/shared/source/command_container/command_encoder.h
@@ -75,7 +75,8 @@ enum class MiPredicateType : uint32_t {
 enum class CompareOperation : uint32_t {
     Equal = 0,
     NotEqual = 1,
-    GreaterOrEqual = 2
+    GreaterOrEqual = 2,
+    Less = 3,
 };
 
 struct EncodeWalkerArgs {
diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl
index 445c40ea69..955497cc3a 100644
--- a/shared/source/command_container/command_encoder.inl
+++ b/shared/source/command_container/command_encoder.inl
@@ -911,16 +911,17 @@ void EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalBatchBufferStart
 
     if ((compareOperation == CompareOperation::Equal) || (compareOperation == CompareOperation::NotEqual)) {
         aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ZF);
-    } else {
-        UNRECOVERABLE_IF(compareOperation != CompareOperation::GreaterOrEqual);
+    } else if ((compareOperation == CompareOperation::GreaterOrEqual) || (compareOperation == CompareOperation::Less)) {
         aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_CF);
+    } else {
+        UNRECOVERABLE_IF(true);
     }
 
     aluHelper.copyToCmdStream(commandStream);
 
     EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, CS_PREDICATE_RESULT_2, CS_GPR_R7);
 
-    MiPredicateType predicateType = MiPredicateType::NoopOnResult2Clear; // Equal
+    MiPredicateType predicateType = MiPredicateType::NoopOnResult2Clear; // Equal or Less
     if ((compareOperation == CompareOperation::NotEqual) || (compareOperation == CompareOperation::GreaterOrEqual)) {
         predicateType = MiPredicateType::NoopOnResult2Set;
     }