mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 17:13:29 +08:00
feature: update conditional bb start to use qword data
Related-To: NEO-8242 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
def3f2e9ad
commit
2a6be2fccd
@@ -2259,7 +2259,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
|
||||
|
||||
for (uint32_t i = 0; i < this->partitionCount; i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::Less, true);
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::Less, true, false);
|
||||
|
||||
} else {
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
||||
@@ -3365,7 +3365,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
|
||||
for (uint32_t i = 0u; i < packetsToWait; i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, Event::STATE_CLEARED,
|
||||
NEO::CompareOperation::Equal, true);
|
||||
NEO::CompareOperation::Equal, true, false);
|
||||
} else {
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
||||
gpuAddr,
|
||||
|
||||
@@ -510,13 +510,20 @@ HWTEST2_F(MiMath, givenValueToMakeRightAritmeticShiftWhenUseMiMathThenShiftIsDon
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(allocation->getGpuAddress() + 4), &executeSecondShift, sizeof(uint32_t));
|
||||
}
|
||||
|
||||
template <typename CompareDataT>
|
||||
struct ConditionalBbStartTests : public MiMath {
|
||||
using TestCompareDataT = CompareDataT;
|
||||
|
||||
void SetUp() override {
|
||||
MiMath::SetUp();
|
||||
|
||||
std::vector<uint32_t> bufferMemory;
|
||||
std::vector<CompareDataT> bufferMemory;
|
||||
bufferMemory.resize(compareBufferSize);
|
||||
|
||||
if constexpr (isQwordData) {
|
||||
baseCompareValue = 0x1'0000'0000;
|
||||
}
|
||||
|
||||
std::fill(bufferMemory.begin(), bufferMemory.end(), baseCompareValue);
|
||||
|
||||
// bufferMemory[0]; -- Equal. Don't change
|
||||
@@ -527,213 +534,279 @@ struct ConditionalBbStartTests : public MiMath {
|
||||
|
||||
buffer = std::unique_ptr<Buffer>(Buffer::create(context,
|
||||
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
compareBufferSize * sizeof(uint32_t), bufferMemory.data(), retVal));
|
||||
compareBufferSize * sizeof(CompareDataT), bufferMemory.data(), retVal));
|
||||
|
||||
csr->makeResident(*buffer->getGraphicsAllocation(rootDeviceIndex));
|
||||
|
||||
baseGpuVa = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();
|
||||
baseWriteGpuVa = baseGpuVa + (sizeof(uint32_t) * numCompareModes);
|
||||
baseWriteGpuVa = baseGpuVa + (sizeof(CompareDataT) * numCompareModes);
|
||||
}
|
||||
|
||||
template <typename AtomicT>
|
||||
typename AtomicT::ATOMIC_OPCODES getAtomicOpcode() const {
|
||||
return isQwordData ? AtomicT::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT : AtomicT::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT;
|
||||
}
|
||||
|
||||
template <typename AtomicT>
|
||||
typename AtomicT::DATA_SIZE geDataSize() const {
|
||||
return isQwordData ? AtomicT::DATA_SIZE::DATA_SIZE_QWORD : AtomicT::DATA_SIZE::DATA_SIZE_DWORD;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void whenDispatchingEqualModeThenResultsAreValidImpl();
|
||||
|
||||
template <typename FamilyType>
|
||||
void whenDispatchingNotEqualModeThenResultsAreValidImpl();
|
||||
|
||||
template <typename FamilyType>
|
||||
void whenDispatchingGreaterOrEqualModeThenResultsAreValidImpl();
|
||||
|
||||
template <typename FamilyType>
|
||||
void whenDispatchingLessModeThenResultsAreValidImpl();
|
||||
|
||||
uint64_t baseGpuVa = 0;
|
||||
uint64_t baseWriteGpuVa = 0;
|
||||
uint64_t invalidGpuVa = 0x1230000;
|
||||
uint32_t numCompareModes = 3;
|
||||
const size_t compareBufferSize = numCompareModes * 3;
|
||||
const uint32_t baseCompareValue = 10;
|
||||
CompareDataT baseCompareValue = 10;
|
||||
std::unique_ptr<Buffer> buffer;
|
||||
static constexpr bool isQwordData = std::is_same<uint64_t, TestCompareDataT>::value;
|
||||
};
|
||||
|
||||
HWTEST2_F(ConditionalBbStartTests, whenDispatchingEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
using ConditionalBbStartTests32b = ConditionalBbStartTests<uint32_t>;
|
||||
using ConditionalBbStartTests64b = ConditionalBbStartTests<uint64_t>;
|
||||
|
||||
template <typename T>
|
||||
template <typename FamilyType>
|
||||
void ConditionalBbStartTests<T>::whenDispatchingEqualModeThenResultsAreValidImpl() {
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
// Equal
|
||||
{
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::Equal, false);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::Equal, false, isQwordData);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Greater
|
||||
{
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::Equal, false);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::Equal, false, isQwordData);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(TestCompareDataT),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Less
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::Equal, false);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::Equal, false, isQwordData);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(TestCompareDataT) * 2),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
flushStream();
|
||||
|
||||
uint32_t expectedValue = baseCompareValue + 1;
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
|
||||
TestCompareDataT expectedValue = baseCompareValue + 1;
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(TestCompareDataT));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(TestCompareDataT)), &expectedValue, sizeof(TestCompareDataT));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(TestCompareDataT) * 2)), &expectedValue, sizeof(TestCompareDataT));
|
||||
}
|
||||
|
||||
HWTEST2_F(ConditionalBbStartTests, whenDispatchingNotEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
// Equal
|
||||
{
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::NotEqual, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Greater
|
||||
{
|
||||
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::NotEqual, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Less
|
||||
{
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::NotEqual, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
flushStream();
|
||||
|
||||
uint32_t expectedValue = baseCompareValue + 1;
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
|
||||
// 32-bit compare-data variant (ConditionalBbStartTests32b is ConditionalBbStartTests<uint32_t>):
// delegates to the shared Equal-mode implementation for the current FamilyType.
HWTEST2_F(ConditionalBbStartTests32b, whenDispatchingEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
whenDispatchingEqualModeThenResultsAreValidImpl<FamilyType>();
|
||||
}
|
||||
|
||||
HWTEST2_F(ConditionalBbStartTests, whenDispatchingGreaterOrEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
// Equal
|
||||
{
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Greater
|
||||
{
|
||||
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Less
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
flushStream();
|
||||
|
||||
uint32_t expectedValue = baseCompareValue + 1;
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
|
||||
// 64-bit compare-data variant (ConditionalBbStartTests64b is ConditionalBbStartTests<uint64_t>):
// delegates to the shared Equal-mode implementation for the current FamilyType.
HWTEST2_F(ConditionalBbStartTests64b, whenDispatchingEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
whenDispatchingEqualModeThenResultsAreValidImpl<FamilyType>();
|
||||
}
|
||||
|
||||
HWTEST2_F(ConditionalBbStartTests, whenDispatchingLessModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
template <typename T>
|
||||
template <typename FamilyType>
|
||||
void ConditionalBbStartTests<T>::whenDispatchingNotEqualModeThenResultsAreValidImpl() {
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
// Equal
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::Less, false);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::NotEqual, false, isQwordData);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Greater
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::Less, false);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::NotEqual, false, isQwordData);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(TestCompareDataT),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Less
|
||||
{
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::Less, false);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::NotEqual, false, isQwordData);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(TestCompareDataT) * 2),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
flushStream();
|
||||
|
||||
uint32_t expectedValue = baseCompareValue + 1;
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
|
||||
TestCompareDataT expectedValue = baseCompareValue + 1;
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(TestCompareDataT));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(TestCompareDataT)), &expectedValue, sizeof(TestCompareDataT));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(TestCompareDataT) * 2)), &expectedValue, sizeof(TestCompareDataT));
|
||||
}
|
||||
|
||||
// 32-bit compare-data variant (ConditionalBbStartTests32b is ConditionalBbStartTests<uint32_t>):
// delegates to the shared NotEqual-mode implementation for the current FamilyType.
HWTEST2_F(ConditionalBbStartTests32b, whenDispatchingNotEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
whenDispatchingNotEqualModeThenResultsAreValidImpl<FamilyType>();
|
||||
}
|
||||
|
||||
// 64-bit compare-data variant (ConditionalBbStartTests64b is ConditionalBbStartTests<uint64_t>):
// delegates to the shared NotEqual-mode implementation for the current FamilyType.
HWTEST2_F(ConditionalBbStartTests64b, whenDispatchingNotEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
whenDispatchingNotEqualModeThenResultsAreValidImpl<FamilyType>();
|
||||
}
|
||||
|
||||
// Shared body of the GreaterOrEqual-mode conditional batch-buffer-start tests.
// For each of the three compare slots (labelled Equal / Greater / Less below) it programs a
// conditional BB start using CompareOperation::GreaterOrEqual against baseCompareValue, followed
// by an MI_ATOMIC increment of the matching write slot. After flushStream() every write slot is
// expected to hold baseCompareValue + 1.
// isQwordData is forwarded to the encoder and to the command-size query; the atomic opcode and
// data size come from the fixture helpers getAtomicOpcode() / geDataSize().
template <typename T>
|
||||
template <typename FamilyType>
|
||||
void ConditionalBbStartTests<T>::whenDispatchingGreaterOrEqualModeThenResultsAreValidImpl() {
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
// Equal
|
||||
{
|
||||
// Jump target = current stream position + size of this conditional BB start + size of one
// BB end, i.e. the command programmed right after the BB end below.
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false, isQwordData);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Greater
|
||||
{
|
||||
|
||||
// Same jump-over-BB-end layout as the Equal slot, applied to the second compare slot.
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false, isQwordData);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(TestCompareDataT),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Less
|
||||
{
|
||||
// Target is invalidGpuVa and no BB end follows: presumably the jump must NOT be taken for
// this slot, so execution falls through to the atomic. TODO confirm against the slot
// values written in SetUp.
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false, isQwordData);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(TestCompareDataT) * 2),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
flushStream();
|
||||
|
||||
// Every write slot should have been incremented exactly once; assumes the write slots start
// at baseCompareValue (SetUp fills the buffer with it) -- TODO confirm.
TestCompareDataT expectedValue = baseCompareValue + 1;
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(TestCompareDataT));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(TestCompareDataT)), &expectedValue, sizeof(TestCompareDataT));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(TestCompareDataT) * 2)), &expectedValue, sizeof(TestCompareDataT));
|
||||
}
|
||||
|
||||
// 32-bit compare-data variant (ConditionalBbStartTests32b is ConditionalBbStartTests<uint32_t>):
// delegates to the shared GreaterOrEqual-mode implementation for the current FamilyType.
HWTEST2_F(ConditionalBbStartTests32b, whenDispatchingGreaterOrEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
whenDispatchingGreaterOrEqualModeThenResultsAreValidImpl<FamilyType>();
|
||||
}
|
||||
|
||||
// 64-bit compare-data variant (ConditionalBbStartTests64b is ConditionalBbStartTests<uint64_t>):
// delegates to the shared GreaterOrEqual-mode implementation for the current FamilyType.
HWTEST2_F(ConditionalBbStartTests64b, whenDispatchingGreaterOrEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
whenDispatchingGreaterOrEqualModeThenResultsAreValidImpl<FamilyType>();
|
||||
}
|
||||
|
||||
// Shared body of the Less-mode conditional batch-buffer-start tests.
// For each of the three compare slots (labelled Equal / Greater / Less below) it programs a
// conditional BB start using CompareOperation::Less against baseCompareValue, followed by an
// MI_ATOMIC increment of the matching write slot. After flushStream() every write slot is
// expected to hold baseCompareValue + 1.
// isQwordData is forwarded to the encoder and to the command-size query; the atomic opcode and
// data size come from the fixture helpers getAtomicOpcode() / geDataSize().
template <typename T>
|
||||
template <typename FamilyType>
|
||||
void ConditionalBbStartTests<T>::whenDispatchingLessModeThenResultsAreValidImpl() {
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
// Equal
|
||||
{
|
||||
// Target is invalidGpuVa and no BB end follows: presumably the jump must NOT be taken for
// this slot, so execution falls through to the atomic. TODO confirm against the slot
// values written in SetUp.
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::Less, false, isQwordData);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Greater
|
||||
{
|
||||
// Same no-jump layout as the Equal slot, applied to the second compare slot.
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::Less, false, isQwordData);
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(TestCompareDataT),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// Less
|
||||
{
|
||||
// Jump target = current stream position + size of this conditional BB start + size of one
// BB end, i.e. the command programmed right after the BB end below.
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::Less, false, isQwordData);
|
||||
|
||||
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
|
||||
|
||||
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(TestCompareDataT) * 2),
|
||||
getAtomicOpcode<MI_ATOMIC>(),
|
||||
geDataSize<MI_ATOMIC>(),
|
||||
0, 0, 0, 0);
|
||||
}
|
||||
|
||||
flushStream();
|
||||
|
||||
// Every write slot should have been incremented exactly once; assumes the write slots start
// at baseCompareValue (SetUp fills the buffer with it) -- TODO confirm.
TestCompareDataT expectedValue = baseCompareValue + 1;
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(TestCompareDataT));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(TestCompareDataT)), &expectedValue, sizeof(TestCompareDataT));
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(TestCompareDataT) * 2)), &expectedValue, sizeof(TestCompareDataT));
|
||||
}
|
||||
|
||||
// 32-bit compare-data variant (ConditionalBbStartTests32b is ConditionalBbStartTests<uint32_t>):
// delegates to the shared Less-mode implementation for the current FamilyType.
HWTEST2_F(ConditionalBbStartTests32b, whenDispatchingLessModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
whenDispatchingLessModeThenResultsAreValidImpl<FamilyType>();
|
||||
}
|
||||
|
||||
// 64-bit compare-data variant (ConditionalBbStartTests64b is ConditionalBbStartTests<uint64_t>):
// delegates to the shared Less-mode implementation for the current FamilyType.
HWTEST2_F(ConditionalBbStartTests64b, whenDispatchingLessModeThenResultsAreValid, IsAtLeastXeHpcCore) {
|
||||
whenDispatchingLessModeThenResultsAreValidImpl<FamilyType>();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1130,7 +1130,7 @@ HWTEST_F(EnqueueKernelTest, givenRelaxedOrderingEnabledWhenCheckingSizeForCsThen
|
||||
auto newCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, nullptr);
|
||||
|
||||
auto semaphoresSize = numberOfDependencyContainers * numberNodesPerContainer * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT);
|
||||
auto conditionalBbsSize = numberOfDependencyContainers * numberNodesPerContainer * EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart();
|
||||
auto conditionalBbsSize = numberOfDependencyContainers * numberNodesPerContainer * EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false);
|
||||
auto registersSize = 2 * EncodeSetMMIO<FamilyType>::sizeREG;
|
||||
|
||||
auto expectedSize = baseCommandStreamSize - semaphoresSize + conditionalBbsSize + registersSize;
|
||||
@@ -1296,10 +1296,10 @@ HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenBarrierWithDependenciesWhenFlu
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::Equal, true));
|
||||
|
||||
auto conditionalBbStart2 = reinterpret_cast<void *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart()));
|
||||
auto conditionalBbStart2 = reinterpret_cast<void *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(conditionalBbStart2, 0, compareAddress, 1, CompareOperation::Equal, true));
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(conditionalBbStart2, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart()));
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(conditionalBbStart2, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
|
||||
EXPECT_NE(nullptr, sdiCmd);
|
||||
|
||||
clReleaseEvent(outEvent);
|
||||
|
||||
@@ -439,17 +439,23 @@ struct EncodeBatchBufferStartOrEnd {
|
||||
static void programBatchBufferEnd(CommandContainer &container);
|
||||
static void programBatchBufferEnd(LinearStream &commandStream);
|
||||
|
||||
static void programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareData, CompareOperation compareOperation, bool indirect);
|
||||
static void programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg, uint32_t compareData, CompareOperation compareOperation, bool indirect);
|
||||
static void programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData);
|
||||
static void programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData);
|
||||
static void programConditionalRegRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, AluRegisters compareReg0, AluRegisters compareReg1, CompareOperation compareOperation, bool indirect);
|
||||
static void programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg, CompareOperation compareOperation, bool indirect);
|
||||
|
||||
static size_t constexpr getCmdSizeConditionalDataMemBatchBufferStart() {
|
||||
return (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) + (3 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
|
||||
static size_t constexpr getCmdSizeConditionalDataMemBatchBufferStart(bool useQwordData) {
|
||||
size_t size = (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) + (2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
|
||||
size += useQwordData ? sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) : sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static size_t constexpr getCmdSizeConditionalDataRegBatchBufferStart() {
|
||||
return (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_REG) + (3 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
|
||||
static size_t constexpr getCmdSizeConditionalDataRegBatchBufferStart(bool useQwordData) {
|
||||
size_t size = (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_REG) + (2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
|
||||
size += useQwordData ? sizeof(typename GfxFamily::MI_LOAD_REGISTER_REG) : sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static size_t constexpr getCmdSizeConditionalRegMemBatchBufferStart() {
|
||||
|
||||
@@ -859,24 +859,39 @@ void EncodeAtomic<Family>::programMiAtomic(LinearStream &commandStream,
|
||||
|
||||
template <typename Family>
|
||||
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress,
|
||||
uint32_t compareData, CompareOperation compareOperation, bool indirect) {
|
||||
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, CS_GPR_R7, compareAddress);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
|
||||
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareData, true);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, 0, true);
|
||||
if (useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, CS_GPR_R7 + 4, compareAddress + 4);
|
||||
} else {
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
|
||||
}
|
||||
|
||||
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
|
||||
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
|
||||
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareDataLow, true);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, compareDataHigh, true);
|
||||
|
||||
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::R_7, AluRegisters::R_8, compareOperation, indirect);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg,
|
||||
uint32_t compareData, CompareOperation compareOperation, bool indirect) {
|
||||
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, CS_GPR_R7, compareReg);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
|
||||
if (useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, CS_GPR_R7 + 4, compareReg + 4);
|
||||
} else {
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
|
||||
}
|
||||
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareData, true);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, 0, true);
|
||||
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
|
||||
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
|
||||
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareDataLow, true);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, compareDataHigh, true);
|
||||
|
||||
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::R_7, AluRegisters::R_8, compareOperation, indirect);
|
||||
}
|
||||
|
||||
@@ -122,7 +122,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true);
|
||||
@@ -178,7 +178,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R7, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R7 + 4, 0, true);
|
||||
@@ -241,12 +241,12 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
loopSectionStartAddress,
|
||||
CS_GPR_R1, currentRelaxedOrderingQueueSize, CompareOperation::GreaterOrEqual, false);
|
||||
CS_GPR_R1, currentRelaxedOrderingQueueSize, CompareOperation::GreaterOrEqual, false, false);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
loopSectionStartAddress,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false);
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false, false);
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
@@ -780,15 +780,15 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStall() {
|
||||
LinearStream bbStartStream(ringCommandStream.getSpace(EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart()),
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
LinearStream bbStartStream(ringCommandStream.getSpace(EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false)),
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 1, true);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
|
||||
// patch conditional bb_start with current GPU address
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false, false);
|
||||
|
||||
relaxedOrderingSchedulerRequired = false;
|
||||
}
|
||||
@@ -796,7 +796,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStal
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrderingQueueStall() {
|
||||
return getSizeSemaphoreSection(true) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) +
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
|
||||
@@ -62,7 +62,7 @@ struct StaticSchedulerSizeAndOffsetSection {
|
||||
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
|
||||
static constexpr uint64_t initSectionSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart() + (6 * sizeof(MI_LOAD_REGISTER_IMM)) +
|
||||
static constexpr uint64_t initSectionSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false) + (6 * sizeof(MI_LOAD_REGISTER_IMM)) +
|
||||
EncodeMiPredicate<GfxFamily>::getCmdSize() + (2 * sizeof(MI_LOAD_REGISTER_REG));
|
||||
|
||||
static constexpr uint64_t loopStartSectionStart = initSectionSize;
|
||||
@@ -70,7 +70,7 @@ struct StaticSchedulerSizeAndOffsetSection {
|
||||
EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t removeTaskSectionStart = loopStartSectionStart + loopStartSectionSize;
|
||||
static constexpr uint64_t removeStartSectionSize = (2 * EncodeMathMMIO<GfxFamily>::getCmdSizeForIncrementOrDecrement()) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart() +
|
||||
static constexpr uint64_t removeStartSectionSize = (2 * EncodeMathMMIO<GfxFamily>::getCmdSizeForIncrementOrDecrement()) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false) +
|
||||
(4 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper<GfxFamily, 14>::getCmdsSize() + EncodeMiPredicate<GfxFamily>::getCmdSize() +
|
||||
(2 * sizeof(MI_LOAD_REGISTER_REG));
|
||||
|
||||
@@ -79,7 +79,7 @@ struct StaticSchedulerSizeAndOffsetSection {
|
||||
(2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize;
|
||||
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + (2 * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + (2 * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
|
||||
static constexpr uint64_t schedulerLoopCheckSectionStart = drainRequestSectionStart + drainRequestSectionSize;
|
||||
static constexpr uint64_t schedulerLoopCheckSectionSize = (2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper<GfxFamily, 4>::getCmdsSize() +
|
||||
|
||||
@@ -116,7 +116,7 @@ struct TimestampPacketHelper {
|
||||
uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize();
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(cmdStream, 0, compareAddress + compareOffset, TimestampPacketConstants::initValue,
|
||||
NEO::CompareOperation::Equal, true);
|
||||
NEO::CompareOperation::Equal, true, false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -197,7 +197,7 @@ struct TimestampPacketHelper {
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForRelaxedOrderingNodeDependency(TagNodeBase &timestampPacketNode) {
|
||||
return (timestampPacketNode.getPacketsUsed() * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataMemBatchBufferStart());
|
||||
return (timestampPacketNode.getPacketsUsed() * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataMemBatchBufferStart(false));
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -1102,7 +1102,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1243,7 +1243,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R7, 8)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1369,7 +1369,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
return false;
|
||||
}
|
||||
|
||||
auto conditionalBbStartcmds = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
auto conditionalBbStartcmds = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(conditionalBbStartcmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
|
||||
@@ -1377,7 +1377,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionSize))) {
|
||||
return false;
|
||||
@@ -1896,13 +1896,13 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
||||
|
||||
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false);
|
||||
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(startAddress, expectedJumpAddress, CS_GPR_R1, 0, CompareOperation::Equal, false));
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
hwParse.findHardwareCommands<FamilyType>();
|
||||
|
||||
bool success = false;
|
||||
@@ -1997,13 +1997,13 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
||||
|
||||
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false);
|
||||
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(startAddress, expectedJumpAddress, CS_GPR_R1, 0, CompareOperation::Equal, false));
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
hwParse.findHardwareCommands<FamilyType>();
|
||||
|
||||
bool success = false;
|
||||
|
||||
@@ -96,7 +96,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataM
|
||||
EncodeAluHelper<FamilyType, 4>::getCmdsSize() + sizeof(typename FamilyType::MI_LOAD_REGISTER_REG) +
|
||||
(2 * EncodeMiPredicate<FamilyType>::getCmdSize()) + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart());
|
||||
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false));
|
||||
|
||||
constexpr uint64_t startAddress = 0x12340000;
|
||||
constexpr uint64_t compareAddress = 0x56780000;
|
||||
@@ -107,7 +107,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataM
|
||||
uint8_t buffer[expectedSize] = {};
|
||||
LinearStream cmdStream(buffer, expectedSize);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect, false);
|
||||
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
@@ -132,6 +132,51 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataM
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDataMemBatchBufferStartThenProgramCorrectMathOperations, IsAtLeastXeHpcCore) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
constexpr size_t expectedSize = (2 * sizeof(MI_LOAD_REGISTER_MEM)) + (2 * sizeof(MI_LOAD_REGISTER_IMM)) +
|
||||
EncodeAluHelper<FamilyType, 4>::getCmdsSize() + sizeof(typename FamilyType::MI_LOAD_REGISTER_REG) +
|
||||
(2 * EncodeMiPredicate<FamilyType>::getCmdSize()) + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(true));
|
||||
|
||||
constexpr uint64_t startAddress = 0x12340000;
|
||||
constexpr uint64_t compareAddress = 0x56780000;
|
||||
constexpr uint64_t compareData = 0x12345678'12345678;
|
||||
|
||||
for (auto compareOperation : {CompareOperation::Equal, CompareOperation::NotEqual, CompareOperation::GreaterOrEqual}) {
|
||||
for (bool indirect : {false, true}) {
|
||||
uint8_t buffer[expectedSize] = {};
|
||||
LinearStream cmdStream(buffer, expectedSize);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect, true);
|
||||
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
auto lrmCmd = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(buffer);
|
||||
EXPECT_EQ(CS_GPR_R7, lrmCmd->getRegisterAddress());
|
||||
EXPECT_EQ(compareAddress, lrmCmd->getMemoryAddress());
|
||||
|
||||
lrmCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lrmCmd->getRegisterAddress());
|
||||
EXPECT_EQ(compareAddress + 4, lrmCmd->getMemoryAddress());
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrmCmd);
|
||||
EXPECT_EQ(CS_GPR_R8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max()), lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(compareData >> 32), lriCmd->getDataDword());
|
||||
|
||||
validateBaseProgramming<FamilyType>(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataRegBatchBufferStartThenProgramCorrectMathOperations, IsAtLeastXeHpcCore) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
@@ -141,7 +186,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
|
||||
EncodeAluHelper<FamilyType, 4>::getCmdsSize() + sizeof(typename FamilyType::MI_LOAD_REGISTER_REG) +
|
||||
(2 * EncodeMiPredicate<FamilyType>::getCmdSize()) + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
|
||||
constexpr uint64_t startAddress = 0x12340000;
|
||||
constexpr uint32_t compareReg = CS_GPR_R1;
|
||||
@@ -152,7 +197,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
|
||||
uint8_t buffer[expectedSize] = {};
|
||||
LinearStream cmdStream(buffer, expectedSize);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect);
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect, false);
|
||||
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
@@ -177,6 +222,51 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDataRegBatchBufferStartThenProgramCorrectMathOperations, IsAtLeastXeHpcCore) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
constexpr size_t expectedSize = (2 * sizeof(MI_LOAD_REGISTER_REG)) + (2 * sizeof(MI_LOAD_REGISTER_IMM)) +
|
||||
EncodeAluHelper<FamilyType, 4>::getCmdsSize() + sizeof(typename FamilyType::MI_LOAD_REGISTER_REG) +
|
||||
(2 * EncodeMiPredicate<FamilyType>::getCmdSize()) + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(true));
|
||||
|
||||
constexpr uint64_t startAddress = 0x12340000;
|
||||
constexpr uint32_t compareReg = CS_GPR_R1;
|
||||
constexpr uint64_t compareData = 0x12345678'12345678;
|
||||
|
||||
for (auto compareOperation : {CompareOperation::Equal, CompareOperation::NotEqual, CompareOperation::GreaterOrEqual}) {
|
||||
for (bool indirect : {false, true}) {
|
||||
uint8_t buffer[expectedSize] = {};
|
||||
LinearStream cmdStream(buffer, expectedSize);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect, true);
|
||||
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(buffer);
|
||||
EXPECT_EQ(CS_GPR_R7, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(compareReg, lrrCmd->getSourceRegisterAddress());
|
||||
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(compareReg + 4, lrrCmd->getSourceRegisterAddress());
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrrCmd);
|
||||
EXPECT_EQ(CS_GPR_R8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max()), lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(compareData >> 32), lriCmd->getDataDword());
|
||||
|
||||
validateBaseProgramming<FamilyType>(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalRegRegBatchBufferStartThenProgramCorrectMathOperations, IsAtLeastXeHpcCore) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user