feature: update conditional bb start to use qword data

Related-To: NEO-8242

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-09-11 17:20:00 +00:00
committed by Compute-Runtime-Automation
parent def3f2e9ad
commit 2a6be2fccd
10 changed files with 363 additions and 179 deletions

View File

@@ -2259,7 +2259,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
for (uint32_t i = 0; i < this->partitionCount; i++) {
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::Less, true);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::Less, true, false);
} else {
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
@@ -3365,7 +3365,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
for (uint32_t i = 0u; i < packetsToWait; i++) {
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, Event::STATE_CLEARED,
NEO::CompareOperation::Equal, true);
NEO::CompareOperation::Equal, true, false);
} else {
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
gpuAddr,

View File

@@ -510,13 +510,20 @@ HWTEST2_F(MiMath, givenValueToMakeRightAritmeticShiftWhenUseMiMathThenShiftIsDon
expectMemory<FamilyType>(reinterpret_cast<void *>(allocation->getGpuAddress() + 4), &executeSecondShift, sizeof(uint32_t));
}
template <typename CompareDataT>
struct ConditionalBbStartTests : public MiMath {
using TestCompareDataT = CompareDataT;
void SetUp() override {
MiMath::SetUp();
std::vector<uint32_t> bufferMemory;
std::vector<CompareDataT> bufferMemory;
bufferMemory.resize(compareBufferSize);
if constexpr (isQwordData) {
baseCompareValue = 0x1'0000'0000;
}
std::fill(bufferMemory.begin(), bufferMemory.end(), baseCompareValue);
// bufferMemory[0]; -- Equal. Don't change
@@ -527,213 +534,279 @@ struct ConditionalBbStartTests : public MiMath {
buffer = std::unique_ptr<Buffer>(Buffer::create(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
compareBufferSize * sizeof(uint32_t), bufferMemory.data(), retVal));
compareBufferSize * sizeof(CompareDataT), bufferMemory.data(), retVal));
csr->makeResident(*buffer->getGraphicsAllocation(rootDeviceIndex));
baseGpuVa = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();
baseWriteGpuVa = baseGpuVa + (sizeof(uint32_t) * numCompareModes);
baseWriteGpuVa = baseGpuVa + (sizeof(CompareDataT) * numCompareModes);
}
// Selects the increment opcode of the given MI_ATOMIC-like type that matches the
// width of the fixture's compare data: the 8-byte variant when isQwordData is set,
// the 4-byte variant otherwise.
template <typename AtomicT>
typename AtomicT::ATOMIC_OPCODES getAtomicOpcode() const {
    using Opcodes = typename AtomicT::ATOMIC_OPCODES;
    if constexpr (isQwordData) {
        return Opcodes::ATOMIC_8B_INCREMENT;
    } else {
        return Opcodes::ATOMIC_4B_INCREMENT;
    }
}
// Selects the DATA_SIZE enumerator of the given MI_ATOMIC-like type that matches the
// width of the fixture's compare data: QWORD when isQwordData is set, DWORD otherwise.
template <typename AtomicT>
typename AtomicT::DATA_SIZE geDataSize() const {
    using DataSize = typename AtomicT::DATA_SIZE;
    if constexpr (isQwordData) {
        return DataSize::DATA_SIZE_QWORD;
    } else {
        return DataSize::DATA_SIZE_DWORD;
    }
}
template <typename FamilyType>
void whenDispatchingEqualModeThenResultsAreValidImpl();
template <typename FamilyType>
void whenDispatchingNotEqualModeThenResultsAreValidImpl();
template <typename FamilyType>
void whenDispatchingGreaterOrEqualModeThenResultsAreValidImpl();
template <typename FamilyType>
void whenDispatchingLessModeThenResultsAreValidImpl();
uint64_t baseGpuVa = 0;
uint64_t baseWriteGpuVa = 0;
uint64_t invalidGpuVa = 0x1230000;
uint32_t numCompareModes = 3;
const size_t compareBufferSize = numCompareModes * 3;
const uint32_t baseCompareValue = 10;
CompareDataT baseCompareValue = 10;
std::unique_ptr<Buffer> buffer;
static constexpr bool isQwordData = std::is_same<uint64_t, TestCompareDataT>::value;
};
HWTEST2_F(ConditionalBbStartTests, whenDispatchingEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
using ConditionalBbStartTests32b = ConditionalBbStartTests<uint32_t>;
using ConditionalBbStartTests64b = ConditionalBbStartTests<uint64_t>;
// Shared scenario for CompareOperation::Equal, used by both the 32-bit and the 64-bit fixture.
// For each of the three compare slots (Equal, Greater, Less) a conditional batch buffer start
// comparing the slot against baseCompareValue is programmed, followed by an atomic increment
// of the matching result slot behind baseWriteGpuVa:
//  - Equal slot: the jump targets the address right behind the following batch buffer end,
//    so the batch buffer end is expected to be skipped.
//  - Greater and Less slots: the jump target is invalidGpuVa and no batch buffer end is
//    programmed, i.e. execution is expected to fall through to the increment.
// After flushing, every result slot must hold baseCompareValue + 1.
// NOTE(review): this span comes from a rendered diff; where two near-identical lines follow
// each other, the first is the removed version and the second is its replacement.
template <typename T>
template <typename FamilyType>
void ConditionalBbStartTests<T>::whenDispatchingEqualModeThenResultsAreValidImpl() {
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
// Equal
{
// Jump target = current position + size of the conditional BB start + size of the batch buffer end.
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::Equal, false);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::Equal, false, isQwordData);
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
getAtomicOpcode<MI_ATOMIC>(),
geDataSize<MI_ATOMIC>(),
0, 0, 0, 0);
}
// Greater
{
// Compare slot and result slot are addressed one element past the Equal slot.
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::Equal, false);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::Equal, false, isQwordData);
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(TestCompareDataT),
getAtomicOpcode<MI_ATOMIC>(),
geDataSize<MI_ATOMIC>(),
0, 0, 0, 0);
}
// Less
{
// Compare slot and result slot are addressed two elements past the Equal slot.
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::Equal, false);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::Equal, false, isQwordData);
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(TestCompareDataT) * 2),
getAtomicOpcode<MI_ATOMIC>(),
geDataSize<MI_ATOMIC>(),
0, 0, 0, 0);
}
flushStream();
// Each result slot is expected to have been incremented exactly once.
uint32_t expectedValue = baseCompareValue + 1;
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
TestCompareDataT expectedValue = baseCompareValue + 1;
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(TestCompareDataT));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(TestCompareDataT)), &expectedValue, sizeof(TestCompareDataT));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(TestCompareDataT) * 2)), &expectedValue, sizeof(TestCompareDataT));
}
HWTEST2_F(ConditionalBbStartTests, whenDispatchingNotEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
// Equal
{
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::NotEqual, false);
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
0, 0, 0, 0);
}
// Greater
{
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::NotEqual, false);
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
0, 0, 0, 0);
}
// Less
{
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::NotEqual, false);
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
0, 0, 0, 0);
}
flushStream();
uint32_t expectedValue = baseCompareValue + 1;
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
// Runs the shared Equal-mode scenario with 32-bit compare data
// (ConditionalBbStartTests32b is ConditionalBbStartTests<uint32_t>).
HWTEST2_F(ConditionalBbStartTests32b, whenDispatchingEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
whenDispatchingEqualModeThenResultsAreValidImpl<FamilyType>();
}
HWTEST2_F(ConditionalBbStartTests, whenDispatchingGreaterOrEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
// Equal
{
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa, baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
0, 0, 0, 0);
}
// Greater
{
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
0, 0, 0, 0);
}
// Less
{
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::GreaterOrEqual, false);
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
0, 0, 0, 0);
}
flushStream();
uint32_t expectedValue = baseCompareValue + 1;
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
// Runs the shared Equal-mode scenario with 64-bit compare data
// (ConditionalBbStartTests64b is ConditionalBbStartTests<uint64_t>).
HWTEST2_F(ConditionalBbStartTests64b, whenDispatchingEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
whenDispatchingEqualModeThenResultsAreValidImpl<FamilyType>();
}
HWTEST2_F(ConditionalBbStartTests, whenDispatchingLessModeThenResultsAreValid, IsAtLeastXeHpcCore) {
// Shared scenario for CompareOperation::NotEqual, used by both the 32-bit and the 64-bit fixture.
// For each of the three compare slots (Equal, Greater, Less) a conditional batch buffer start
// comparing the slot against baseCompareValue is programmed, followed by an atomic increment
// of the matching result slot behind baseWriteGpuVa:
//  - Equal slot: the jump target is invalidGpuVa and no batch buffer end is programmed,
//    i.e. execution is expected to fall through to the increment.
//  - Greater and Less slots: the jump targets the address right behind the following
//    batch buffer end, so the batch buffer end is expected to be skipped.
// After flushing, every result slot must hold baseCompareValue + 1.
// NOTE(review): this span comes from a rendered diff. Lines using CompareOperation::Less,
// the parameterless getCmdSizeConditionalDataMemBatchBufferStart(), sizeof(uint32_t) or the
// fixed 4B/DWORD atomic enumerators are removed lines of the previous Less-mode test; the
// lines using NotEqual / isQwordData / TestCompareDataT form the new implementation.
template <typename T>
template <typename FamilyType>
void ConditionalBbStartTests<T>::whenDispatchingNotEqualModeThenResultsAreValidImpl() {
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
// Equal
{
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::Less, false);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa, baseCompareValue, NEO::CompareOperation::NotEqual, false, isQwordData);
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa,
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
getAtomicOpcode<MI_ATOMIC>(),
geDataSize<MI_ATOMIC>(),
0, 0, 0, 0);
}
// Greater
{
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, baseGpuVa + sizeof(uint32_t), baseCompareValue, NEO::CompareOperation::Less, false);
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(uint32_t),
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
// Jump target = current position + size of the conditional BB start + size of the batch buffer end.
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + sizeof(TestCompareDataT), baseCompareValue, NEO::CompareOperation::NotEqual, false, isQwordData);
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + sizeof(TestCompareDataT),
getAtomicOpcode<MI_ATOMIC>(),
geDataSize<MI_ATOMIC>(),
0, 0, 0, 0);
}
// Less
{
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart() + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
uint64_t jumpAddress = taskStream->getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + EncodeBatchBufferStartOrEnd<FamilyType>::getBatchBufferEndSize();
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(uint32_t) * 2), baseCompareValue, NEO::CompareOperation::Less, false);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(*taskStream, jumpAddress, baseGpuVa + (sizeof(TestCompareDataT) * 2), baseCompareValue, NEO::CompareOperation::NotEqual, false, isQwordData);
NEO::EncodeBatchBufferStartOrEnd<FamilyType>::programBatchBufferEnd(*taskStream); // should be skipped
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(uint32_t) * 2),
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, baseWriteGpuVa + (sizeof(TestCompareDataT) * 2),
getAtomicOpcode<MI_ATOMIC>(),
geDataSize<MI_ATOMIC>(),
0, 0, 0, 0);
}
flushStream();
// Each result slot is expected to have been incremented exactly once.
uint32_t expectedValue = baseCompareValue + 1;
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(uint32_t));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(uint32_t)), &expectedValue, sizeof(uint32_t));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(uint32_t) * 2)), &expectedValue, sizeof(uint32_t));
TestCompareDataT expectedValue = baseCompareValue + 1;
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa), &expectedValue, sizeof(TestCompareDataT));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + sizeof(TestCompareDataT)), &expectedValue, sizeof(TestCompareDataT));
expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (sizeof(TestCompareDataT) * 2)), &expectedValue, sizeof(TestCompareDataT));
}
// Runs the shared NotEqual-mode scenario with 32-bit compare data
// (ConditionalBbStartTests32b is ConditionalBbStartTests<uint32_t>).
HWTEST2_F(ConditionalBbStartTests32b, whenDispatchingNotEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
whenDispatchingNotEqualModeThenResultsAreValidImpl<FamilyType>();
}
// Runs the shared NotEqual-mode scenario with 64-bit compare data
// (ConditionalBbStartTests64b is ConditionalBbStartTests<uint64_t>).
HWTEST2_F(ConditionalBbStartTests64b, whenDispatchingNotEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
whenDispatchingNotEqualModeThenResultsAreValidImpl<FamilyType>();
}
// Shared scenario for CompareOperation::GreaterOrEqual, used by the 32-bit and 64-bit fixtures.
// The Equal and Greater compare slots are expected to take the jump (skipping a batch buffer
// end); the Less slot is programmed with invalidGpuVa as jump target and is expected to fall
// through. Every slot is followed by an atomic increment of its result slot, and all three
// result slots must end up holding baseCompareValue + 1.
template <typename T>
template <typename FamilyType>
void ConditionalBbStartTests<T>::whenDispatchingGreaterOrEqualModeThenResultsAreValidImpl() {
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using BbStartOrEnd = EncodeBatchBufferStartOrEnd<FamilyType>;

    constexpr size_t slotSize = sizeof(TestCompareDataT);
    constexpr size_t slotCount = 3; // Equal, Greater, Less
    const auto compareMode = NEO::CompareOperation::GreaterOrEqual;
    const auto incrementOpcode = getAtomicOpcode<MI_ATOMIC>();
    const auto incrementDataSize = geDataSize<MI_ATOMIC>();

    // Atomic increment of a single result slot.
    auto programIncrement = [&](uint64_t resultGpuVa) {
        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, resultGpuVa,
                                                  incrementOpcode,
                                                  incrementDataSize,
                                                  0, 0, 0, 0);
    };

    // Compare whose jump is expected to be taken: it lands right behind the batch buffer end.
    auto programExpectedJump = [&](uint64_t compareGpuVa, uint64_t resultGpuVa) {
        uint64_t landingAddress = taskStream->getCurrentGpuAddressPosition() + BbStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + BbStartOrEnd::getBatchBufferEndSize();

        BbStartOrEnd::programConditionalDataMemBatchBufferStart(*taskStream, landingAddress, compareGpuVa, baseCompareValue, compareMode, false, isQwordData);
        BbStartOrEnd::programBatchBufferEnd(*taskStream); // should be skipped
        programIncrement(resultGpuVa);
    };

    // Compare programmed with invalidGpuVa as target: execution continues with the increment.
    auto programExpectedFallThrough = [&](uint64_t compareGpuVa, uint64_t resultGpuVa) {
        BbStartOrEnd::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, compareGpuVa, baseCompareValue, compareMode, false, isQwordData);
        programIncrement(resultGpuVa);
    };

    // Equal
    programExpectedJump(baseGpuVa, baseWriteGpuVa);
    // Greater
    programExpectedJump(baseGpuVa + slotSize, baseWriteGpuVa + slotSize);
    // Less
    programExpectedFallThrough(baseGpuVa + (slotSize * 2), baseWriteGpuVa + (slotSize * 2));

    flushStream();

    TestCompareDataT expectedValue = baseCompareValue + 1;
    for (size_t slot = 0; slot < slotCount; slot++) {
        expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (slotSize * slot)), &expectedValue, sizeof(TestCompareDataT));
    }
}
// Runs the shared GreaterOrEqual-mode scenario with 32-bit compare data
// (ConditionalBbStartTests32b is ConditionalBbStartTests<uint32_t>).
HWTEST2_F(ConditionalBbStartTests32b, whenDispatchingGreaterOrEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
whenDispatchingGreaterOrEqualModeThenResultsAreValidImpl<FamilyType>();
}
// Runs the shared GreaterOrEqual-mode scenario with 64-bit compare data
// (ConditionalBbStartTests64b is ConditionalBbStartTests<uint64_t>).
HWTEST2_F(ConditionalBbStartTests64b, whenDispatchingGreaterOrEqualModeThenResultsAreValid, IsAtLeastXeHpcCore) {
whenDispatchingGreaterOrEqualModeThenResultsAreValidImpl<FamilyType>();
}
// Shared scenario for CompareOperation::Less, used by the 32-bit and 64-bit fixtures.
// The Equal and Greater compare slots are programmed with invalidGpuVa as jump target and are
// expected to fall through; the Less slot is expected to take the jump (skipping a batch
// buffer end). Every slot is followed by an atomic increment of its result slot, and all
// three result slots must end up holding baseCompareValue + 1.
template <typename T>
template <typename FamilyType>
void ConditionalBbStartTests<T>::whenDispatchingLessModeThenResultsAreValidImpl() {
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using BbStartOrEnd = EncodeBatchBufferStartOrEnd<FamilyType>;

    constexpr size_t slotSize = sizeof(TestCompareDataT);
    constexpr size_t slotCount = 3; // Equal, Greater, Less
    const auto compareMode = NEO::CompareOperation::Less;
    const auto incrementOpcode = getAtomicOpcode<MI_ATOMIC>();
    const auto incrementDataSize = geDataSize<MI_ATOMIC>();

    // Atomic increment of a single result slot.
    auto programIncrement = [&](uint64_t resultGpuVa) {
        EncodeAtomic<FamilyType>::programMiAtomic(*taskStream, resultGpuVa,
                                                  incrementOpcode,
                                                  incrementDataSize,
                                                  0, 0, 0, 0);
    };

    // Compare programmed with invalidGpuVa as target: execution continues with the increment.
    auto programExpectedFallThrough = [&](uint64_t compareGpuVa, uint64_t resultGpuVa) {
        BbStartOrEnd::programConditionalDataMemBatchBufferStart(*taskStream, invalidGpuVa, compareGpuVa, baseCompareValue, compareMode, false, isQwordData);
        programIncrement(resultGpuVa);
    };

    // Compare whose jump is expected to be taken: it lands right behind the batch buffer end.
    auto programExpectedJump = [&](uint64_t compareGpuVa, uint64_t resultGpuVa) {
        uint64_t landingAddress = taskStream->getCurrentGpuAddressPosition() + BbStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart(isQwordData) + BbStartOrEnd::getBatchBufferEndSize();

        BbStartOrEnd::programConditionalDataMemBatchBufferStart(*taskStream, landingAddress, compareGpuVa, baseCompareValue, compareMode, false, isQwordData);
        BbStartOrEnd::programBatchBufferEnd(*taskStream); // should be skipped
        programIncrement(resultGpuVa);
    };

    // Equal
    programExpectedFallThrough(baseGpuVa, baseWriteGpuVa);
    // Greater
    programExpectedFallThrough(baseGpuVa + slotSize, baseWriteGpuVa + slotSize);
    // Less
    programExpectedJump(baseGpuVa + (slotSize * 2), baseWriteGpuVa + (slotSize * 2));

    flushStream();

    TestCompareDataT expectedValue = baseCompareValue + 1;
    for (size_t slot = 0; slot < slotCount; slot++) {
        expectMemory<FamilyType>(reinterpret_cast<void *>(baseWriteGpuVa + (slotSize * slot)), &expectedValue, sizeof(TestCompareDataT));
    }
}
// Runs the shared Less-mode scenario with 32-bit compare data
// (ConditionalBbStartTests32b is ConditionalBbStartTests<uint32_t>).
HWTEST2_F(ConditionalBbStartTests32b, whenDispatchingLessModeThenResultsAreValid, IsAtLeastXeHpcCore) {
whenDispatchingLessModeThenResultsAreValidImpl<FamilyType>();
}
// Runs the shared Less-mode scenario with 64-bit compare data
// (ConditionalBbStartTests64b is ConditionalBbStartTests<uint64_t>).
HWTEST2_F(ConditionalBbStartTests64b, whenDispatchingLessModeThenResultsAreValid, IsAtLeastXeHpcCore) {
whenDispatchingLessModeThenResultsAreValidImpl<FamilyType>();
}
} // namespace NEO

View File

@@ -1130,7 +1130,7 @@ HWTEST_F(EnqueueKernelTest, givenRelaxedOrderingEnabledWhenCheckingSizeForCsThen
auto newCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, nullptr);
auto semaphoresSize = numberOfDependencyContainers * numberNodesPerContainer * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT);
auto conditionalBbsSize = numberOfDependencyContainers * numberNodesPerContainer * EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart();
auto conditionalBbsSize = numberOfDependencyContainers * numberNodesPerContainer * EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false);
auto registersSize = 2 * EncodeSetMMIO<FamilyType>::sizeREG;
auto expectedSize = baseCommandStreamSize - semaphoresSize + conditionalBbsSize + registersSize;
@@ -1296,10 +1296,10 @@ HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenBarrierWithDependenciesWhenFlu
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(++lrrCmd, 0, compareAddress, 1, CompareOperation::Equal, true));
auto conditionalBbStart2 = reinterpret_cast<void *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart()));
auto conditionalBbStart2 = reinterpret_cast<void *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(conditionalBbStart2, 0, compareAddress, 1, CompareOperation::Equal, true));
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(conditionalBbStart2, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart()));
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(conditionalBbStart2, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
EXPECT_NE(nullptr, sdiCmd);
clReleaseEvent(outEvent);

View File

@@ -439,17 +439,23 @@ struct EncodeBatchBufferStartOrEnd {
static void programBatchBufferEnd(CommandContainer &container);
static void programBatchBufferEnd(LinearStream &commandStream);
static void programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareData, CompareOperation compareOperation, bool indirect);
static void programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg, uint32_t compareData, CompareOperation compareOperation, bool indirect);
static void programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData);
static void programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg, uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData);
static void programConditionalRegRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, AluRegisters compareReg0, AluRegisters compareReg1, CompareOperation compareOperation, bool indirect);
static void programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg, CompareOperation compareOperation, bool indirect);
static size_t constexpr getCmdSizeConditionalDataMemBatchBufferStart() {
return (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) + (3 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
static size_t constexpr getCmdSizeConditionalDataMemBatchBufferStart(bool useQwordData) {
size_t size = (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) + (2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
size += useQwordData ? sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) : sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
return size;
}
static size_t constexpr getCmdSizeConditionalDataRegBatchBufferStart() {
return (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_REG) + (3 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
static size_t constexpr getCmdSizeConditionalDataRegBatchBufferStart(bool useQwordData) {
size_t size = (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_REG) + (2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM)));
size += useQwordData ? sizeof(typename GfxFamily::MI_LOAD_REGISTER_REG) : sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
return size;
}
static size_t constexpr getCmdSizeConditionalRegMemBatchBufferStart() {

View File

@@ -859,24 +859,39 @@ void EncodeAtomic<Family>::programMiAtomic(LinearStream &commandStream,
// Programs a conditional batch buffer start that compares a value read from memory at
// compareAddress (loaded into GPR R7) against the immediate compareData (loaded into GPR R8),
// then delegates to programConditionalBatchBufferStartBase with R_7 and R_8 as operands.
//   commandStream    - stream the commands are written to
//   startAddress     - forwarded to programConditionalBatchBufferStartBase as the start address
//   compareAddress   - memory address of the value to compare
//   compareData      - immediate value to compare against
//   compareOperation - comparison forwarded to the base routine
//   indirect         - forwarded to the base routine
//   useQwordData     - when set, the upper dword of R7 is loaded from compareAddress + 4 and the
//                      upper dword of R8 from the upper 32 bits of compareData; otherwise both
//                      upper dwords are programmed to 0 and the upper 32 bits of compareData
//                      are ignored
// NOTE(review): this span comes from a rendered diff; the uint32_t parameter line and the three
// unconditional LriHelper calls right after encodeMEM are the removed pre-change lines.
template <typename Family>
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress,
uint32_t compareData, CompareOperation compareOperation, bool indirect) {
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData) {
// Lower dword of the left operand comes from memory.
EncodeSetMMIO<Family>::encodeMEM(commandStream, CS_GPR_R7, compareAddress);
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareData, true);
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, 0, true);
// Upper dword of the left operand: second memory load for qword data, zero otherwise.
if (useQwordData) {
EncodeSetMMIO<Family>::encodeMEM(commandStream, CS_GPR_R7 + 4, compareAddress + 4);
} else {
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
}
// Split the immediate into dwords; the upper dword is only used for qword compares.
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareDataLow, true);
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, compareDataHigh, true);
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::R_7, AluRegisters::R_8, compareOperation, indirect);
}
// Programs a conditional batch buffer start that compares the content of register compareReg
// (copied into GPR R7) against the immediate compareData (loaded into GPR R8), then delegates
// to programConditionalBatchBufferStartBase with R_7 and R_8 as operands.
//   commandStream    - stream the commands are written to
//   startAddress     - forwarded to programConditionalBatchBufferStartBase as the start address
//   compareReg       - register offset of the value to compare
//   compareData      - immediate value to compare against
//   compareOperation - comparison forwarded to the base routine
//   indirect         - forwarded to the base routine
//   useQwordData     - when set, the upper dword of R7 is copied from compareReg + 4 and the
//                      upper dword of R8 from the upper 32 bits of compareData; otherwise both
//                      upper dwords are programmed to 0 and the upper 32 bits of compareData
//                      are ignored
// NOTE(review): this span comes from a rendered diff; the uint32_t parameter line, the
// unconditional LriHelper call right after encodeREG and the two LriHelper calls that pass
// compareData / 0 directly are the removed pre-change lines.
template <typename Family>
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg,
uint32_t compareData, CompareOperation compareOperation, bool indirect) {
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData) {
// Lower dword of the left operand comes from the source register.
EncodeSetMMIO<Family>::encodeREG(commandStream, CS_GPR_R7, compareReg);
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
// Upper dword of the left operand: second register copy for qword data, zero otherwise.
if (useQwordData) {
EncodeSetMMIO<Family>::encodeREG(commandStream, CS_GPR_R7 + 4, compareReg + 4);
} else {
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
}
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareData, true);
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, 0, true);
// Split the immediate into dwords; the upper dword is only used for qword compares.
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareDataLow, true);
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, compareDataHigh, true);
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::R_7, AluRegisters::R_8, compareOperation, indirect);
}

View File

@@ -122,7 +122,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9);
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true, false);
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true);
@@ -178,7 +178,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9);
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true, false);
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R7, 8, true);
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R7 + 4, 0, true);
@@ -241,12 +241,12 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
schedulerCmdStream,
loopSectionStartAddress,
CS_GPR_R1, currentRelaxedOrderingQueueSize, CompareOperation::GreaterOrEqual, false);
CS_GPR_R1, currentRelaxedOrderingQueueSize, CompareOperation::GreaterOrEqual, false, false);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
schedulerCmdStream,
loopSectionStartAddress,
CS_GPR_R5, 1, CompareOperation::Equal, false);
CS_GPR_R5, 1, CompareOperation::Equal, false, false);
}
// 6. Scheduler loop check section
@@ -780,15 +780,15 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
// Dispatches a "queue stall" sequence into the ring command stream:
//  1. reserves space for one conditional (data-vs-register) BB_START at the current ring position,
//  2. programs CS_GPR_R5 with 1 via MI_LOAD_REGISTER_IMM,
//  3. dispatches the semaphore section for currentQueueWorkCount,
//  4. fills the reserved space with a conditional BB_START that targets the ring position reached
//     after step 3 and compares CS_GPR_R1 against 0 (CompareOperation::Equal, non-indirect),
//  5. clears relaxedOrderingSchedulerRequired.
// NOTE(review): in this listing each changed statement appears twice - apparently the pre-change form
// followed by the post-change form that passes an extra `false` argument (presumably "no qword data");
// confirm against the actual file which variant is live.
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStall() {
// Reserve the BB_START space up front; it is programmed last, once the jump target address is known.
LinearStream bbStartStream(ringCommandStream.getSpace(EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart()),
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
LinearStream bbStartStream(ringCommandStream.getSpace(EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false)),
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false));
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 1, true);
dispatchSemaphoreSection(currentQueueWorkCount);
// patch conditional bb_start with current GPU address
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
CS_GPR_R1, 0, CompareOperation::Equal, false);
CS_GPR_R1, 0, CompareOperation::Equal, false, false);
relaxedOrderingSchedulerRequired = false;
}
@@ -796,7 +796,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStal
// Returns the number of bytes the queue-stall sequence occupies: the semaphore section
// (getSizeSemaphoreSection(true)), one MI_LOAD_REGISTER_IMM and one conditional
// (data-vs-register) BB_START.
// NOTE(review): the last term is listed twice - apparently the pre-change call followed by the
// post-change call taking `false`; confirm against the actual file which variant is live.
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrderingQueueStall() {
return getSizeSemaphoreSection(true) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) +
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false);
}
template <typename GfxFamily, typename Dispatcher>

View File

@@ -62,7 +62,7 @@ struct StaticSchedulerSizeAndOffsetSection {
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
static constexpr uint64_t initSectionSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart() + (6 * sizeof(MI_LOAD_REGISTER_IMM)) +
static constexpr uint64_t initSectionSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false) + (6 * sizeof(MI_LOAD_REGISTER_IMM)) +
EncodeMiPredicate<GfxFamily>::getCmdSize() + (2 * sizeof(MI_LOAD_REGISTER_REG));
static constexpr uint64_t loopStartSectionStart = initSectionSize;
@@ -70,7 +70,7 @@ struct StaticSchedulerSizeAndOffsetSection {
EncodeMiPredicate<GfxFamily>::getCmdSize();
static constexpr uint64_t removeTaskSectionStart = loopStartSectionStart + loopStartSectionSize;
static constexpr uint64_t removeStartSectionSize = (2 * EncodeMathMMIO<GfxFamily>::getCmdSizeForIncrementOrDecrement()) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart() +
static constexpr uint64_t removeStartSectionSize = (2 * EncodeMathMMIO<GfxFamily>::getCmdSizeForIncrementOrDecrement()) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false) +
(4 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper<GfxFamily, 14>::getCmdsSize() + EncodeMiPredicate<GfxFamily>::getCmdSize() +
(2 * sizeof(MI_LOAD_REGISTER_REG));
@@ -79,7 +79,7 @@ struct StaticSchedulerSizeAndOffsetSection {
(2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeMiPredicate<GfxFamily>::getCmdSize();
static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize;
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + (2 * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + (2 * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false));
static constexpr uint64_t schedulerLoopCheckSectionStart = drainRequestSectionStart + drainRequestSectionSize;
static constexpr uint64_t schedulerLoopCheckSectionSize = (2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper<GfxFamily, 4>::getCmdsSize() +

View File

@@ -116,7 +116,7 @@ struct TimestampPacketHelper {
uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize();
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(cmdStream, 0, compareAddress + compareOffset, TimestampPacketConstants::initValue,
NEO::CompareOperation::Equal, true);
NEO::CompareOperation::Equal, true, false);
}
}
@@ -197,7 +197,7 @@ struct TimestampPacketHelper {
// Returns the command-stream size needed for a relaxed-ordering dependency on the given node:
// the node's used packet count multiplied by the size of one conditional (data-vs-memory) BB_START.
// NOTE(review): two return statements are listed - apparently the pre-change form followed by the
// post-change form taking `false`; confirm against the actual file which variant is live.
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForRelaxedOrderingNodeDependency(TagNodeBase &timestampPacketNode) {
return (timestampPacketNode.getPacketsUsed() * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataMemBatchBufferStart());
return (timestampPacketNode.getPacketsUsed() * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataMemBatchBufferStart(false));
}
template <typename GfxFamily>

View File

@@ -1102,7 +1102,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
return false;
}
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
return false;
}
@@ -1243,7 +1243,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
return false;
}
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R7, 8)) {
return false;
}
@@ -1369,7 +1369,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
return false;
}
auto conditionalBbStartcmds = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
auto conditionalBbStartcmds = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(conditionalBbStartcmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
@@ -1377,7 +1377,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
}
// 6. Scheduler loop check section
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionSize))) {
return false;
@@ -1896,13 +1896,13 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false);
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(startAddress, expectedJumpAddress, CS_GPR_R1, 0, CompareOperation::Equal, false));
HardwareParse hwParse;
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
hwParse.findHardwareCommands<FamilyType>();
bool success = false;
@@ -1997,13 +1997,13 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false);
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(startAddress, expectedJumpAddress, CS_GPR_R1, 0, CompareOperation::Equal, false));
HardwareParse hwParse;
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
hwParse.findHardwareCommands<FamilyType>();
bool success = false;

View File

@@ -96,7 +96,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataM
EncodeAluHelper<FamilyType, 4>::getCmdsSize() + sizeof(typename FamilyType::MI_LOAD_REGISTER_REG) +
(2 * EncodeMiPredicate<FamilyType>::getCmdSize()) + sizeof(MI_BATCH_BUFFER_START);
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart());
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false));
constexpr uint64_t startAddress = 0x12340000;
constexpr uint64_t compareAddress = 0x56780000;
@@ -107,7 +107,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataM
uint8_t buffer[expectedSize] = {};
LinearStream cmdStream(buffer, expectedSize);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareAddress, compareData, compareOperation, indirect, false);
EXPECT_EQ(expectedSize, cmdStream.getUsed());
@@ -132,6 +132,51 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataM
}
}
// Verifies the command sequence emitted by programConditionalDataMemBatchBufferStart when qword
// (64-bit) compare data is requested: two MI_LOAD_REGISTER_MEM commands load the low and high dwords
// of the compared memory into R7, two MI_LOAD_REGISTER_IMM commands load the low and high dwords of
// the compare value into R8, and the shared base programming follows.
HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDataMemBatchBufferStartThenProgramCorrectMathOperations, IsAtLeastXeHpcCore) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    constexpr size_t expectedSize = (2 * sizeof(MI_LOAD_REGISTER_MEM)) + (2 * sizeof(MI_LOAD_REGISTER_IMM)) +
                                    EncodeAluHelper<FamilyType, 4>::getCmdsSize() + sizeof(typename FamilyType::MI_LOAD_REGISTER_REG) +
                                    (2 * EncodeMiPredicate<FamilyType>::getCmdSize()) + sizeof(MI_BATCH_BUFFER_START);

    EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(true));

    constexpr uint64_t startAddress = 0x12340000;
    constexpr uint64_t compareAddress = 0x56780000;
    constexpr uint64_t compareData = 0x12345678'12345678;

    // The 64-bit compare value is expected to be split across two immediate loads.
    constexpr uint32_t expectedLowDword = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
    constexpr uint32_t expectedHighDword = static_cast<uint32_t>(compareData >> 32);

    for (auto compareOperation : {CompareOperation::Equal, CompareOperation::NotEqual, CompareOperation::GreaterOrEqual}) {
        for (bool indirect : {false, true}) {
            uint8_t buffer[expectedSize] = {};
            LinearStream cmdStream(buffer, expectedSize);

            // Indirect mode is programmed with a zero start address.
            const uint64_t programmedStartAddress = indirect ? 0 : startAddress;
            EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataMemBatchBufferStart(cmdStream, programmedStartAddress, compareAddress, compareData, compareOperation, indirect, true);

            EXPECT_EQ(expectedSize, cmdStream.getUsed());

            // Commands [0] and [1]: memory -> R7 (low dword, then high dword at +4).
            auto *loadRegMemCmds = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(buffer);
            EXPECT_EQ(CS_GPR_R7, loadRegMemCmds[0].getRegisterAddress());
            EXPECT_EQ(compareAddress, loadRegMemCmds[0].getMemoryAddress());
            EXPECT_EQ(CS_GPR_R7 + 4, loadRegMemCmds[1].getRegisterAddress());
            EXPECT_EQ(compareAddress + 4, loadRegMemCmds[1].getMemoryAddress());

            // Commands [2] and [3]: immediate compare data -> R8 (low dword, then high dword at +4).
            auto *loadRegImmCmds = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(loadRegMemCmds + 2);
            EXPECT_EQ(CS_GPR_R8, loadRegImmCmds[0].getRegisterOffset());
            EXPECT_EQ(expectedLowDword, loadRegImmCmds[0].getDataDword());
            EXPECT_EQ(CS_GPR_R8 + 4, loadRegImmCmds[1].getRegisterOffset());
            EXPECT_EQ(expectedHighDword, loadRegImmCmds[1].getDataDword());

            // Everything after the four loads is the common conditional BB_START programming.
            auto *baseProgramming = loadRegImmCmds + 2;
            validateBaseProgramming<FamilyType>(baseProgramming, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
        }
    }
}
HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataRegBatchBufferStartThenProgramCorrectMathOperations, IsAtLeastXeHpcCore) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
@@ -141,7 +186,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
EncodeAluHelper<FamilyType, 4>::getCmdsSize() + sizeof(typename FamilyType::MI_LOAD_REGISTER_REG) +
(2 * EncodeMiPredicate<FamilyType>::getCmdSize()) + sizeof(MI_BATCH_BUFFER_START);
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
constexpr uint64_t startAddress = 0x12340000;
constexpr uint32_t compareReg = CS_GPR_R1;
@@ -152,7 +197,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
uint8_t buffer[expectedSize] = {};
LinearStream cmdStream(buffer, expectedSize);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect);
EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, indirect ? 0 : startAddress, compareReg, compareData, compareOperation, indirect, false);
EXPECT_EQ(expectedSize, cmdStream.getUsed());
@@ -177,6 +222,51 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
}
}
// Verifies the command sequence emitted by programConditionalDataRegBatchBufferStart when qword
// (64-bit) compare data is requested: two MI_LOAD_REGISTER_REG commands copy the low and high dwords
// of the compared register into R7, two MI_LOAD_REGISTER_IMM commands load the low and high dwords of
// the compare value into R8, and the shared base programming follows.
HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDataRegBatchBufferStartThenProgramCorrectMathOperations, IsAtLeastXeHpcCore) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    constexpr size_t expectedSize = (2 * sizeof(MI_LOAD_REGISTER_REG)) + (2 * sizeof(MI_LOAD_REGISTER_IMM)) +
                                    EncodeAluHelper<FamilyType, 4>::getCmdsSize() + sizeof(typename FamilyType::MI_LOAD_REGISTER_REG) +
                                    (2 * EncodeMiPredicate<FamilyType>::getCmdSize()) + sizeof(MI_BATCH_BUFFER_START);

    EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(true));

    constexpr uint64_t startAddress = 0x12340000;
    constexpr uint32_t compareReg = CS_GPR_R1;
    constexpr uint64_t compareData = 0x12345678'12345678;

    // The 64-bit compare value is expected to be split across two immediate loads.
    constexpr uint32_t expectedLowDword = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
    constexpr uint32_t expectedHighDword = static_cast<uint32_t>(compareData >> 32);

    for (auto compareOperation : {CompareOperation::Equal, CompareOperation::NotEqual, CompareOperation::GreaterOrEqual}) {
        for (bool indirect : {false, true}) {
            uint8_t buffer[expectedSize] = {};
            LinearStream cmdStream(buffer, expectedSize);

            // Indirect mode is programmed with a zero start address.
            const uint64_t programmedStartAddress = indirect ? 0 : startAddress;
            EncodeBatchBufferStartOrEnd<FamilyType>::programConditionalDataRegBatchBufferStart(cmdStream, programmedStartAddress, compareReg, compareData, compareOperation, indirect, true);

            EXPECT_EQ(expectedSize, cmdStream.getUsed());

            // Commands [0] and [1]: compareReg -> R7 (low dword, then high dword at +4).
            auto *loadRegRegCmds = reinterpret_cast<MI_LOAD_REGISTER_REG *>(buffer);
            EXPECT_EQ(CS_GPR_R7, loadRegRegCmds[0].getDestinationRegisterAddress());
            EXPECT_EQ(compareReg, loadRegRegCmds[0].getSourceRegisterAddress());
            EXPECT_EQ(CS_GPR_R7 + 4, loadRegRegCmds[1].getDestinationRegisterAddress());
            EXPECT_EQ(compareReg + 4, loadRegRegCmds[1].getSourceRegisterAddress());

            // Commands [2] and [3]: immediate compare data -> R8 (low dword, then high dword at +4).
            auto *loadRegImmCmds = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(loadRegRegCmds + 2);
            EXPECT_EQ(CS_GPR_R8, loadRegImmCmds[0].getRegisterOffset());
            EXPECT_EQ(expectedLowDword, loadRegImmCmds[0].getDataDword());
            EXPECT_EQ(CS_GPR_R8 + 4, loadRegImmCmds[1].getRegisterOffset());
            EXPECT_EQ(expectedHighDword, loadRegImmCmds[1].getDataDword());

            // Everything after the four loads is the common conditional BB_START programming.
            auto *baseProgramming = loadRegImmCmds + 2;
            validateBaseProgramming<FamilyType>(baseProgramming, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
        }
    }
}
HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalRegRegBatchBufferStartThenProgramCorrectMathOperations, IsAtLeastXeHpcCore) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;