mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
Fix multi tile race when using signal event from barrier call
Related-To: NEO-6811 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c305c75413
commit
dad49bd2ee
@@ -229,12 +229,12 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
|
||||
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
|
||||
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions);
|
||||
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb);
|
||||
void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition);
|
||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask);
|
||||
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition);
|
||||
void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
|
||||
void appendSignalEventPostWalker(ze_event_handle_t hEvent);
|
||||
void appendSignalEventPostWalker(ze_event_handle_t hEvent, bool workloadPartition);
|
||||
void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired);
|
||||
void appendComputeBarrierCommand();
|
||||
NEO::PipeControlArgs createBarrierFlags();
|
||||
|
||||
@@ -230,10 +230,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
appendEventForProfiling(hEvent, true);
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer,
|
||||
nullptr, true, false, false);
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -251,7 +251,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
appendEventForProfiling(hEvent, true);
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
const bool haveLaunchArguments = pLaunchArgumentsBuffer != nullptr;
|
||||
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pNumLaunchArguments);
|
||||
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
@@ -268,7 +268,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
}
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -355,9 +355,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
return ret;
|
||||
}
|
||||
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
appendEventForProfiling(hSignalEvent, true, false);
|
||||
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
||||
appendSignalEventPostWalker(hSignalEvent);
|
||||
appendSignalEventPostWalker(hSignalEvent, false);
|
||||
|
||||
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
|
||||
executeCommandListImmediate(true);
|
||||
@@ -801,9 +801,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_ker
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(hEvent, true);
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
} else {
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -911,14 +911,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(NEO
|
||||
|
||||
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
|
||||
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
appendEventForProfiling(hSignalEvent, true, false);
|
||||
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
if (copyRegionPreferred) {
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
} else {
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
}
|
||||
appendSignalEventPostWalker(hSignalEvent);
|
||||
appendSignalEventPostWalker(hSignalEvent, false);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -941,9 +941,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
|
||||
commandContainer.addToResidencyContainer(dst);
|
||||
commandContainer.addToResidencyContainer(src);
|
||||
commandContainer.addToResidencyContainer(clearColorAllocation);
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
appendEventForProfiling(hSignalEvent, true, false);
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
appendSignalEventPostWalker(hSignalEvent);
|
||||
appendSignalEventPostWalker(hSignalEvent, false);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1597,7 +1597,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
appendEventForProfiling(hSignalEvent, true, false);
|
||||
NEO::GraphicsAllocation *gpuAllocation = device->getDriverHandle()->getDriverSystemMemoryAllocation(ptr,
|
||||
size,
|
||||
neoDevice->getRootDeviceIndex(),
|
||||
@@ -1623,19 +1623,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
*commandContainer.getCommandStream(),
|
||||
size,
|
||||
*neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||
appendSignalEventPostWalker(hSignalEvent);
|
||||
appendSignalEventPostWalker(hSignalEvent, false);
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_handle_t hEvent) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_handle_t hEvent, bool workloadPartition) {
|
||||
if (hEvent == nullptr) {
|
||||
return;
|
||||
}
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
if (event->isEventTimestampFlagSet()) {
|
||||
appendEventForProfiling(hEvent, false);
|
||||
appendEventForProfiling(hEvent, false, workloadPartition);
|
||||
} else {
|
||||
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
@@ -1683,7 +1683,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(ze
|
||||
NEO::MiFlushArgs args;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo);
|
||||
}
|
||||
appendWriteKernelTimestamp(hEvent, beforeWalker, false);
|
||||
appendWriteKernelTimestamp(hEvent, beforeWalker, false, false);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -1937,7 +1937,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::programSyncBuffer(Kernel &kern
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition) {
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
|
||||
@@ -1949,18 +1949,18 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(ze_event_h
|
||||
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
|
||||
|
||||
if (maskLsb) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, mask, globalAddress, false);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, mask, contextAddress, false);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, mask, globalAddress, workloadPartition);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, mask, contextAddress, workloadPartition);
|
||||
} else {
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalAddress, false);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, false);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalAddress, workloadPartition);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, workloadPartition);
|
||||
}
|
||||
|
||||
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition) {
|
||||
if (!hEvent) {
|
||||
return;
|
||||
}
|
||||
@@ -1976,7 +1976,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
|
||||
|
||||
if (beforeWalker) {
|
||||
appendWriteKernelTimestamp(hEvent, beforeWalker, true);
|
||||
appendWriteKernelTimestamp(hEvent, beforeWalker, true, workloadPartition);
|
||||
} else {
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
NEO::PipeControlArgs args;
|
||||
@@ -1988,7 +1988,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
|
||||
|
||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, hwInfo);
|
||||
appendWriteKernelTimestamp(hEvent, beforeWalker, true);
|
||||
appendWriteKernelTimestamp(hEvent, beforeWalker, true, workloadPartition);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2030,7 +2030,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(hSignalEvent);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(hSignalEvent, false);
|
||||
}
|
||||
|
||||
auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false);
|
||||
@@ -2269,19 +2269,25 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
appendEventForProfiling(hSignalEvent, true);
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
if (!hSignalEvent) {
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo);
|
||||
} else {
|
||||
appendComputeBarrierCommand();
|
||||
bool workloadPartition = false;
|
||||
if (this->partitionCount > 1 &&
|
||||
hSignalEvent) {
|
||||
auto event = Event::fromHandle(hSignalEvent);
|
||||
if (event->isEventTimestampFlagSet()) {
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
workloadPartition = true;
|
||||
}
|
||||
} else {
|
||||
appendSignalEventPostWalker(hSignalEvent);
|
||||
}
|
||||
appendEventForProfiling(hSignalEvent, true, workloadPartition);
|
||||
|
||||
if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, this->device->getHwInfo());
|
||||
} else {
|
||||
appendComputeBarrierCommand();
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(hSignalEvent, workloadPartition);
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
const auto kernel = Kernel::fromHandle(hKernel);
|
||||
const auto &kernelDescriptor = kernel->getKernelDescriptor();
|
||||
UNRECOVERABLE_IF(kernel == nullptr);
|
||||
appendEventForProfiling(hEvent, true);
|
||||
appendEventForProfiling(hEvent, true, false);
|
||||
const auto functionImmutableData = kernel->getImmutableData();
|
||||
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
|
||||
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
|
||||
@@ -159,7 +159,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
appendSignalEventPostWalker(hEvent, false);
|
||||
|
||||
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
|
||||
auto &residencyContainer = kernel->getResidencyContainer();
|
||||
|
||||
@@ -1836,7 +1836,7 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing
|
||||
|
||||
auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
commandList->appendSignalEventPostWalker(nullptr);
|
||||
commandList->appendSignalEventPostWalker(nullptr, false);
|
||||
|
||||
EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore);
|
||||
}
|
||||
|
||||
@@ -6,11 +6,13 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
|
||||
@@ -80,55 +82,15 @@ HWTEST_F(CommandListAppendBarrier, GivenEventVsNoEventWhenAppendingBarrierThenCo
|
||||
ASSERT_LE(sizeWithoutEvent, sizeWithEvent);
|
||||
}
|
||||
|
||||
using MultiTileCommandListAppendBarrier = Test<MultiTileCommandListFixture<false, false, false>>;
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsWithinXeGfxFamily) {
|
||||
template <typename FamilyType>
|
||||
void validateMultiTileBarrier(void *cmdBuffer, size_t &parsedOffset,
|
||||
uint64_t gpuFinalSyncAddress, uint64_t gpuCrossTileSyncAddress, uint64_t gpuStartAddress) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
|
||||
EXPECT_EQ(2u, commandList->partitionCount);
|
||||
|
||||
size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
|
||||
sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
size_t startOffset = beforeControlSectionOffset +
|
||||
(2 * sizeof(uint32_t));
|
||||
|
||||
size_t expectedUseBuffer = startOffset +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
|
||||
sizeof(MI_STORE_DATA_IMM) +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT);
|
||||
|
||||
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
auto gpuBaseAddress = commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->getGpuAddress() +
|
||||
usedSpaceBefore;
|
||||
|
||||
auto gpuCrossTileSyncAddress = gpuBaseAddress +
|
||||
beforeControlSectionOffset;
|
||||
|
||||
auto gpuFinalSyncAddress = gpuCrossTileSyncAddress +
|
||||
sizeof(uint32_t);
|
||||
|
||||
auto gpuStartAddress = gpuBaseAddress +
|
||||
startOffset;
|
||||
|
||||
auto result = commandList->appendBarrier(nullptr, 0, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
size_t usedBuffer = usedSpaceAfter - usedSpaceBefore;
|
||||
EXPECT_EQ(expectedUseBuffer, usedBuffer);
|
||||
|
||||
void *cmdBuffer = ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore);
|
||||
size_t parsedOffset = 0;
|
||||
|
||||
{
|
||||
auto storeDataImm = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, storeDataImm);
|
||||
@@ -216,6 +178,59 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControl
|
||||
EXPECT_EQ(4u, miSemaphore->getSemaphoreDataDword());
|
||||
parsedOffset += sizeof(MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
}
|
||||
|
||||
using MultiTileCommandListAppendBarrier = Test<MultiTileCommandListFixture<false, false, false>>;
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsWithinXeGfxFamily) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
|
||||
EXPECT_EQ(2u, commandList->partitionCount);
|
||||
|
||||
size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
|
||||
sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
size_t startOffset = beforeControlSectionOffset +
|
||||
(2 * sizeof(uint32_t));
|
||||
|
||||
size_t expectedUseBuffer = startOffset +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
|
||||
sizeof(MI_STORE_DATA_IMM) +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT);
|
||||
|
||||
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
auto gpuBaseAddress = commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->getGpuAddress() +
|
||||
usedSpaceBefore;
|
||||
|
||||
auto gpuCrossTileSyncAddress = gpuBaseAddress +
|
||||
beforeControlSectionOffset;
|
||||
|
||||
auto gpuFinalSyncAddress = gpuCrossTileSyncAddress +
|
||||
sizeof(uint32_t);
|
||||
|
||||
auto gpuStartAddress = gpuBaseAddress +
|
||||
startOffset;
|
||||
|
||||
auto result = commandList->appendBarrier(nullptr, 0, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
size_t usedBuffer = usedSpaceAfter - usedSpaceBefore;
|
||||
EXPECT_EQ(expectedUseBuffer, usedBuffer);
|
||||
|
||||
void *cmdBuffer = ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore);
|
||||
size_t parsedOffset = 0;
|
||||
|
||||
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress);
|
||||
|
||||
EXPECT_EQ(expectedUseBuffer, parsedOffset);
|
||||
}
|
||||
|
||||
@@ -276,95 +291,274 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
|
||||
void *cmdBuffer = cmdListStream->getCpuBase();
|
||||
size_t parsedOffset = 0;
|
||||
|
||||
{
|
||||
auto storeDataImm = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, storeDataImm);
|
||||
EXPECT_EQ(gpuFinalSyncAddress, storeDataImm->getAddress());
|
||||
EXPECT_EQ(0u, storeDataImm->getDataDword0());
|
||||
parsedOffset += sizeof(MI_STORE_DATA_IMM);
|
||||
}
|
||||
{
|
||||
auto pipeControl = genCmdCast<PIPE_CONTROL *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, pipeControl);
|
||||
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
|
||||
EXPECT_FALSE(pipeControl->getDcFlushEnable());
|
||||
parsedOffset += sizeof(PIPE_CONTROL);
|
||||
}
|
||||
{
|
||||
auto miAtomic = genCmdCast<MI_ATOMIC *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, miAtomic);
|
||||
auto miAtomicProgrammedAddress = NEO::UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
|
||||
EXPECT_EQ(gpuCrossTileSyncAddress, miAtomicProgrammedAddress);
|
||||
EXPECT_FALSE(miAtomic->getReturnDataControl());
|
||||
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
|
||||
parsedOffset += sizeof(MI_ATOMIC);
|
||||
}
|
||||
{
|
||||
auto miSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, miSemaphore);
|
||||
EXPECT_EQ(gpuCrossTileSyncAddress, miSemaphore->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation());
|
||||
EXPECT_EQ(2u, miSemaphore->getSemaphoreDataDword());
|
||||
parsedOffset += sizeof(MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
{
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, bbStart);
|
||||
EXPECT_EQ(gpuStartAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
parsedOffset += sizeof(MI_BATCH_BUFFER_START);
|
||||
}
|
||||
{
|
||||
auto crossField = reinterpret_cast<uint32_t *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
EXPECT_EQ(0u, *crossField);
|
||||
parsedOffset += sizeof(uint32_t);
|
||||
auto finalField = reinterpret_cast<uint32_t *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
EXPECT_EQ(0u, *finalField);
|
||||
parsedOffset += sizeof(uint32_t);
|
||||
}
|
||||
{
|
||||
auto miAtomic = genCmdCast<MI_ATOMIC *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, miAtomic);
|
||||
auto miAtomicProgrammedAddress = NEO::UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
|
||||
EXPECT_EQ(gpuFinalSyncAddress, miAtomicProgrammedAddress);
|
||||
EXPECT_FALSE(miAtomic->getReturnDataControl());
|
||||
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
|
||||
parsedOffset += sizeof(MI_ATOMIC);
|
||||
}
|
||||
{
|
||||
auto miSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, miSemaphore);
|
||||
EXPECT_EQ(gpuFinalSyncAddress, miSemaphore->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation());
|
||||
EXPECT_EQ(2u, miSemaphore->getSemaphoreDataDword());
|
||||
parsedOffset += sizeof(MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
{
|
||||
auto storeDataImm = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, storeDataImm);
|
||||
EXPECT_EQ(gpuCrossTileSyncAddress, storeDataImm->getAddress());
|
||||
EXPECT_EQ(0u, storeDataImm->getDataDword0());
|
||||
parsedOffset += sizeof(MI_STORE_DATA_IMM);
|
||||
}
|
||||
{
|
||||
auto miAtomic = genCmdCast<MI_ATOMIC *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, miAtomic);
|
||||
auto miAtomicProgrammedAddress = NEO::UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic);
|
||||
EXPECT_EQ(gpuFinalSyncAddress, miAtomicProgrammedAddress);
|
||||
EXPECT_FALSE(miAtomic->getReturnDataControl());
|
||||
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode());
|
||||
parsedOffset += sizeof(MI_ATOMIC);
|
||||
}
|
||||
{
|
||||
auto miSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(ptrOffset(cmdBuffer, parsedOffset));
|
||||
ASSERT_NE(nullptr, miSemaphore);
|
||||
EXPECT_EQ(gpuFinalSyncAddress, miSemaphore->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation());
|
||||
EXPECT_EQ(4u, miSemaphore->getSemaphoreDataDword());
|
||||
parsedOffset += sizeof(MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress);
|
||||
|
||||
EXPECT_EQ(expectedUseBuffer, parsedOffset);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendBarrier,
|
||||
GivenNonTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndPostSyncOperation, IsWithinXeGfxFamily) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
|
||||
uint64_t eventGpuAddress = event->getGpuAddress(device);
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
eventGpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
ze_event_handle_t eventHandle = event->toHandle();
|
||||
|
||||
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
|
||||
EXPECT_EQ(2u, commandList->partitionCount);
|
||||
|
||||
LinearStream *cmdListStream = commandList->commandContainer.getCommandStream();
|
||||
|
||||
size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
|
||||
sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
size_t bbStartOffset = beforeControlSectionOffset +
|
||||
(2 * sizeof(uint32_t));
|
||||
|
||||
size_t multiTileBarrierSize = bbStartOffset +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
|
||||
sizeof(MI_STORE_DATA_IMM) +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT);
|
||||
|
||||
size_t postSyncSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
|
||||
|
||||
auto useSizeBefore = cmdListStream->getUsed();
|
||||
auto result = commandList->appendBarrier(eventHandle, 0, nullptr);
|
||||
auto useSizeAfter = cmdListStream->getUsed();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(2u, event->getPacketsInUse());
|
||||
|
||||
size_t totaSizedBarrierWithNonTimestampEvent = multiTileBarrierSize + postSyncSize;
|
||||
|
||||
EXPECT_EQ(totaSizedBarrierWithNonTimestampEvent, (useSizeAfter - useSizeBefore));
|
||||
|
||||
auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore;
|
||||
|
||||
auto gpuCrossTileSyncAddress = gpuBaseAddress +
|
||||
beforeControlSectionOffset;
|
||||
|
||||
auto gpuFinalSyncAddress = gpuCrossTileSyncAddress +
|
||||
sizeof(uint32_t);
|
||||
|
||||
auto gpuStartAddress = gpuBaseAddress +
|
||||
bbStartOffset;
|
||||
|
||||
void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore);
|
||||
size_t parsedOffset = 0;
|
||||
|
||||
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress);
|
||||
EXPECT_EQ(multiTileBarrierSize, parsedOffset);
|
||||
|
||||
cmdBuffer = ptrOffset(cmdBuffer, parsedOffset);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
cmdBuffer,
|
||||
postSyncSize));
|
||||
|
||||
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(0u, itorPC.size());
|
||||
uint32_t postSyncFound = 0;
|
||||
for (auto it : itorPC) {
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
|
||||
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
|
||||
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
|
||||
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
|
||||
EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
postSyncFound++;
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(1u, postSyncFound);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
uint64_t firstRegisterAddress, uint64_t secondRegisterAddress) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
|
||||
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstRegisterAddress, cmdMem->getMemoryAddress());
|
||||
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondRegisterAddress, cmdMem->getMemoryAddress());
|
||||
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendBarrier,
|
||||
GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsWithinXeGfxFamily) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
eventPoolDesc.count = 2;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
eventDesc.wait = 0;
|
||||
eventDesc.signal = 0;
|
||||
|
||||
ze_result_t returnValue;
|
||||
auto eventPoolTimeStamp = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto eventTimeStamp = std::unique_ptr<Event>(Event::create<uint32_t>(eventPoolTimeStamp.get(), &eventDesc, device));
|
||||
|
||||
uint64_t eventGpuAddress = eventTimeStamp->getGpuAddress(device);
|
||||
uint64_t contextStartAddress = eventGpuAddress + event->getContextStartOffset();
|
||||
uint64_t globalStartAddress = eventGpuAddress + event->getGlobalStartOffset();
|
||||
uint64_t contextEndAddress = eventGpuAddress + event->getContextEndOffset();
|
||||
uint64_t globalEndAddress = eventGpuAddress + event->getGlobalEndOffset();
|
||||
|
||||
ze_event_handle_t eventHandle = eventTimeStamp->toHandle();
|
||||
|
||||
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
|
||||
EXPECT_EQ(2u, commandList->partitionCount);
|
||||
|
||||
LinearStream *cmdListStream = commandList->commandContainer.getCommandStream();
|
||||
|
||||
size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
|
||||
sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
size_t bbStartOffset = beforeControlSectionOffset +
|
||||
(2 * sizeof(uint32_t));
|
||||
|
||||
size_t multiTileBarrierSize = bbStartOffset +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) +
|
||||
sizeof(MI_STORE_DATA_IMM) +
|
||||
sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT);
|
||||
|
||||
size_t timestampRegisters = 2 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
NEO::EncodeMath<FamilyType>::streamCommandSize + sizeof(MI_STORE_REGISTER_MEM));
|
||||
|
||||
size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSinglePipeControl() +
|
||||
NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(device->getHwInfo());
|
||||
size_t stopRegisters = timestampRegisters + postBarrierSynchronization;
|
||||
|
||||
auto useSizeBefore = cmdListStream->getUsed();
|
||||
auto result = commandList->appendBarrier(eventHandle, 0, nullptr);
|
||||
auto useSizeAfter = cmdListStream->getUsed();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(2u, eventTimeStamp->getPacketsInUse());
|
||||
|
||||
size_t totaSizedBarrierWithTimestampEvent = multiTileBarrierSize + timestampRegisters + stopRegisters;
|
||||
EXPECT_EQ(totaSizedBarrierWithTimestampEvent, (useSizeAfter - useSizeBefore));
|
||||
|
||||
void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
cmdBuffer,
|
||||
timestampRegisters));
|
||||
validateTimestampRegisters<FamilyType>(cmdList, globalStartAddress, contextStartAddress);
|
||||
|
||||
auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters;
|
||||
|
||||
auto gpuCrossTileSyncAddress = gpuBaseAddress +
|
||||
beforeControlSectionOffset;
|
||||
|
||||
auto gpuFinalSyncAddress = gpuCrossTileSyncAddress +
|
||||
sizeof(uint32_t);
|
||||
|
||||
auto gpuStartAddress = gpuBaseAddress +
|
||||
bbStartOffset;
|
||||
|
||||
cmdBuffer = ptrOffset(cmdBuffer, timestampRegisters);
|
||||
size_t parsedOffset = 0;
|
||||
|
||||
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress);
|
||||
EXPECT_EQ(multiTileBarrierSize, parsedOffset);
|
||||
|
||||
cmdBuffer = ptrOffset(cmdBuffer, (parsedOffset + postBarrierSynchronization));
|
||||
cmdList.clear();
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
cmdBuffer,
|
||||
timestampRegisters));
|
||||
validateTimestampRegisters<FamilyType>(cmdList, globalEndAddress, contextEndAddress);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -318,7 +318,7 @@ HWTEST2_F(CommandListAppendSignalEvent,
|
||||
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
|
||||
commandList->partitionCount = packets;
|
||||
commandList->appendSignalEventPostWalker(event->toHandle());
|
||||
commandList->appendSignalEventPostWalker(event->toHandle(), false);
|
||||
EXPECT_EQ(packets, event->getPacketsInUse());
|
||||
|
||||
auto gpuAddress = event->getGpuAddress(device);
|
||||
|
||||
@@ -468,7 +468,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event->toHandle(), true);
|
||||
commandList->appendEventForProfiling(event->toHandle(), true, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -504,7 +504,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event->toHandle(), false);
|
||||
commandList->appendEventForProfiling(event->toHandle(), false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -539,7 +539,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event->toHandle(), false);
|
||||
commandList->appendEventForProfiling(event->toHandle(), false, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
@@ -350,7 +350,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEven
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event->toHandle(), false);
|
||||
commandList->appendEventForProfiling(event->toHandle(), false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -381,7 +381,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEven
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event->toHandle(), false);
|
||||
commandList->appendEventForProfiling(event->toHandle(), false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -411,7 +411,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventA
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendSignalEventPostWalker(event->toHandle());
|
||||
commandList->appendSignalEventPostWalker(event->toHandle(), false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
|
||||
@@ -88,7 +88,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event->toHandle(), true);
|
||||
commandList->appendEventForProfiling(event->toHandle(), true, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -124,7 +124,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event->toHandle(), false);
|
||||
commandList->appendEventForProfiling(event->toHandle(), false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -159,7 +159,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event->toHandle(), false);
|
||||
commandList->appendEventForProfiling(event->toHandle(), false, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
Reference in New Issue
Block a user