Handle TimestampPacket with implicit dependencies ownership

Change-Id: I22a4de4e9eb904c359583e235e0de54a7c743e07
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2018-09-25 15:44:43 -07:00
committed by sys_ocldev
parent 7ddf1d554b
commit cbd017d495
25 changed files with 352 additions and 48 deletions

View File

@@ -20,6 +20,7 @@
#include "runtime/helpers/get_info.h"
#include "runtime/helpers/mipmap.h"
#include "runtime/helpers/options.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/timestamp_packet.h"
#include "runtime/mem_obj/buffer.h"

View File

@@ -250,8 +250,14 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, cl_uint numEventsInWa
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, &scheduler);
}
if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto semaphoreSize = sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
auto atomicSize = sizeof(typename GfxFamily::MI_ATOMIC);
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
expectedSizeCS += (numEventsInWaitList + 1) * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
expectedSizeCS += numEventsInWaitList * (semaphoreSize + atomicSize);
if (!commandQueue.isOOQEnabled()) {
expectedSizeCS += semaphoreSize + atomicSize;
}
}
return commandQueue.getCS(expectedSizeCS);
}

View File

@@ -457,11 +457,8 @@ inline void GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(Lin
if (event->isUserEvent() || (&event->getCommandQueue()->getDevice() != &currentDevice)) {
continue;
}
auto timestampPacket = event->getTimestampPacketNode()->tag;
auto compareAddress = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
KernelCommandsHelper<GfxFamily>::programMiSemaphoreWait(*commandStream, compareAddress, 1);
TimestmapPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(*commandStream, *event->getTimestampPacketNode()->tag);
}
}
@@ -472,20 +469,16 @@ void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(
TimestampPacket *timestampPacket,
TimestampPacket::WriteOperationType writeOperationType) {
uint64_t address;
if (TimestampPacket::WriteOperationType::BeforeWalker == writeOperationType) {
address = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::Submit);
} else {
address = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
if (TimestampPacket::WriteOperationType::AfterWalker == writeOperationType) {
uint64_t address = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
auto pipeControlCmd = cmdStream->getSpaceForCmd<PIPE_CONTROL>();
*pipeControlCmd = PIPE_CONTROL::sInit();
pipeControlCmd->setCommandStreamerStallEnable(true);
pipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
pipeControlCmd->setAddress(static_cast<uint32_t>(address & 0x0000FFFFFFFFULL));
pipeControlCmd->setAddressHigh(static_cast<uint32_t>(address >> 32));
pipeControlCmd->setImmediateData(0);
}
auto pipeControlCmd = cmdStream->getSpaceForCmd<PIPE_CONTROL>();
*pipeControlCmd = PIPE_CONTROL::sInit();
pipeControlCmd->setCommandStreamerStallEnable(true);
pipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
pipeControlCmd->setAddress(static_cast<uint32_t>(address & 0x0000FFFFFFFFULL));
pipeControlCmd->setAddressHigh(static_cast<uint32_t>(address >> 32));
pipeControlCmd->setImmediateData(0);
}
template <typename GfxFamily>
@@ -740,7 +733,7 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSNonKernel(bool reserveProfi
template <typename GfxFamily>
size_t EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite() {
return 2 * sizeof(PIPE_CONTROL);
return sizeof(PIPE_CONTROL);
}
} // namespace OCLRT

View File

@@ -84,8 +84,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
GpgpuWalkerHelper<GfxFamily>::dispatchOnDeviceWaitlistSemaphores(commandStream, commandQueue.getDevice(),
numEventsInWaitList, eventWaitList);
if (previousTimestampPacketNode) {
auto compareAddress = previousTimestampPacketNode->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
KernelCommandsHelper<GfxFamily>::programMiSemaphoreWait(*commandStream, compareAddress, 1);
TimestmapPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(*commandStream, *previousTimestampPacketNode->tag);
}
}

View File

@@ -784,11 +784,7 @@ void CommandStreamReceiverHw<GfxFamily>::handleEventsTimestampPacketTags(LinearS
makeResident(*event->getTimestampPacketNode()->getGraphicsAllocation());
if (&event->getCommandQueue()->getDevice() != &currentDevice) {
auto timestampPacket = event->getTimestampPacketNode()->tag;
auto compareAddress = timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
KernelCommandsHelper<GfxFamily>::programMiSemaphoreWait(csr, compareAddress, 1);
TimestmapPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(csr, *event->getTimestampPacketNode()->tag);
}
}
}

View File

@@ -15,6 +15,7 @@
#include "runtime/event/event_tracker.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/get_info.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/timestamp_packet.h"
#include "runtime/api/cl_types.h"
#include "runtime/mem_obj/mem_obj.h"

View File

@@ -1044,6 +1044,7 @@ typedef struct tagMI_ATOMIC {
MEMORYADDRESS_INDEX = 0x1,
} PATCH_CONSTANTS;
// Opcode values that can be programmed into an MI_ATOMIC command.
// NOTE(review): the 4B/8B prefix presumably denotes the operand width in bytes - confirm against the hardware spec.
typedef enum tagATOMIC_OPCODES {
ATOMIC_4B_DECREMENT = 0x6,
ATOMIC_8B_INCREMENT = 0x25,
ATOMIC_8B_DECREMENT = 0x26,
} ATOMIC_OPCODES;

View File

@@ -995,6 +995,7 @@ typedef struct tagMI_ATOMIC {
MEMORYADDRESS_INDEX = 0x1,
} PATCH_CONSTANTS;
// Opcode values that can be programmed into an MI_ATOMIC command.
// NOTE(review): the 4B/8B prefix presumably denotes the operand width in bytes - confirm against the hardware spec.
typedef enum tagATOMIC_OPCODES {
ATOMIC_4B_DECREMENT = 0x6,
ATOMIC_8B_INCREMENT = 0x25,
ATOMIC_8B_DECREMENT = 0x26,
} ATOMIC_OPCODES;

View File

@@ -993,6 +993,7 @@ typedef struct tagMI_ATOMIC {
MEMORYADDRESS_INDEX = 0x1,
} PATCH_CONSTANTS;
// Opcode values that can be programmed into an MI_ATOMIC command.
// NOTE(review): the 4B/8B prefix presumably denotes the operand width in bytes - confirm against the hardware spec.
typedef enum tagATOMIC_OPCODES {
ATOMIC_4B_DECREMENT = 0x6,
ATOMIC_8B_INCREMENT = 0x25,
ATOMIC_8B_DECREMENT = 0x26,
} ATOMIC_OPCODES;

View File

@@ -29,6 +29,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
static uint32_t computeSlmValues(uint32_t valueIn);
@@ -147,6 +148,7 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
}
static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData);
static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t);
static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t);

View File

@@ -419,6 +419,19 @@ void KernelCommandsHelper<GfxFamily>::programMiSemaphoreWait(LinearStream &comma
miSemaphoreCmd->setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}
// Appends a single MI_ATOMIC command to commandStream.
//
// commandStream - stream the command space is taken from
// writeAddress  - 64-bit address the atomic operation targets; it is split into
//                 a low and a high 32-bit half for the two address setters
// opcode        - atomic operation to program
// dataSize      - operand size to program
//
// Returns a pointer to the command inside the stream so callers can inspect or
// further patch it.
template <typename GfxFamily>
typename GfxFamily::MI_ATOMIC *KernelCommandsHelper<GfxFamily>::programMiAtomic(LinearStream &commandStream, uint64_t writeAddress,
                                                                                  typename MI_ATOMIC::ATOMIC_OPCODES opcode,
                                                                                  typename MI_ATOMIC::DATA_SIZE dataSize) {
    const auto addressLow = static_cast<uint32_t>(writeAddress & 0x0000FFFFFFFFULL);
    const auto addressHigh = static_cast<uint32_t>(writeAddress >> 32);

    auto atomicCmd = commandStream.getSpaceForCmd<MI_ATOMIC>();
    // Start from the family's default-initialized command before patching fields.
    *atomicCmd = MI_ATOMIC::sInit();
    atomicCmd->setAtomicOpcode(opcode);
    atomicCmd->setDataSize(dataSize);
    atomicCmd->setMemoryAddress(addressLow);
    atomicCmd->setMemoryAddressHigh(addressHigh);
    return atomicCmd;
}
template <typename GfxFamily>
bool KernelCommandsHelper<GfxFamily>::doBindingTablePrefetch() {
return true;

View File

@@ -9,16 +9,17 @@
#include <cstdint>
#include <array>
#include <atomic>
namespace OCLRT {
#pragma pack(1)
class TimestampPacket {
public:
enum class DataIndex : uint32_t {
ContextStart,
ContextStart = 0,
GlobalStart,
ContextEnd,
GlobalEnd,
Submit,
Max
};
@@ -29,7 +30,8 @@ class TimestampPacket {
bool canBeReleased() const {
return data[static_cast<uint32_t>(DataIndex::ContextEnd)] != 1 &&
data[static_cast<uint32_t>(DataIndex::GlobalEnd)] != 1;
data[static_cast<uint32_t>(DataIndex::GlobalEnd)] != 1 &&
implicitDependenciesCount.load() == 0;
}
uint64_t pickAddressForDataWrite(DataIndex operationType) const {
@@ -37,9 +39,37 @@ class TimestampPacket {
return reinterpret_cast<uint64_t>(&data[index]);
}
void initialize() { data = {{1, 1, 1, 1, 1}}; }
void initialize() {
data = {{1, 1, 1, 1}};
implicitDependenciesCount.store(0);
}
void incImplicitDependenciesCount() { implicitDependenciesCount++; }
uint64_t pickImplicitDependenciesCountWriteAddress() const { return reinterpret_cast<uint64_t>(&implicitDependenciesCount); }
protected:
std::array<uint32_t, static_cast<uint32_t>(DataIndex::Max)> data = {{1, 1, 1, 1, 1}};
std::array<uint32_t, static_cast<uint32_t>(DataIndex::Max)> data = {{1, 1, 1, 1}};
std::atomic<uint32_t> implicitDependenciesCount{0};
};
#pragma pack()
static_assert(((static_cast<uint32_t>(TimestampPacket::DataIndex::Max) + 1) * sizeof(uint32_t)) == sizeof(TimestampPacket),
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
struct TimestmapPacketHelper {
template <typename GfxFamily>
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TimestampPacket &timestmapPacket) {
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
auto compareAddress = timestmapPacket.pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
auto dependenciesCountAddress = timestmapPacket.pickImplicitDependenciesCountWriteAddress();
KernelCommandsHelper<GfxFamily>::programMiSemaphoreWait(cmdStream, compareAddress, 1);
timestmapPacket.incImplicitDependenciesCount();
KernelCommandsHelper<GfxFamily>::programMiAtomic(cmdStream, dependenciesCountAddress,
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT,
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD);
}
};
} // namespace OCLRT

View File

@@ -14,6 +14,7 @@
#include "runtime/gmm_helper/resource_info.h"
#include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/basic_math.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/options.h"
#include "runtime/helpers/timestamp_packet.h"
#include "runtime/memory_manager/deferred_deleter.h"

View File

@@ -69,6 +69,9 @@ class TagAllocator {
}
NodeType *getTag() {
if (freeTags.peekIsEmpty()) {
releaseDeferredTags();
}
NodeType *node = freeTags.removeFrontOne().release();
if (!node) {
std::unique_lock<std::mutex> lock(allocatorMutex);
@@ -142,5 +145,28 @@ class TagAllocator {
((void)(End));
tagPoolMemory.push_back(nodesMemory);
}
void releaseDeferredTags() {
IDList<NodeType, false> pendingFreeTags;
IDList<NodeType, false> pendingDeferredTags;
auto currentNode = deferredTags.detachNodes();
while (currentNode != nullptr) {
auto nextNode = currentNode->next;
if (currentNode->tag->canBeReleased()) {
pendingFreeTags.pushFrontOne(*currentNode);
} else {
pendingDeferredTags.pushFrontOne(*currentNode);
}
currentNode = nextNode;
}
if (!pendingFreeTags.peekIsEmpty()) {
freeTags.splice(*pendingFreeTags.detachNodes());
}
if (!pendingDeferredTags.peekIsEmpty()) {
deferredTags.splice(*pendingDeferredTags.detachNodes());
}
}
};
} // namespace OCLRT

View File

@@ -18,6 +18,7 @@ target_sources(igdrcl_aub_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_with_timestamp_packet_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect_aub_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_copy_read_buffer_aub_tests.cpp

View File

@@ -0,0 +1,65 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/timestamp_packet.h"
#include "runtime/mem_obj/buffer.h"
#include "runtime/utilities/tag_allocator.h"
#include "unit_tests/aub_tests/command_queue/command_enqueue_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "test.h"
using namespace OCLRT;
struct TimestampPacketAubTests : public CommandEnqueueAUBFixture, public ::testing::Test {
void SetUp() override {
DebugManagerStateRestore restore;
DebugManager.flags.EnableTimestampPacket.set(true);
CommandEnqueueAUBFixture::SetUp();
}
void TearDown() override {
CommandEnqueueAUBFixture::TearDown();
}
};
// Enqueues two buffer writes in batched dispatch mode and verifies, via AUB memory
// expectations, that the buffer holds the second pattern, that the first node's
// implicit dependency counter is back to zero and that both nodes have their
// end timestamps cleared.
HWTEST_F(TimestampPacketAubTests, givenTwoBatchedEnqueuesWhenDependencyIsResolvedThenDecrementCounterOnGpu) {
    MockContext context(&pCmdQ->getDevice());
    pCommandStreamReceiver->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    const size_t bufferSize = 1024;
    auto retVal = CL_SUCCESS;
    uint8_t initialMemory[bufferSize] = {};
    uint8_t writePattern1[bufferSize];
    uint8_t writePattern2[bufferSize];
    // Distinct patterns: the final buffer content must be attributable to the second write.
    std::fill(writePattern1, writePattern1 + sizeof(writePattern1), 1);
    std::fill(writePattern2, writePattern2 + sizeof(writePattern2), 2);

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(&context, CL_MEM_COPY_HOST_PTR, bufferSize, initialMemory, retVal));
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer);
    buffer->forceDisallowCPUCopy = true;

    cl_event outEvent1 = nullptr;
    cl_event outEvent2 = nullptr;
    pCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, bufferSize, writePattern1, 0, nullptr, &outEvent1);
    auto node1 = castToObject<Event>(outEvent1)->getTimestampPacketNode();
    pCmdQ->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writePattern2, 0, nullptr, &outEvent2);
    auto node2 = castToObject<Event>(outEvent2)->getTimestampPacketNode();

    expectMemory<FamilyType>(reinterpret_cast<void *>(buffer->getGraphicsAllocation()->getGpuAddress()), writePattern2, bufferSize);

    // The dependency recorded on the first node must have been decremented back to zero.
    uint32_t expectedDepsCount = 0;
    expectMemory<FamilyType>(reinterpret_cast<void *>(node1->tag->pickImplicitDependenciesCountWriteAddress()),
                             &expectedDepsCount, sizeof(uint32_t));

    // Two consecutive dwords starting at ContextEnd are expected to be zero on both nodes.
    uint32_t expectedEndTimestamp[2] = {0, 0};
    auto endTimestampAddress1 = reinterpret_cast<void *>(node1->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd));
    auto endTimestampAddress2 = reinterpret_cast<void *>(node2->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd));
    expectMemory<FamilyType>(endTimestampAddress1, expectedEndTimestamp, 2 * sizeof(uint32_t));
    expectMemory<FamilyType>(endTimestampAddress2, expectedEndTimestamp, 2 * sizeof(uint32_t));

    clReleaseEvent(outEvent1);
    clReleaseEvent(outEvent2);
}

View File

@@ -298,7 +298,7 @@ size_t CnlParse::getCommandLength(void *cmd) {
{
auto pCmd = genCmdCast<MI_ATOMIC *>(cmd);
if (pCmd)
return pCmd->TheStructure.Common.DwordLength + 2;
return sizeof(MI_ATOMIC) / sizeof(uint32_t);
}
{
auto pCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);

View File

@@ -272,7 +272,7 @@ size_t BdwParse::getCommandLength(void *cmd) {
{
auto pCmd = genCmdCast<MI_ATOMIC *>(cmd);
if (pCmd)
return pCmd->TheStructure.Common.DwordLength + 2;
return sizeof(MI_ATOMIC) / sizeof(uint32_t);
}
{
auto pCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);

View File

@@ -298,7 +298,7 @@ size_t SklParse::getCommandLength(void *cmd) {
{
auto pCmd = genCmdCast<MI_ATOMIC *>(cmd);
if (pCmd)
return pCmd->TheStructure.Common.DwordLength + 2;
return sizeof(MI_ATOMIC) / sizeof(uint32_t);
}
{
auto pCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);

View File

@@ -1033,6 +1033,27 @@ HWTEST_F(KernelCommandsHelperTests, givenCompareAddressAndDataWhenProgrammingSem
EXPECT_EQ(0, memcmp(&referenceCommand, buffer, sizeof(MI_SEMAPHORE_WAIT)));
}
// Programs one MI_ATOMIC through KernelCommandsHelper and compares it byte for
// byte with a command assembled by hand from the same inputs.
HWTEST_F(KernelCommandsHelperTests, whenProgrammingMiAtomicThenSetupAllFields) {
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;

    const uint64_t writeAddress = 0x10000;
    const auto opcode = MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT;
    const auto dataSize = MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD;

    // Hand-built command used as the expected result.
    MI_ATOMIC referenceCommand = MI_ATOMIC::sInit();
    referenceCommand.setAtomicOpcode(opcode);
    referenceCommand.setDataSize(dataSize);
    referenceCommand.setMemoryAddress(static_cast<uint32_t>(writeAddress & 0x0000FFFFFFFFULL));
    referenceCommand.setMemoryAddressHigh(static_cast<uint32_t>(writeAddress >> 32));

    uint8_t storage[1024] = {};
    LinearStream cmdStream(storage, sizeof(storage));
    auto miAtomic = KernelCommandsHelper<FamilyType>::programMiAtomic(cmdStream, writeAddress, opcode, dataSize);

    // Exactly one command was emitted, at the very start of the stream.
    EXPECT_EQ(sizeof(MI_ATOMIC), cmdStream.getUsed());
    EXPECT_EQ(miAtomic, cmdStream.getCpuBase());
    EXPECT_EQ(0, memcmp(&referenceCommand, miAtomic, sizeof(MI_ATOMIC)));
}
typedef ExecutionModelKernelFixture ParentKernelCommandsFromBinaryTest;
HWTEST_P(ParentKernelCommandsFromBinaryTest, getSizeRequiredForExecutionModelForSurfaceStatesReturnsSizeOfBlocksPlusMaxBindingTableSizeForAllIDTEntriesAndSchedulerSSHSize) {

View File

@@ -25,6 +25,7 @@ struct TimestampPacketSimpleTests : public ::testing::Test {
// Test-only subclass that re-exports TimestampPacket's internal members so tests
// can read and modify them directly.
class MockTimestampPacket : public TimestampPacket {
public:
// Timestamp slot storage.
using TimestampPacket::data;
// Counter of implicit dependencies recorded on this packet.
using TimestampPacket::implicitDependenciesCount;
};
template <typename TagType = TimestampPacket>
@@ -53,6 +54,8 @@ struct TimestampPacketSimpleTests : public ::testing::Test {
// Puts a tag into the state in which it can be released: zeroes timestampDataSize
// bytes starting at the ContextStart slot and resets the implicit dependency
// counter to zero.
void setTagToReadyState(TimestampPacket *tag) {
    auto stampsAddress = tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextStart);
    memset(reinterpret_cast<void *>(stampsAddress), 0, timestampDataSize);

    // The counter is only reachable through its raw address from outside the class.
    auto counterAddress = tag->pickImplicitDependenciesCountWriteAddress();
    auto counter = reinterpret_cast<std::atomic<uint32_t> *>(reinterpret_cast<void *>(counterAddress));
    counter->store(0);
}
const size_t timestampDataSize = sizeof(uint32_t) * static_cast<size_t>(TimestampPacket::DataIndex::Max);
@@ -77,6 +80,20 @@ struct TimestampPacketTests : public TimestampPacketSimpleTests {
semaphoreCmd->getSemaphoreGraphicsAddress());
};
// Checks that miAtomicCmd is a 4-byte decrement aimed at the implicit dependency
// counter of compareEvent's timestamp packet.
template <typename MI_ATOMIC>
void verifyMiAtomic(MI_ATOMIC *miAtomicCmd, Event *compareEvent) {
    EXPECT_NE(nullptr, miAtomicCmd);

    auto writeAddress = compareEvent->getTimestampPacketNode()->tag->pickImplicitDependenciesCountWriteAddress();
    const auto expectedLow = static_cast<uint32_t>(writeAddress & 0x0000FFFFFFFFULL);
    const auto expectedHigh = static_cast<uint32_t>(writeAddress >> 32);

    EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT, miAtomicCmd->getAtomicOpcode());
    EXPECT_EQ(expectedLow, miAtomicCmd->getMemoryAddress());
    EXPECT_EQ(expectedHigh, miAtomicCmd->getMemoryAddressHigh());
};
void verifyDependencyCounterValue(TimestampPacket *timestmapPacket, uint32_t expectedValue) {
auto dependenciesCount = reinterpret_cast<std::atomic<uint32_t> *>(reinterpret_cast<void *>(timestmapPacket->pickImplicitDependenciesCountWriteAddress()));
EXPECT_EQ(expectedValue, dependenciesCount->load());
}
ExecutionEnvironment executionEnvironment;
std::unique_ptr<MockDevice> device;
std::unique_ptr<MockContext> context;
@@ -106,18 +123,35 @@ TEST_F(TimestampPacketSimpleTests, whenEndTagIsNotOneThenCanBeReleased) {
EXPECT_TRUE(timestampPacket.canBeReleased());
}
// With both end stamps already written, the packet must stay unreleasable for as
// long as its implicit dependency counter is non-zero.
TEST_F(TimestampPacketSimpleTests, givenImplicitDependencyWhenEndTagIsWrittenThenCantBeReleased) {
    MockTimestampPacket packet;

    // Overwrite both end slots so only the counter decides the outcome.
    packet.data[static_cast<uint32_t>(TimestampPacket::DataIndex::ContextEnd)] = 0;
    packet.data[static_cast<uint32_t>(TimestampPacket::DataIndex::GlobalEnd)] = 0;

    packet.implicitDependenciesCount.store(1);
    EXPECT_FALSE(packet.canBeReleased());

    packet.implicitDependenciesCount.store(0);
    EXPECT_TRUE(packet.canBeReleased());
}
TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
MockMemoryManager memoryManager;
MockTagAllocator<MockTimestampPacket> allocator(&memoryManager, 1);
auto firstNode = allocator.getTag();
firstNode->tag->data = {{5, 6, 7, 8, 9}};
firstNode->tag->data = {{5, 6, 7, 8}};
auto dependenciesCount = reinterpret_cast<std::atomic<uint32_t> *>(reinterpret_cast<void *>(firstNode->tag->pickImplicitDependenciesCountWriteAddress()));
setTagToReadyState(firstNode->tag);
allocator.returnTag(firstNode);
(*dependenciesCount)++;
auto secondNode = allocator.getTag();
EXPECT_EQ(secondNode, firstNode);
EXPECT_EQ(0u, dependenciesCount->load());
for (uint32_t i = 0; i < static_cast<uint32_t>(TimestampPacket::DataIndex::Max); i++) {
EXPECT_EQ(1u, secondNode->tag->data[i]);
}
@@ -126,7 +160,7 @@ TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) {
MockTimestampPacket timestampPacket;
auto maxElements = static_cast<uint32_t>(TimestampPacket::DataIndex::Max);
EXPECT_EQ(5u, maxElements);
EXPECT_EQ(4u, maxElements);
EXPECT_EQ(maxElements, timestampPacket.data.size());
@@ -144,7 +178,7 @@ TEST_F(TimestampPacketSimpleTests, whenAskedForStampAddressThenReturnWithValidOf
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeThenAddTwoPipeControls) {
HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeThenAddPipeControl) {
MockKernelWithInternals kernel2(*device);
MockMultiDispatchInfo multiDispatchInfo(std::vector<Kernel *>({kernel->mockKernel, kernel2.mockKernel}));
@@ -156,7 +190,28 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQ, 0, false, false, multiDispatchInfo);
auto sizeWithEnabled = mockCmdQ->requestedCmdStreamSize;
auto extendedSize = sizeWithDisabled + (2 * sizeof(typename FamilyType::PIPE_CONTROL)) + sizeof(typename FamilyType::MI_SEMAPHORE_WAIT);
auto extendedSize = sizeWithDisabled + sizeof(typename FamilyType::PIPE_CONTROL) +
sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
EXPECT_EQ(sizeWithEnabled, extendedSize);
}
// For an out-of-order queue the estimate grows only by the timestamp packet write
// plus one semaphore/atomic pair per wait-list event; no extra pair is reserved
// beyond those.
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimatingStreamSizeDontDontAddAdditionalSize) {
    MockMultiDispatchInfo multiDispatchInfo(std::vector<Kernel *>({kernel->mockKernel}));
    mockCmdQ->setOoqEnabled();
    cl_uint numEventsOnWaitlist = 5;

    // Baseline: timestamp packets disabled.
    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
    getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQ, numEventsOnWaitlist, false, false, multiDispatchInfo);
    auto sizeWithDisabled = mockCmdQ->requestedCmdStreamSize;

    // Same request with timestamp packets enabled.
    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
    getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQ, numEventsOnWaitlist, false, false, multiDispatchInfo);
    auto sizeWithEnabled = mockCmdQ->requestedCmdStreamSize;

    const size_t sizePerWaitlistEvent = sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
    size_t extendedSize = sizeWithDisabled + EnqueueOperation<FamilyType>::getSizeRequiredForTimestampPacketWrite() +
                          (numEventsOnWaitlist * sizePerWaitlistEvent);
    EXPECT_EQ(sizeWithEnabled, extendedSize);
}
@@ -176,7 +231,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
auto sizeWithEnabled = mockCmdQ->requestedCmdStreamSize;
size_t extendedSize = sizeWithDisabled + EnqueueOperation<FamilyType>::getSizeRequiredForTimestampPacketWrite() +
((numEventsOnWaitlist + 1) * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT));
((numEventsOnWaitlist + 1) * (sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC)));
EXPECT_EQ(sizeWithEnabled, extendedSize);
}
@@ -226,14 +281,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat
EXPECT_EQ(nullptr, genCmdCast<PIPE_CONTROL *>(*++it));
it--;
} else if (walkersFound == 2) {
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*--it);
EXPECT_NE(nullptr, pipeControl);
verifyPipeControl(pipeControl, timestampPacket.pickAddressForDataWrite(TimestampPacket::DataIndex::Submit));
it++;
pipeControl = genCmdCast<PIPE_CONTROL *>(*++it);
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*++it);
EXPECT_NE(nullptr, pipeControl);
verifyPipeControl(pipeControl, timestampPacket.pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd));
it--;
}
}
}
@@ -337,6 +387,7 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForCsrT
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(nullptr, &executionEnvironment, 1u));
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
@@ -374,7 +425,11 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &event4);
verifyMiAtomic(genCmdCast<MI_ATOMIC *>(*it++), &event4);
verifyDependencyCounterValue(event4.getTimestampPacketNode()->tag, 1);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &event6);
verifyMiAtomic(genCmdCast<MI_ATOMIC *>(*it++), &event6);
verifyDependencyCounterValue(event6.getTimestampPacketNode()->tag, 1);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
@@ -409,6 +464,8 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &event1);
verifyMiAtomic(genCmdCast<typename FamilyType::MI_ATOMIC *>(*it++), &event1);
verifyDependencyCounterValue(event1.getTimestampPacketNode()->tag, 1);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
@@ -473,8 +530,12 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
semaphoresFound++;
if (semaphoresFound == 1) {
verifySemaphore(semaphoreCmd, &event3);
verifyMiAtomic(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), &event3);
verifyDependencyCounterValue(event3.getTimestampPacketNode()->tag, 1);
} else if (semaphoresFound == 2) {
verifySemaphore(semaphoreCmd, &event5);
verifyMiAtomic(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), &event5);
verifyDependencyCounterValue(event5.getTimestampPacketNode()->tag, 1);
}
}
if (genCmdCast<WALKER *>(*it)) {
@@ -567,37 +628,52 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenDontKee
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
uint32_t semaphoresFound = 0;
uint32_t atomicsFound = 0;
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
if (genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*it)) {
semaphoresFound++;
}
if (genCmdCast<typename FamilyType::MI_ATOMIC *>(*it)) {
atomicsFound++;
}
}
EXPECT_EQ(0u, semaphoresFound);
EXPECT_EQ(0u, atomicsFound);
}
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDependencyOnPreviousNodeIfItsNotReady) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);
cmdQ.obtainNewTimestampPacketNode();
auto firstNode = cmdQ.timestampPacketNode;
verifyDependencyCounterValue(firstNode->tag, 0);
cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
verifyDependencyCounterValue(firstNode->tag, 1);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*hwParser.cmdList.begin());
auto it = hwParser.cmdList.begin();
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
EXPECT_NE(nullptr, semaphoreCmd);
EXPECT_EQ(firstNode->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd), semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());
auto miAtomicCmd = genCmdCast<MI_ATOMIC *>(*++it);
EXPECT_NE(nullptr, miAtomicCmd);
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT, miAtomicCmd->getAtomicOpcode());
auto decrementAddress = firstNode->tag->pickImplicitDependenciesCountWriteAddress();
EXPECT_EQ(static_cast<uint32_t>(decrementAddress & 0x0000FFFFFFFFULL), miAtomicCmd->getMemoryAddress());
EXPECT_EQ(static_cast<uint32_t>(decrementAddress >> 32), miAtomicCmd->getMemoryAddressHigh());
uint32_t semaphoresFound = 0;
auto it = hwParser.cmdList.begin();
for (++it; it != hwParser.cmdList.end(); it++) {
if (genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*it)) {
if (genCmdCast<MI_SEMAPHORE_WAIT *>(*it)) {
semaphoresFound++;
}
}
@@ -618,12 +694,17 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingToOoqThenDo
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
uint32_t semaphoresFound = 0;
uint32_t atomicsFound = 0;
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
if (genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*it)) {
semaphoresFound++;
}
if (genCmdCast<typename FamilyType::MI_ATOMIC *>(*it)) {
atomicsFound++;
}
}
EXPECT_EQ(0u, semaphoresFound);
EXPECT_EQ(0u, atomicsFound);
}
HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentDevicesWhenEnqueueingThenMakeAllTimestampsResident) {

View File

@@ -352,6 +352,28 @@ struct GENX {
inline void setWaitMode(const WAIT_MODE value) {}
} MI_SEMAPHORE_WAIT;
// Stand-in MI_ATOMIC for the GENX mock family. It carries the enum values and the
// setter names used by code programming the command, but stores nothing: every
// setter is a no-op and sInit() hands back an empty object.
typedef struct tagMI_ATOMIC {
    // Atomic operations that can be requested.
    typedef enum tagATOMIC_OPCODES {
        ATOMIC_4B_DECREMENT = 0x6,
        ATOMIC_8B_INCREMENT = 0x25,
        ATOMIC_8B_DECREMENT = 0x26,
    } ATOMIC_OPCODES;

    // Operand sizes that can be requested.
    typedef enum tagDATA_SIZE {
        DATA_SIZE_DWORD = 0x0,
        DATA_SIZE_QWORD = 0x1,
        DATA_SIZE_OCTWORD = 0x2,
    } DATA_SIZE;

    // Returns a fresh command object; there is no state to initialize.
    static tagMI_ATOMIC sInit() {
        return tagMI_ATOMIC();
    }

    // Setters intentionally ignore their argument.
    inline void setAtomicOpcode(ATOMIC_OPCODES /*opcode*/) {}
    inline void setDataSize(DATA_SIZE /*dataSize*/) {}
    inline void setMemoryAddress(uint32_t /*addressLow*/) {}
    inline void setMemoryAddressHigh(uint32_t /*addressHigh*/) {}
} MI_ATOMIC;
using HARDWARE_INTERFACE = BaseInterfaceVersion<GENX>;
typedef GPGPU_WALKER WALKER_TYPE;
static GPGPU_WALKER cmdInitGpgpuWalker;

View File

@@ -7,6 +7,7 @@
#include "runtime/event/event.h"
#include "runtime/helpers/dispatch_info.h"
#include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/timestamp_packet.h"
#include "runtime/memory_manager/memory_constants.h"
#include "runtime/mem_obj/image.h"

View File

@@ -22,6 +22,9 @@ class MockCommandQueue : public CommandQueue {
// Adds CL_QUEUE_PROFILING_ENABLE to this mock queue's property bits.
void setProfilingEnabled() {
    commandQueueProperties = commandQueueProperties | CL_QUEUE_PROFILING_ENABLE;
}
// Adds CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE to this mock queue's property bits.
void setOoqEnabled() {
    commandQueueProperties = commandQueueProperties | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
}
MockCommandQueue() : CommandQueue(nullptr, nullptr, 0) {}
MockCommandQueue(Context *context, Device *device, const cl_queue_properties *props)
: CommandQueue(context, device, props) {

View File

@@ -32,6 +32,7 @@ class MockTagAllocator : public TagAllocator<timeStamps> {
public:
using TagAllocator<timeStamps>::populateFreeTags;
using TagAllocator<timeStamps>::deferredTags;
using TagAllocator<timeStamps>::releaseDeferredTags;
MockTagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment) : TagAllocator<timeStamps>(memMngr, tagCount, tagAlignment) {
}
@@ -296,3 +297,41 @@ TEST_F(TagAllocatorTest, givenReadyTagWhenReturnedThenMoveToFreeList) {
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_FALSE(tagAllocator.getFreeTags().peekIsEmpty());
}
// When the free list is empty, getTag() must first try to recycle tags parked on
// the deferred list instead of allocating a new pool.
TEST_F(TagAllocatorTest, givenEmptyFreeListWhenAskingForNewTagThenTryToReleaseDeferredListFirst) {
    MockTagAllocator tagAllocator(memoryManager, 1, 1); // pool with a single tag
    auto firstNode = tagAllocator.getTag();

    // A tag that is not ready is parked on the deferred list when returned.
    firstNode->tag->release = false;
    tagAllocator.returnTag(firstNode);
    EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
    EXPECT_TRUE(tagAllocator.getFreeTags().peekIsEmpty());

    // The tag becomes ready while deferred, so the next request can recycle it.
    firstNode->tag->release = true;
    auto node = tagAllocator.getTag();
    EXPECT_NE(nullptr, node);

    // Getting the very same node back proves it came from the deferred list;
    // a freshly allocated one-tag pool would also leave the free list empty.
    EXPECT_EQ(firstNode, node);
    EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
    EXPECT_TRUE(tagAllocator.getFreeTags().peekIsEmpty()); // empty again - new pool wasn't allocated
}
// releaseDeferredTags() must move only the tags that report canBeReleased() from
// the deferred list to the free list and keep the others deferred.
TEST_F(TagAllocatorTest, givenTagsOnDeferredListWhenReleasingItThenMoveReadyTagsToFreePool) {
MockTagAllocator tagAllocator(memoryManager, 2, 1); // pool with 2 tags
auto node1 = tagAllocator.getTag();
auto node2 = tagAllocator.getTag();
// Both tags are marked not ready before being returned.
node1->tag->release = false;
node2->tag->release = false;
tagAllocator.returnTag(node1);
tagAllocator.returnTag(node2);
// Nothing is ready yet: both stay deferred, free list stays empty.
tagAllocator.releaseDeferredTags();
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_TRUE(tagAllocator.getFreeTags().peekIsEmpty());
// Only the first tag is ready: it moves to the free list, the second stays deferred.
node1->tag->release = true;
tagAllocator.releaseDeferredTags();
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_FALSE(tagAllocator.getFreeTags().peekIsEmpty());
// The second tag is ready as well: the deferred list drains completely.
node2->tag->release = true;
tagAllocator.releaseDeferredTags();
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_FALSE(tagAllocator.getFreeTags().peekIsEmpty());
}