Create definition of tag allocation layout

We use the tag allocation for multiple purposes, so all of its offsets
should be defined in one place.

Resolves: NEO-7559
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2022-12-06 07:32:34 +00:00 committed by Compute-Runtime-Automation
parent ba9ea6fabf
commit 103f522f18
13 changed files with 67 additions and 35 deletions

View File

@ -52,6 +52,7 @@ set(NEO_CORE_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/submission_status.h
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocation_layout.h
${CMAKE_CURRENT_SOURCE_DIR}/task_count_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.h

View File

@ -12,6 +12,7 @@
#include "shared/source/command_stream/experimental_command_buffer.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/direct_submission/direct_submission_controller.h"
@ -444,7 +445,7 @@ void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
UNRECOVERABLE_IF(allocation == nullptr);
this->tagAddress = reinterpret_cast<TagAddressType *>(allocation->getUnderlyingBuffer());
this->debugPauseStateAddress = reinterpret_cast<DebugPauseState *>(
reinterpret_cast<uint8_t *>(allocation->getUnderlyingBuffer()) + debugPauseStateAddressOffset);
reinterpret_cast<uint8_t *>(allocation->getUnderlyingBuffer()) + TagAllocationLayout::debugPauseStateAddressOffset);
}
MultiGraphicsAllocation &CommandStreamReceiver::createTagsMultiAllocation() {
@ -687,10 +688,14 @@ bool CommandStreamReceiver::initializeTagAllocation() {
this->setTagAllocation(tagAllocation);
auto initValue = DebugManager.flags.EnableNullHardware.get() ? static_cast<uint32_t>(-1) : initialHardwareTag;
auto tagAddress = this->tagAddress;
auto completionFence = reinterpret_cast<TaskCountType *>(getCompletionAddress());
UNRECOVERABLE_IF(!completionFence);
uint32_t subDevices = static_cast<uint32_t>(this->deviceBitfield.count());
for (uint32_t i = 0; i < subDevices; i++) {
*tagAddress = initValue;
tagAddress = ptrOffset(tagAddress, this->postSyncWriteOffset);
*completionFence = 0;
completionFence = ptrOffset(completionFence, this->postSyncWriteOffset);
}
*this->debugPauseStateAddress = DebugManager.flags.EnableNullHardware.get() ? DebugPauseState::disabled : DebugPauseState::waitingForFirstSemaphore;
@ -956,4 +961,13 @@ TaskCountType CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionS
}
}
uint64_t CommandStreamReceiver::getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + TagAllocationLayout::debugPauseStateAddressOffset; }
uint64_t CommandStreamReceiver::getCompletionAddress() const {
uint64_t completionFenceAddress = castToUint64(const_cast<TagAddressType *>(tagAddress));
if (completionFenceAddress == 0) {
return 0;
}
completionFenceAddress += TagAllocationLayout::completionFenceOffset;
return completionFenceAddress;
}
} // namespace NEO

View File

@ -133,7 +133,7 @@ class CommandStreamReceiver {
}
MultiGraphicsAllocation &createTagsMultiAllocation();
volatile TagAddressType *getTagAddress() const { return tagAddress; }
uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; }
uint64_t getDebugPauseStateGPUAddress() const;
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; }
@ -335,14 +335,7 @@ class CommandStreamReceiver {
MOCKABLE_VIRTUAL bool isGpuHangDetected() const;
MOCKABLE_VIRTUAL bool checkGpuHangDetected(TimeType currentTime, TimeType &lastHangCheckTime) const;
uint64_t getCompletionAddress() const {
uint64_t completionFenceAddress = castToUint64(const_cast<TagAddressType *>(tagAddress));
if (completionFenceAddress == 0) {
return 0;
}
completionFenceAddress += completionFenceOffset;
return completionFenceAddress;
}
uint64_t getCompletionAddress() const;
TaskCountType getCompletionValue(const GraphicsAllocation &gfxAllocation);
DispatchMode getDispatchMode() const {
@ -424,8 +417,6 @@ class CommandStreamReceiver {
FrontEndPropertiesSupport feSupportFlags{};
PipelineSelectPropertiesSupport pipelineSupportFlags{};
// offset for debug state is 1kbyte, tag writes can use multiple offsets for multiple partitions and each offset can vary per platform
const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;
uint64_t totalMemoryUsed = 0u;
volatile TagAddressType *tagAddress = nullptr;
@ -476,7 +467,6 @@ class CommandStreamReceiver {
uint32_t activePartitions = 1;
uint32_t activePartitionsConfig = 1;
uint32_t postSyncWriteOffset = 0;
uint32_t completionFenceOffset = 0;
TaskCountType completionFenceValue = 0;
const uint32_t rootDeviceIndex;

View File

@ -0,0 +1,17 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/constants.h"
namespace NEO {
namespace TagAllocationLayout {
inline constexpr uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;
inline constexpr uint64_t completionFenceOffset = 2 * MemoryConstants::kiloByte;
} // namespace TagAllocationLayout
} // namespace NEO

View File

@ -7,6 +7,7 @@
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/device/device.h"
#include "shared/source/direct_submission/linux/drm_direct_submission.h"
#include "shared/source/os_interface/linux/drm_allocation.h"
@ -62,8 +63,8 @@ DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(const DirectSubm
if (DebugManager.flags.PrintCompletionFenceUsage.get()) {
std::cout << "Completion fence for DirectSubmission:"
<< " GPU address: " << std::hex << (this->completionFenceAllocation->getGpuAddress() + Drm::completionFenceOffset)
<< ", CPU address: " << (castToUint64(this->completionFenceAllocation->getUnderlyingBuffer()) + Drm::completionFenceOffset)
<< " GPU address: " << std::hex << (this->completionFenceAllocation->getGpuAddress() + TagAllocationLayout::completionFenceOffset)
<< ", CPU address: " << (castToUint64(this->completionFenceAllocation->getUnderlyingBuffer()) + TagAllocationLayout::completionFenceOffset)
<< std::dec << std::endl;
}
}
@ -79,7 +80,7 @@ inline DrmDirectSubmission<GfxFamily, Dispatcher>::~DrmDirectSubmission() {
if (this->isCompletionFenceSupported()) {
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
auto &drm = osContextLinux->getDrm();
auto completionFenceCpuAddress = reinterpret_cast<uint64_t>(this->completionFenceAllocation->getUnderlyingBuffer()) + Drm::completionFenceOffset;
auto completionFenceCpuAddress = reinterpret_cast<uint64_t>(this->completionFenceAllocation->getUnderlyingBuffer()) + TagAllocationLayout::completionFenceOffset;
drm.waitOnUserFences(*osContextLinux, completionFenceCpuAddress, this->completionFenceValue, this->activeTiles, this->postSyncOffset);
}
this->deallocateResources();
@ -124,7 +125,7 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
uint64_t completionFenceGpuAddress = 0u;
if (this->isCompletionFenceSupported()) {
completionValue = ++completionFenceValue;
completionFenceGpuAddress = this->completionFenceAllocation->getGpuAddress() + Drm::completionFenceOffset;
completionFenceGpuAddress = this->completionFenceAllocation->getGpuAddress() + TagAllocationLayout::completionFenceOffset;
}
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/direct_submission/linux/drm_direct_submission.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/gmm_helper/client_context/gmm_client_context.h"
@ -39,8 +40,6 @@ DrmCommandStreamReceiver<GfxFamily>::DrmCommandStreamReceiver(ExecutionEnvironme
gemCloseWorkerMode mode)
: BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield), gemCloseWorkerOperationMode(mode) {
this->completionFenceOffset = Drm::completionFenceOffset;
auto rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex].get();
this->drm = rootDeviceEnvironment->osInterface->getDriverModel()->as<Drm>();
@ -226,7 +225,7 @@ int DrmCommandStreamReceiver<GfxFamily>::exec(const BatchBuffer &batchBuffer, ui
uint64_t completionGpuAddress = 0;
TaskCountType completionValue = 0;
if (this->drm->isVmBindAvailable() && this->drm->completionFenceSupport()) {
completionGpuAddress = getTagAllocation()->getGpuAddress() + (index * this->postSyncWriteOffset) + Drm::completionFenceOffset;
completionGpuAddress = getTagAllocation()->getGpuAddress() + (index * this->postSyncWriteOffset) + TagAllocationLayout::completionFenceOffset;
completionValue = this->latestSentTaskCount;
}

View File

@ -61,7 +61,6 @@ class Drm : public DriverModel {
public:
static constexpr DriverModelType driverModelType = DriverModelType::DRM;
static constexpr size_t completionFenceOffset = 1024;
static SubmissionStatus getSubmissionStatusFromReturnCode(int32_t retCode);

View File

@ -8,6 +8,7 @@
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
#include "shared/source/command_stream/scratch_space_controller_base.h"
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/page_table_mngr.h"
@ -77,9 +78,14 @@ TEST_F(CommandStreamReceiverTest, givenOsAgnosticCsrWhenGettingCompletionValueTh
EXPECT_EQ(expectedValue, commandStreamReceiver->getCompletionValue(allocation));
}
TEST_F(CommandStreamReceiverTest, givenOsAgnosticCsrWhenGettingCompletionAddressThenProperAddressIsReturned) {
TEST_F(CommandStreamReceiverTest, givenCsrWhenGettingCompletionAddressThenProperAddressIsReturned) {
auto expectedAddress = castToUint64(const_cast<TagAddressType *>(commandStreamReceiver->getTagAddress()));
EXPECT_EQ(expectedAddress, commandStreamReceiver->getCompletionAddress());
EXPECT_EQ(expectedAddress + TagAllocationLayout::completionFenceOffset, commandStreamReceiver->getCompletionAddress());
}
// Verifies that the completion fence slot of the tag allocation reads back as zero.
TEST_F(CommandStreamReceiverTest, givenCsrWhenGettingCompletionAddressThenUnderlyingMemoryIsZeroed) {
    auto completionFence = reinterpret_cast<TaskCountType *>(commandStreamReceiver->getCompletionAddress());
    // getCompletionAddress() returns 0 when no tag address is set; abort the test
    // with a failure here instead of dereferencing a null pointer below.
    ASSERT_NE(nullptr, completionFence);
    EXPECT_EQ(0u, *completionFence);
}
HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenDefaultValuesAreSet) {

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
#include "shared/source/direct_submission/linux/drm_direct_submission.h"
@ -262,7 +263,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndFenceIsNotComple
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
ASSERT_TRUE(drm->completionFenceSupport());
auto completionFenceBaseCpuAddress = reinterpret_cast<uint64_t>(commandStreamReceiver.getTagAddress()) + Drm::completionFenceOffset;
auto completionFenceBaseCpuAddress = reinterpret_cast<uint64_t>(commandStreamReceiver.getTagAddress()) + TagAllocationLayout::completionFenceOffset;
uint32_t expectedCompletionValueToWait = 10u;
{
@ -360,7 +361,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmitt
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + TagAllocationLayout::completionFenceOffset;
DeviceBitfield firstTileBitfield{0b01};
OsContextLinux osContextTile0(*drm, 0, 0u,
@ -399,7 +400,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTile1AndCompletionFenceSupportWhenSubmitt
auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + TagAllocationLayout::completionFenceOffset;
DeviceBitfield secondTileBitfield{0b10};
OsContextLinux osContextTile1(*drm, 0, 0u,
@ -438,7 +439,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTwoTilesAndCompletionFenceSupportWhenSubm
auto &commandStreamReceiver = device->getUltCommandStreamReceiver<FamilyType>();
auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;
auto completionFenceBaseGpuAddress = commandStreamReceiver.getTagAllocation()->getGpuAddress() + TagAllocationLayout::completionFenceOffset;
DeviceBitfield twoTilesBitfield{0b11};
OsContextLinux osContextBothTiles(*drm, 0, 0u,

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/os_interface/linux/drm_command_stream.h"
#include "shared/source/os_interface/linux/drm_memory_manager.h"
@ -119,7 +120,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsVmBindAndComplet
csr->makeResident(*allocation);
csr->makeResident(*csr->getTagAllocation());
uint64_t expectedCompletionGpuAddress = csr->getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;
uint64_t expectedCompletionGpuAddress = csr->getTagAllocation()->getGpuAddress() + TagAllocationLayout::completionFenceOffset;
auto *testCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);
testCsr->latestSentTaskCount = 2;

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/test/common/helpers/batch_buffer_helper.h"
#include "shared/test/common/mocks/linux/mock_drm_allocation.h"
@ -63,7 +64,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionAddressThenOffsett
csr->initializeTagAllocation();
EXPECT_NE(nullptr, csr->getTagAddress());
uint64_t tagAddress = castToUint64(const_cast<TagAddressType *>(csr->getTagAddress()));
auto expectedAddress = tagAddress + Drm::completionFenceOffset;
auto expectedAddress = tagAddress + TagAllocationLayout::completionFenceOffset;
EXPECT_EQ(expectedAddress, csr->getCompletionAddress());
}

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/os_interface/linux/drm_command_stream.h"
#include "shared/source/os_interface/linux/drm_memory_manager.h"
@ -100,7 +101,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo
testCsr->latestSentTaskCount = 2;
testCsr->postSyncWriteOffset = 16;
uint64_t expectedCompletionGpuAddress = testCsr->getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset + testCsr->postSyncWriteOffset;
uint64_t expectedCompletionGpuAddress = testCsr->getTagAllocation()->getGpuAddress() + TagAllocationLayout::completionFenceOffset + testCsr->postSyncWriteOffset;
SubmissionStatus ret = testCsr->flushInternal(batchBuffer, testCsr->getResidencyAllocations());
EXPECT_EQ(SubmissionStatus::SUCCESS, ret);
@ -127,7 +128,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo
allocation->updateTaskCount(2, defaultEngine.osContext->getContextId());
volatile TagAddressType *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress();
completionAddress += (Drm::completionFenceOffset / sizeof(TagAddressType));
completionAddress += (TagAllocationLayout::completionFenceOffset / sizeof(TagAddressType));
*completionAddress = 1;
completionAddress += (postSyncOffset / sizeof(TagAddressType));
*completionAddress = 1;
@ -135,7 +136,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo
memoryManager->handleFenceCompletion(allocation);
uint64_t expectedAddress = castToUint64(const_cast<TagAddressType *>(defaultEngine.commandStreamReceiver->getTagAddress())) +
Drm::completionFenceOffset +
TagAllocationLayout::completionFenceOffset +
postSyncOffset;
constexpr uint64_t expectedValue = 2;
@ -161,7 +162,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo
allocation->updateTaskCount(2, defaultEngine.osContext->getContextId());
volatile TagAddressType *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress();
completionAddress += (Drm::completionFenceOffset / sizeof(TagAddressType));
completionAddress += (TagAllocationLayout::completionFenceOffset / sizeof(TagAddressType));
*completionAddress = 2; //1st context is ready
completionAddress += (postSyncOffset / sizeof(TagAddressType));
*completionAddress = 1;
@ -169,7 +170,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo
memoryManager->handleFenceCompletion(allocation);
uint64_t expectedAddress = castToUint64(const_cast<TagAddressType *>(defaultEngine.commandStreamReceiver->getTagAddress())) +
Drm::completionFenceOffset +
TagAllocationLayout::completionFenceOffset +
postSyncOffset;
constexpr uint64_t expectedValue = 2;

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/built_ins/sip.h"
#include "shared/source/command_stream/tag_allocation_layout.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/surface_format_info.h"
#include "shared/source/memory_manager/memory_banks.h"
@ -5501,7 +5502,7 @@ TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOf
auto engine = memoryManager->getRegisteredEngines()[0];
allocation->updateTaskCount(2, engine.osContext->getContextId());
uint64_t expectedFenceAddress = castToUint64(const_cast<TagAddressType *>(engine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset;
uint64_t expectedFenceAddress = castToUint64(const_cast<TagAddressType *>(engine.commandStreamReceiver->getTagAddress())) + TagAllocationLayout::completionFenceOffset;
constexpr uint64_t expectedValue = 2;
memoryManager->handleFenceCompletion(allocation);