Use a dedicated type alias (using declaration) for TaskCount

Related-To: NEO-7155

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2022-11-22 13:53:59 +00:00
committed by Compute-Runtime-Automation
parent 3f962bf3e8
commit 4b42b066f8
146 changed files with 568 additions and 529 deletions

View File

@@ -52,6 +52,7 @@ set(NEO_CORE_COMMAND_STREAM
${CMAKE_CURRENT_SOURCE_DIR}/submission_status.h
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h
${CMAKE_CURRENT_SOURCE_DIR}/task_count_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.h
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.h

View File

@@ -56,7 +56,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
void pollForCompletion() override;
void pollForCompletionImpl() override;
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
uint32_t getDumpHandle();
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
@@ -112,7 +112,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
bool isEngineInitialized = false;
ExternalAllocationsContainer externalAllocations;
uint32_t pollForCompletionTaskCount = 0u;
TaskCountType pollForCompletionTaskCount = 0u;
SpinLock pollForCompletionLock;
};
} // namespace NEO

View File

@@ -312,7 +312,7 @@ SubmissionStatus AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batch
if (subCaptureManager->isSubCaptureMode()) {
if (!subCaptureManager->isSubCaptureEnabled()) {
if (this->standalone) {
volatile uint32_t *pollAddress = this->tagAddress;
volatile TagAddressType *pollAddress = this->tagAddress;
for (uint32_t i = 0; i < this->activePartitions; i++) {
*pollAddress = this->peekLatestSentTaskCount();
pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset);
@@ -353,7 +353,7 @@ SubmissionStatus AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batch
submitBatchBufferAub(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
if (this->standalone) {
volatile uint32_t *pollAddress = this->tagAddress;
volatile TagAddressType *pollAddress = this->tagAddress;
for (uint32_t i = 0; i < this->activePartitions; i++) {
*pollAddress = this->peekLatestSentTaskCount();
pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset);
@@ -614,7 +614,7 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
}
template <typename GfxFamily>
inline WaitStatus AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
inline WaitStatus AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
const auto result = CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
pollForCompletion();

View File

@@ -180,7 +180,7 @@ void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gf
makeResident(*gfxAllocation);
}
WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) {
WaitStatus CommandStreamReceiver::waitForTaskCount(TaskCountType requiredTaskCount) {
auto address = getTagAddress();
if (!skipResourceCleanup() && address) {
this->downloadTagAllocation(requiredTaskCount);
@@ -190,7 +190,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) {
return WaitStatus::Ready;
}
WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) {
WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(TaskCountType requiredTaskCount, uint32_t allocationUsage) {
WaitStatus waitStatus{WaitStatus::Ready};
auto &list = allocationUsage == TEMPORARY_ALLOCATION ? internalAllocationStorage->getTemporaryAllocations() : internalAllocationStorage->getAllocationsForReuse();
if (!list.peekIsEmpty()) {
@@ -201,7 +201,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_
return waitStatus;
}
WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) {
WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount) {
return waitForTaskCountAndCleanAllocationList(requiredTaskCount, TEMPORARY_ALLOCATION);
}
@@ -358,13 +358,13 @@ void CommandStreamReceiver::cleanupResources() {
}
}
WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) {
WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) {
bool printWaitForCompletion = DebugManager.flags.LogWaitingForCompletion.get();
if (printWaitForCompletion) {
printTagAddressContent(taskCountToWait, params.waitTimeout, true);
}
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
TaskCountType latestSentTaskCount = this->latestFlushedTaskCount;
if (latestSentTaskCount < taskCountToWait) {
if (!this->flushBatchedSubmissions()) {
const auto isGpuHang{isGpuHangDetected()};
@@ -391,15 +391,15 @@ bool CommandStreamReceiver::checkGpuHangDetected(TimeType currentTime, TimeType
return false;
}
WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams &params, uint32_t taskCountToWait) {
WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *pollAddress, const WaitParams &params, TaskCountType taskCountToWait) {
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime;
int64_t timeDiff = 0;
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
TaskCountType latestSentTaskCount = this->latestFlushedTaskCount;
if (latestSentTaskCount < taskCountToWait) {
this->flushTagUpdate();
}
volatile uint32_t *partitionAddress = pollAddress;
volatile TagAddressType *partitionAddress = pollAddress;
waitStartTime = std::chrono::high_resolution_clock::now();
lastHangCheckTime = waitStartTime;
@@ -438,7 +438,7 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddres
void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
this->tagAllocation = allocation;
UNRECOVERABLE_IF(allocation == nullptr);
this->tagAddress = reinterpret_cast<uint32_t *>(allocation->getUnderlyingBuffer());
this->tagAddress = reinterpret_cast<TagAddressType *>(allocation->getUnderlyingBuffer());
this->debugPauseStateAddress = reinterpret_cast<DebugPauseState *>(
reinterpret_cast<uint8_t *>(allocation->getUnderlyingBuffer()) + debugPauseStateAddressOffset);
}
@@ -855,7 +855,7 @@ void CommandStreamReceiver::printDeviceIndex() {
}
}
void CommandStreamReceiver::checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation) {
void CommandStreamReceiver::checkForNewResources(TaskCountType submittedTaskCount, TaskCountType allocationTaskCount, GraphicsAllocation &gfxAllocation) {
if (useNewResourceImplicitFlush) {
if (allocationTaskCount == GraphicsAllocation::objectNotUsed && !GraphicsAllocation::isIsaAllocationType(gfxAllocation.getAllocationType())) {
newResources = true;
@@ -875,7 +875,7 @@ bool CommandStreamReceiver::checkImplicitFlushForGpuIdle() {
return false;
}
void CommandStreamReceiver::downloadTagAllocation(uint32_t taskCountToWait) {
void CommandStreamReceiver::downloadTagAllocation(TaskCountType taskCountToWait) {
if (this->getTagAllocation()) {
if (taskCountToWait && taskCountToWait <= this->peekLatestFlushedTaskCount()) {
this->downloadAllocation(*this->getTagAllocation());
@@ -883,7 +883,7 @@ void CommandStreamReceiver::downloadTagAllocation(uint32_t taskCountToWait) {
}
}
bool CommandStreamReceiver::testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait) {
bool CommandStreamReceiver::testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) {
this->downloadTagAllocation(taskCountToWait);
for (uint32_t i = 0; i < activePartitions; i++) {
if (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) {
@@ -903,7 +903,7 @@ const RootDeviceEnvironment &CommandStreamReceiver::peekRootDeviceEnvironment()
return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];
}
uint32_t CommandStreamReceiver::getCompletionValue(const GraphicsAllocation &gfxAllocation) {
TaskCountType CommandStreamReceiver::getCompletionValue(const GraphicsAllocation &gfxAllocation) {
if (completionFenceValuePointer) {
return *completionFenceValuePointer;
}
@@ -920,7 +920,7 @@ bool CommandStreamReceiver::createPerDssBackedBuffer(Device &device) {
return perDssBackedBuffer != nullptr;
}
void CommandStreamReceiver::printTagAddressContent(uint32_t taskCountToWait, int64_t waitTimeout, bool start) {
void CommandStreamReceiver::printTagAddressContent(TaskCountType taskCountToWait, int64_t waitTimeout, bool start) {
auto postSyncAddress = getTagAddress();
if (start) {
PRINT_DEBUG_STRING(true, stdout,
@@ -941,7 +941,7 @@ LogicalStateHelper *CommandStreamReceiver::getLogicalStateHelper() const {
return logicalStateHelper.get();
}
uint32_t CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus status) {
TaskCountType CompletionStamp::getTaskCountFromSubmissionStatusError(SubmissionStatus status) {
switch (status) {
case SubmissionStatus::OUT_OF_HOST_MEMORY:
return CompletionStamp::outOfHostMemory;

View File

@@ -13,6 +13,7 @@
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/command_stream/submission_status.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/command_stream/task_count_helper.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/blit_commands_helper.h"
@@ -86,7 +87,7 @@ class CommandStreamReceiver {
virtual CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
virtual bool flushBatchedSubmissions() = 0;
MOCKABLE_VIRTUAL SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency);
@@ -112,9 +113,9 @@ class CommandStreamReceiver {
virtual GmmPageTableMngr *createPageTableManager() { return nullptr; }
bool needsPageTableManager() const;
MOCKABLE_VIRTUAL WaitStatus waitForTaskCount(uint32_t requiredTaskCount);
WaitStatus waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage);
MOCKABLE_VIRTUAL WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount);
MOCKABLE_VIRTUAL WaitStatus waitForTaskCount(TaskCountType requiredTaskCount);
WaitStatus waitForTaskCountAndCleanAllocationList(TaskCountType requiredTaskCount, uint32_t allocationUsage);
MOCKABLE_VIRTUAL WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(TaskCountType requiredTaskCount);
LinearStream &getCS(size_t minRequiredSize = 1024u);
OSInterface *getOSInterface() const;
@@ -129,19 +130,19 @@ class CommandStreamReceiver {
return tagsMultiAllocation;
}
MultiGraphicsAllocation &createTagsMultiAllocation();
volatile uint32_t *getTagAddress() const { return tagAddress; }
volatile TagAddressType *getTagAddress() const { return tagAddress; }
uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; }
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; }
uint32_t peekTaskCount() const { return taskCount; }
TaskCountType peekTaskCount() const { return taskCount; }
uint32_t peekTaskLevel() const { return taskLevel; }
TaskCountType peekTaskLevel() const { return taskLevel; }
FlushStamp obtainCurrentFlushStamp() const;
uint32_t peekLatestSentTaskCount() const { return latestSentTaskCount; }
TaskCountType peekLatestSentTaskCount() const { return latestSentTaskCount; }
uint32_t peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; }
TaskCountType peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; }
void enableNTo1SubmissionModel() { this->nTo1SubmissionModelEnabled = true; }
bool isNTo1SubmissionModelEnabled() const { return this->nTo1SubmissionModelEnabled; }
@@ -165,10 +166,10 @@ class CommandStreamReceiver {
void requestStallingCommandsOnNextFlush() { stallingCommandsOnNextFlushRequired = true; }
bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; }
virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) = 0;
virtual WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait);
WaitStatus baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams &params, uint32_t taskCountToWait);
MOCKABLE_VIRTUAL bool testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait);
virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) = 0;
virtual WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait);
WaitStatus baseWaitFunction(volatile TagAddressType *pollAddress, const WaitParams &params, TaskCountType taskCountToWait);
MOCKABLE_VIRTUAL bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait);
virtual void downloadAllocations(){};
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }
@@ -225,14 +226,14 @@ class CommandStreamReceiver {
virtual MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired, const HardwareInfo &hwInfo) const = 0;
void setLatestSentTaskCount(uint32_t latestSentTaskCount) {
void setLatestSentTaskCount(TaskCountType latestSentTaskCount) {
this->latestSentTaskCount = latestSentTaskCount;
}
void setLatestFlushedTaskCount(uint32_t latestFlushedTaskCount) {
void setLatestFlushedTaskCount(TaskCountType latestFlushedTaskCount) {
this->latestFlushedTaskCount = latestFlushedTaskCount;
}
virtual uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
virtual TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0;
virtual SubmissionStatus flushTagUpdate() = 0;
virtual void updateTagFromWait() = 0;
@@ -333,7 +334,7 @@ class CommandStreamReceiver {
MOCKABLE_VIRTUAL bool checkGpuHangDetected(TimeType currentTime, TimeType &lastHangCheckTime) const;
uint64_t getCompletionAddress() const {
uint64_t completionFenceAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
uint64_t completionFenceAddress = castToUint64(const_cast<TagAddressType *>(tagAddress));
if (completionFenceAddress == 0) {
return 0;
}
@@ -341,7 +342,7 @@ class CommandStreamReceiver {
return completionFenceAddress;
}
uint32_t getCompletionValue(const GraphicsAllocation &gfxAllocation);
TaskCountType getCompletionValue(const GraphicsAllocation &gfxAllocation);
DispatchMode getDispatchMode() const {
return this->dispatchMode;
}
@@ -387,10 +388,10 @@ class CommandStreamReceiver {
protected:
void cleanupResources();
void printDeviceIndex();
void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation);
void checkForNewResources(TaskCountType submittedTaskCount, TaskCountType allocationTaskCount, GraphicsAllocation &gfxAllocation);
bool checkImplicitFlushForGpuIdle();
void downloadTagAllocation(uint32_t taskCountToWait);
void printTagAddressContent(uint32_t taskCountToWait, int64_t waitTimeout, bool start);
void downloadTagAllocation(TaskCountType taskCountToWait);
void printTagAddressContent(TaskCountType taskCountToWait, int64_t waitTimeout, bool start);
[[nodiscard]] MOCKABLE_VIRTUAL std::unique_lock<MutexType> obtainHostPtrSurfaceCreationLock();
std::unique_ptr<FlushStampTracker> flushStamp;
@@ -421,7 +422,7 @@ class CommandStreamReceiver {
const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;
uint64_t totalMemoryUsed = 0u;
volatile uint32_t *tagAddress = nullptr;
volatile TagAddressType *tagAddress = nullptr;
volatile DebugPauseState *debugPauseStateAddress = nullptr;
SpinLock debugPauseStateLock;
static void *asyncDebugBreakConfirmation(void *arg);
@@ -441,14 +442,14 @@ class CommandStreamReceiver {
IndirectHeap *indirectHeap[IndirectHeap::Type::NUM_TYPES];
OsContext *osContext = nullptr;
uint32_t *completionFenceValuePointer = nullptr;
TaskCountType *completionFenceValuePointer = nullptr;
// current taskLevel. Used for determining if a PIPE_CONTROL is needed.
std::atomic<uint32_t> taskLevel{0};
std::atomic<uint32_t> latestSentTaskCount{0};
std::atomic<uint32_t> latestFlushedTaskCount{0};
std::atomic<TaskCountType> taskLevel{0};
std::atomic<TaskCountType> latestSentTaskCount{0};
std::atomic<TaskCountType> latestFlushedTaskCount{0};
// taskCount - # of tasks submitted
std::atomic<uint32_t> taskCount{0};
std::atomic<TaskCountType> taskCount{0};
std::atomic<uint32_t> numClients = 0u;
@@ -470,7 +471,7 @@ class CommandStreamReceiver {
uint32_t activePartitionsConfig = 1;
uint32_t postSyncWriteOffset = 0;
uint32_t completionFenceOffset = 0;
uint32_t completionFenceValue = 0;
TaskCountType completionFenceValue = 0;
const uint32_t rootDeviceIndex;
const DeviceBitfield deviceBitfield;

View File

@@ -43,7 +43,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override;
void forcePipeControl(NEO::LinearStream &commandStreamCSR);
@@ -77,7 +77,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
bool isPipelineSelectAlreadyProgrammed() const;
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo);
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
void collectStateBaseAddresPatchInfo(
uint64_t commandBufferAddress,
@@ -96,7 +96,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return CommandStreamReceiverType::CSR_HW;
}
uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override;
SubmissionStatus flushTagUpdate() override;
SubmissionStatus flushMiFlushDW();

View File

@@ -178,7 +178,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
uint32_t taskLevel,
TaskCountType taskLevel,
DispatchFlags &dispatchFlags,
Device &device) {
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
@@ -926,7 +926,7 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPipelineSelect()
}
template <typename GfxFamily>
inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
const auto params = kmdNotifyHelper->obtainTimeoutParams(useQuickKmdSleep, *getTagAddress(), taskCountToWait, flushStampToWait, throttle, this->isKmdWaitModeActive(),
this->isAnyDirectSubmissionEnabled());
@@ -1075,7 +1075,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::unregisterDirectSubmissionFromCo
}
template <typename GfxFamily>
uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;

View File

@@ -40,7 +40,7 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
return CommandStreamReceiverType::CSR_HW_WITH_AUB;
}
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, QueueThrottle throttle) override;
size_t getPreferredTagPoolSize() const override { return 1; }

View File

@@ -78,7 +78,7 @@ void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContex
}
template <typename BaseCSR>
WaitStatus CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
WaitStatus CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, QueueThrottle throttle) {
if (aubCSR) {
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);

View File

@@ -6,6 +6,7 @@
*/
#pragma once
#include "shared/source/command_stream/task_count_helper.h"
#include "shared/source/utilities/stackvec.h"
namespace NEO {
@@ -21,7 +22,7 @@ class CsrDependencies {
All
};
StackVec<std::pair<uint32_t, uint64_t>, 32> taskCountContainer;
StackVec<std::pair<TaskCountType, uint64_t>, 32> taskCountContainer;
StackVec<TimestampPacketContainer *, 32> timestampPacketContainer;
void makeResident(CommandStreamReceiver &commandStreamReceiver) const;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -43,7 +43,7 @@ class ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) = 0;
@@ -62,14 +62,14 @@ class ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) = 0;
virtual void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,

View File

@@ -26,7 +26,7 @@ void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
@@ -85,7 +85,7 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
uint32_t offset,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
@@ -94,7 +94,7 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -18,7 +18,7 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
@@ -31,14 +31,14 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,

View File

@@ -54,7 +54,7 @@ void ScratchSpaceControllerXeHPAndLater::setRequiredScratchSpace(void *sshBaseAd
uint32_t offset,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
@@ -135,7 +135,7 @@ void ScratchSpaceControllerXeHPAndLater::reserveHeap(IndirectHeap::Type heapType
void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
@@ -152,7 +152,7 @@ void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(B
void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &scratchSurfaceDirty,
@@ -193,7 +193,7 @@ void ScratchSpaceControllerXeHPAndLater::programHeaps(HeapContainer &heapContain
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
@@ -38,14 +38,14 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
@@ -56,7 +56,7 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation);
MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &scratchSurfaceDirty,

View File

@@ -63,7 +63,7 @@ struct CommandBuffer : public IDNode<CommandBuffer> {
BatchBuffer batchBuffer;
void *batchBufferEndLocation = nullptr;
uint32_t inspectionId = 0;
uint32_t taskCount = 0u;
TaskCountType taskCount = 0u;
void *pipeControlThatMayBeErasedLocation = nullptr;
void *epiloguePipeControlLocation = nullptr;
PipeControlArgs epiloguePipeControlArgs;

View File

@@ -0,0 +1,12 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <stdint.h>
// Integer type for the task counts and task levels tracked by command stream
// receivers (e.g. taskCount, taskLevel, latestSentTaskCount, latestFlushedTaskCount).
// Use this alias rather than a raw fixed-width integer so the width is defined in one place.
using TaskCountType = uint32_t;
// Type of the value stored at a command stream receiver's tag address; that
// location is polled and compared against TaskCountType values when waiting for completion.
// NOTE(review): presumably this should stay the same width as TaskCountType, since
// task counts are written to and compared with the tag value - confirm before changing either.
using TagAddressType = uint32_t;

View File

@@ -33,7 +33,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
uint32_t getMaskAndValueForPollForCompletion() const;
bool getpollNotEqualValueForPollForCompletion() const;
void flushSubmissionsAndDownloadAllocations(uint32_t taskCount);
void flushSubmissionsAndDownloadAllocations(TaskCountType taskCount);
public:
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO;
@@ -45,8 +45,8 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
WaitStatus waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) override;
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override;
void downloadAllocations() override;
void downloadAllocationTbx(GraphicsAllocation &gfxAllocation);

View File

@@ -474,14 +474,14 @@ bool TbxCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress,
}
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(uint32_t taskCountToWait) {
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait) {
this->flushBatchedSubmissions();
if (this->latestFlushedTaskCount < taskCountToWait) {
this->flushTagUpdate();
}
volatile uint32_t *pollAddress = this->getTagAddress();
volatile TagAddressType *pollAddress = this->getTagAddress();
for (uint32_t i = 0; i < this->activePartitions; i++) {
while (*pollAddress < this->latestFlushedTaskCount) {
this->downloadAllocation(*this->getTagAllocation());
@@ -497,13 +497,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
}
template <typename GfxFamily>
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
flushSubmissionsAndDownloadAllocations(taskCountToWait);
return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
}
template <typename GfxFamily>
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(const WaitParams &params, uint32_t taskCountToWait) {
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) {
flushSubmissionsAndDownloadAllocations(taskCountToWait);
return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait);
}
@@ -554,7 +554,7 @@ void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocationTbx(GraphicsAlloca
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations() {
volatile uint32_t *pollAddress = this->getTagAddress();
volatile TagAddressType *pollAddress = this->getTagAddress();
for (uint32_t i = 0; i < this->activePartitions; i++) {
while (*pollAddress < this->latestFlushedTaskCount) {
this->downloadAllocation(*this->getTagAllocation());