mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-07 12:42:54 +08:00
Change fences to use tag allocation
Instead of creating a new allocation per fence, use the task count. Fence synchronization will wait for the task count to be updated. Related-To: NEO-6634 Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
8a4d28ef8b
commit
fb1a008414
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -18,52 +18,27 @@ namespace L0 {
|
||||
// Factory for fence objects: allocates a FenceImp bound to cmdQueue, aborts via
// UNRECOVERABLE_IF if the resulting pointer is null, and returns the new fence.
// The desc argument is not read in the lines shown here.
// NOTE(review): this is a rendered diff hunk whose +/- markers were lost, so both
// fence->initialize() and fence->reset() appear below. Going by the commit
// description, initialize() is presumably the removed call and reset() the added
// one -- confirm against the repository before relying on either.
Fence *Fence::create(CommandQueueImp *cmdQueue, const ze_fence_desc_t *desc) {
|
||||
auto fence = new FenceImp(cmdQueue);
|
||||
UNRECOVERABLE_IF(fence == nullptr);
|
||||
|
||||
fence->initialize();
|
||||
|
||||
fence->reset();
|
||||
return fence;
|
||||
}
|
||||
|
||||
// Destructor: passes `allocation` to freeGraphicsMemory() on the memory manager
// reached through cmdQueue -> device -> driver handle, then nulls the pointer.
// NOTE(review): the commit description says fences no longer create their own
// allocation, so this body is presumably on the removed side of the diff -- confirm.
FenceImp::~FenceImp() {
|
||||
cmdQueue->getDevice()->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(allocation);
|
||||
allocation = nullptr;
|
||||
}
|
||||
|
||||
// Non-blocking status query: returns ZE_RESULT_SUCCESS or ZE_RESULT_NOT_READY.
// NOTE(review): two implementations are interleaved below because the diff's +/-
// markers were lost in this rendering.
//  - Allocation-based: reads one 32-bit word per partition from the fence's own
//    allocation (stride csr->getPostSyncWriteOffset()), stops at the first word
//    still equal to Fence::STATE_CLEARED, and reports NOT_READY if the last word
//    read was STATE_CLEARED.
//  - Tag-based: passes the CSR tag address and the stored taskCount to
//    csr->testTaskCountReady() and maps true/false to SUCCESS/NOT_READY.
// Going by the commit description the tag-based variant is presumably the new
// code -- confirm. As literally shown, hostAddr is declared twice and the second
// return is unreachable, so this text is not compilable as-is.
ze_result_t FenceImp::queryStatus() {
|
||||
auto csr = cmdQueue->getCsr();
|
||||
csr->downloadAllocations();
|
||||
|
||||
volatile uint32_t *hostAddr = static_cast<uint32_t *>(allocation->getUnderlyingBuffer());
|
||||
uint32_t queryVal = Fence::STATE_CLEARED;
|
||||
for (uint32_t i = 0; i < partitionCount; i++) {
|
||||
queryVal = *hostAddr;
|
||||
if (queryVal == Fence::STATE_CLEARED) {
|
||||
break;
|
||||
}
|
||||
hostAddr = ptrOffset(hostAddr, csr->getPostSyncWriteOffset());
|
||||
}
|
||||
return queryVal == Fence::STATE_CLEARED ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
|
||||
auto *hostAddr = csr->getTagAddress();
|
||||
|
||||
return csr->testTaskCountReady(hostAddr, taskCount) ? ZE_RESULT_SUCCESS : ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
// NOTE(review): two functions share this hunk. FenceImp::initialize() has no
// closing brace of its own in this rendering; presumably it is the removed
// function and the diff matched its brace with the one that now closes
// assignTaskCountFromCsr() -- confirm against the repository.
//
// initialize(): requests a page-sized, page-aligned BUFFER_HOST_MEMORY allocation
// from the memory manager, aborts via UNRECOVERABLE_IF if none is returned, and
// then calls reset().
void FenceImp::initialize() {
|
||||
NEO::AllocationProperties properties(
|
||||
cmdQueue->getDevice()->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, cmdQueue->getDevice()->getNEODevice()->getDeviceBitfield());
|
||||
properties.alignment = MemoryConstants::pageSize;
|
||||
allocation = cmdQueue->getDevice()->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
UNRECOVERABLE_IF(allocation == nullptr);
|
||||
|
||||
reset();
|
||||
// assignTaskCountFromCsr(): stores csr->peekTaskCount() + 1 in taskCount and
// always returns ZE_RESULT_SUCCESS.
// Presumably the +1 refers to the task count of the next submission on this
// CSR, i.e. the one the fence should wait for -- TODO confirm with the caller.
ze_result_t FenceImp::assignTaskCountFromCsr() {
|
||||
auto csr = cmdQueue->getCsr();
|
||||
taskCount = csr->peekTaskCount() + 1;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Puts the fence back into its not-yet-signaled state; always returns
// ZE_RESULT_SUCCESS.
// NOTE(review): old and new bodies are interleaved because the diff's +/- markers
// were lost in this rendering.
//  - Allocation-based: writes Fence::STATE_CLEARED into 16 consecutive slots of
//    the fence allocation (stride getPostSyncWriteOffset()), issuing clFlush on
//    each written address, then sets partitionCount back to 1.
//  - Task-count-based: sets taskCount to the maximum uint32_t value; the
//    hostSynchronize() hunk further down returns ZE_RESULT_NOT_READY when it
//    sees that value.
// Going by the commit description the task-count variant is presumably the new
// code -- confirm.
ze_result_t FenceImp::reset() {
|
||||
constexpr uint32_t maxPartitionCount = 16;
|
||||
volatile uint32_t *hostAddress = static_cast<uint32_t *>(allocation->getUnderlyingBuffer());
|
||||
for (uint32_t i = 0; i < maxPartitionCount; i++) {
|
||||
*hostAddress = Fence::STATE_CLEARED;
|
||||
NEO::CpuIntrinsics::clFlush(const_cast<uint32_t *>(hostAddress));
|
||||
hostAddress = ptrOffset(hostAddress, cmdQueue->getCsr()->getPostSyncWriteOffset());
|
||||
}
|
||||
partitionCount = 1;
|
||||
taskCount = std::numeric_limits<uint32_t>::max();
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -76,6 +51,10 @@ ze_result_t FenceImp::hostSynchronize(uint64_t timeout) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
if (std::numeric_limits<uint32_t>::max() == taskCount) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
if (timeout == 0) {
|
||||
return queryStatus();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -25,42 +25,25 @@ struct Fence : _ze_fence_handle_t {
|
||||
virtual ze_result_t destroy() = 0;
|
||||
virtual ze_result_t hostSynchronize(uint64_t timeout) = 0;
|
||||
virtual ze_result_t queryStatus() = 0;
|
||||
virtual ze_result_t assignTaskCountFromCsr() = 0;
|
||||
virtual ze_result_t reset() = 0;
|
||||
|
||||
static Fence *fromHandle(ze_fence_handle_t handle) { return static_cast<Fence *>(handle); }
|
||||
|
||||
inline ze_fence_handle_t toHandle() { return this; }
|
||||
|
||||
enum State : uint32_t {
|
||||
STATE_SIGNALED = 0u,
|
||||
STATE_CLEARED = std::numeric_limits<uint32_t>::max(),
|
||||
STATE_INITIAL = STATE_CLEARED
|
||||
};
|
||||
|
||||
enum EnqueueState : uint32_t { ENQUEUE_NOT_READY = 0u,
|
||||
ENQUEUE_READY };
|
||||
|
||||
NEO::GraphicsAllocation &getAllocation() const { return *allocation; }
|
||||
|
||||
uint64_t getGpuAddress() {
|
||||
UNRECOVERABLE_IF(allocation == nullptr);
|
||||
return allocation->getGpuAddress();
|
||||
}
|
||||
|
||||
void setPartitionCount(uint32_t newPartitionCount) {
|
||||
partitionCount = newPartitionCount;
|
||||
}
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *allocation = nullptr;
|
||||
uint32_t partitionCount = 1;
|
||||
uint32_t taskCount = 0;
|
||||
};
|
||||
|
||||
struct FenceImp : public Fence {
|
||||
FenceImp(CommandQueueImp *cmdQueueImp) : cmdQueue(cmdQueueImp) {}
|
||||
|
||||
~FenceImp() override;
|
||||
|
||||
ze_result_t destroy() override {
|
||||
delete this;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
@@ -70,9 +53,9 @@ struct FenceImp : public Fence {
|
||||
|
||||
ze_result_t queryStatus() override;
|
||||
|
||||
ze_result_t reset() override;
|
||||
ze_result_t assignTaskCountFromCsr() override;
|
||||
|
||||
void initialize();
|
||||
ze_result_t reset() override;
|
||||
|
||||
protected:
|
||||
CommandQueueImp *cmdQueue;
|
||||
|
||||
Reference in New Issue
Block a user