Reset the partition count and the state of all partitions in the Fence object

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-09-08 22:43:33 +00:00
committed by Compute-Runtime-Automation
parent 9d99ee1636
commit ddf76ef0b2
6 changed files with 109 additions and 12 deletions

View File

@@ -431,8 +431,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
NEO::PipeControlArgs args(true);
if (partitionCount > 1) {
args.workloadPartitionOffset = true;
fence->setPartitionCount(partitionCount);
}
fence->setPartitionCount(partitionCount);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
fence->getGpuAddress(),

View File

@@ -37,10 +37,10 @@ ze_result_t FenceImp::queryStatus() {
csr->downloadAllocations();
}
void *hostAddr = static_cast<uint64_t *>(allocation->getUnderlyingBuffer());
volatile uint32_t *hostAddr = static_cast<uint32_t *>(allocation->getUnderlyingBuffer());
uint32_t queryVal = Fence::STATE_CLEARED;
for (uint32_t i = 0; i < partitionCount; i++) {
memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), hostAddr, sizeof(uint32_t));
queryVal = *hostAddr;
if (queryVal == Fence::STATE_CLEARED) {
break;
}
@@ -55,16 +55,17 @@ void FenceImp::initialize() {
properties.alignment = MemoryConstants::cacheLineSize;
allocation = cmdQueue->getDevice()->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
UNRECOVERABLE_IF(allocation == nullptr);
reset();
}
// Writes Fence::STATE_CLEARED into the fence's host-visible state, flushes each
// written location with clFlush, sets partitionCount back to 1 and returns
// ZE_RESULT_SUCCESS.
// NOTE(review): this listing appears to interleave the removed uint64_t-based
// lines with their uint32_t replacements (hostAddress is declared twice) —
// confirm against the actual file before relying on it.
ze_result_t FenceImp::reset() {
auto hostAddress = static_cast<uint64_t *>(allocation->getUnderlyingBuffer());
*(hostAddress) = Fence::STATE_CLEARED;
NEO::CpuIntrinsics::clFlush(hostAddress);
volatile uint32_t *hostAddress = static_cast<uint32_t *>(allocation->getUnderlyingBuffer());
// One iteration per partition; the pointer is advanced by
// CommandQueueImp::addressOffset (via ptrOffset) after each cleared slot.
for (uint32_t i = 0; i < partitionCount; i++) {
*hostAddress = Fence::STATE_CLEARED;
// const_cast drops the volatile qualifier for the clFlush call.
NEO::CpuIntrinsics::clFlush(const_cast<uint32_t *>(hostAddress));
hostAddress = ptrOffset(hostAddress, CommandQueueImp::addressOffset);
}
// partitionCount is restored only after the loop has used it as its bound.
partitionCount = 1;
return ZE_RESULT_SUCCESS;
}

View File

@@ -29,6 +29,7 @@ set(L0_MOCKS_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_event.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_fence.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_resource_info_l0.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_host_pointer_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h

View File

@@ -0,0 +1,61 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/core/test/unit_tests/mock.h"
#include "level_zero/core/test/unit_tests/white_box.h"
#include "gmock/gmock.h"
namespace L0 {
namespace ult {
// White-box view of ::L0::Fence: re-exports the members named below so unit
// tests can read and modify them directly.
template <>
struct WhiteBox<::L0::Fence> : ::L0::Fence {
    using ::L0::Fence::partitionCount;
    using ::L0::Fence::allocation;
};

// Within the ult namespace, `Fence` refers to the white-box variant.
using Fence = WhiteBox<::L0::Fence>;
template <>
struct Mock<Fence> : public Fence {
Mock() : mockAllocation(0, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY,
&memory, reinterpret_cast<uint64_t>(&memory), 0, sizeof(memory),
MemoryPool::System4KBPages) {
allocation = &mockAllocation;
}
~Mock() override = default;
MOCK_METHOD(ze_result_t,
destroy,
(),
(override));
MOCK_METHOD(ze_result_t,
hostSynchronize,
(uint64_t),
(override));
MOCK_METHOD(ze_result_t,
queryStatus,
(),
(override));
MOCK_METHOD(ze_result_t,
reset,
(),
(override));
// Fake an allocation for event memory
alignas(16) uint32_t memory = -1;
NEO::GraphicsAllocation mockAllocation;
};
} // namespace ult
} // namespace L0

View File

@@ -17,6 +17,7 @@
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
@@ -825,10 +826,12 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
ze_fence_desc_t fenceDesc{};
auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc));
ASSERT_NE(nullptr, fence);
EXPECT_EQ(1u, fence->partitionCount);
ze_fence_handle_t fenceHandle = fence->toHandle();
ASSERT_NE(nullptr, commandQueue->commandStream);
fence->partitionCount = 2;
// 1st execute call initializes the pipeline
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
@@ -839,6 +842,7 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
size_t cmdBufferSizeWithoutMmioProgramming = usedSpaceAfter - usedSpaceBefore;
EXPECT_EQ(1u, fence->partitionCount);
auto workPartitionAddress = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
@@ -853,6 +857,7 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
usedSpaceAfter = commandQueue->commandStream->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
size_t cmdBufferSizeWithtMmioProgramming = usedSpaceAfter - usedSpaceBefore;
EXPECT_EQ(2u, fence->partitionCount);
size_t expectedSizeWithMmioProgramming = cmdBufferSizeWithoutMmioProgramming + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM);
EXPECT_GE(expectedSizeWithMmioProgramming, cmdBufferSizeWithtMmioProgramming);

View File

@@ -14,6 +14,7 @@
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
namespace L0 {
namespace ult {
@@ -58,7 +59,7 @@ TEST_F(FenceTest, whenQueryingStatusAndStateSignaledThenReturnSuccess) {
EXPECT_NE(nullptr, fence);
auto &graphicsAllocation = fence->getAllocation();
auto hostAddr = static_cast<uint64_t *>(graphicsAllocation.getUnderlyingBuffer());
auto hostAddr = static_cast<uint32_t *>(graphicsAllocation.getUnderlyingBuffer());
*hostAddr = Fence::STATE_SIGNALED;
auto status = fence->queryStatus();
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
@@ -101,7 +102,7 @@ TEST_F(FenceSynchronizeTest, givenCallToFenceHostSynchronizeWithTimeoutZeroAndSt
fence = std::unique_ptr<L0::Fence>(L0::Fence::create(&cmdQueue, nullptr));
EXPECT_NE(nullptr, fence);
auto alloc = &(fence->getAllocation());
auto hostAddr = static_cast<uint64_t *>(alloc->getUnderlyingBuffer());
auto hostAddr = static_cast<uint32_t *>(alloc->getUnderlyingBuffer());
*hostAddr = Fence::STATE_SIGNALED;
ze_result_t result = fence->hostSynchronize(0);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -116,12 +117,40 @@ TEST_F(FenceSynchronizeTest, givenCallToFenceHostSynchronizeWithTimeoutNonZeroAn
fence = std::unique_ptr<L0::Fence>(L0::Fence::create(&cmdQueue, nullptr));
EXPECT_NE(nullptr, fence);
auto alloc = &(fence->getAllocation());
auto hostAddr = static_cast<uint64_t *>(alloc->getUnderlyingBuffer());
auto hostAddr = static_cast<uint32_t *>(alloc->getUnderlyingBuffer());
*hostAddr = Fence::STATE_SIGNALED;
ze_result_t result = fence->hostSynchronize(10);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
// Verifies that Fence::reset() clears the state slot of every partition (two
// here) and puts partitionCount back to 1.
TEST_F(FenceSynchronizeTest, givenMultiplePartitionsWhenFenceIsResetThenAllPartitionFenceStatesAreReset) {
    auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());

    Mock<CommandQueue> cmdQueue(device, csr.get());

    auto fence = whitebox_cast(Fence::create(&cmdQueue, nullptr));
    // Fatal assertion: every statement below dereferences fence, so a null
    // result must stop the test instead of crashing the test binary.
    ASSERT_NE(nullptr, fence);
    fence->partitionCount = 2;

    // Mark both partition slots as signaled before resetting.
    auto alloc = &(fence->getAllocation());
    auto hostAddr = static_cast<uint32_t *>(alloc->getUnderlyingBuffer());
    *hostAddr = Fence::STATE_SIGNALED;
    hostAddr = ptrOffset(hostAddr, CommandQueueImp::addressOffset);
    *hostAddr = Fence::STATE_SIGNALED;

    ze_result_t result = fence->reset();
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Both slots must read back as cleared.
    hostAddr = static_cast<uint32_t *>(alloc->getUnderlyingBuffer());
    EXPECT_EQ(Fence::STATE_CLEARED, *hostAddr);
    hostAddr = ptrOffset(hostAddr, CommandQueueImp::addressOffset);
    EXPECT_EQ(Fence::STATE_CLEARED, *hostAddr);
    EXPECT_EQ(1u, fence->partitionCount);

    fence->destroy();
}
using FenceAubCsrTest = Test<DeviceFixture>;
HWTEST_F(FenceAubCsrTest, givenCallToFenceHostSynchronizeWithAubModeCsrReturnsSuccess) {