Use L3_CONTROL only for DG1

Related-To: LOCI-1877
Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
Author: Young Jin Yoon
Date: 2021-02-10 15:20:50 +00:00
Committed by: Compute-Runtime-Automation
Parent: a79f67958e
Commit: 6f555d6258
18 changed files with 1414 additions and 13 deletions
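For orientation before the per-file diffs: within gen12lp, only parts that report supportCacheFlushAfterWalker (i.e. DG1) take the range-based L3_CONTROL path; every other part keeps the PIPE_CONTROL barrier. A minimal standalone sketch of that gate (hypothetical helper, not part of this commit):

```cpp
// Hypothetical summary helper; the real decision lives in
// CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier below.
enum class RangesBarrierPath {
    PipeControlDcFlush,  // legacy full-barrier path (non-DG1 gen12lp)
    L3ControlRangeFlush  // DG1: flush only the requested address ranges
};

inline RangesBarrierPath selectRangesBarrierPath(bool supportCacheFlushAfterWalker) {
    return supportCacheFlushAfterWalker ? RangesBarrierPath::L3ControlRangeFlush
                                        : RangesBarrierPath::PipeControlDcFlush;
}
```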

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019-2020 Intel Corporation
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -9,12 +9,12 @@ if(SUPPORT_GEN12LP)
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen12lp.h
${CMAKE_CURRENT_SOURCE_DIR}/debugger_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/cache_flush_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/definitions/cache_flush_gen12lp.inl
${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_gen12lp.cpp
)
add_subdirectories()
target_include_directories(${L0_STATIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/)
target_include_directories(${L0_STATIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/definitions/)
target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN12LP})
set_property(GLOBAL APPEND PROPERTY L0_HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP})

View File

@@ -1,10 +1,12 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/cache_flush.inl"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "pipe_control_args.h"
@@ -15,9 +17,61 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t numRanges,
const size_t *pRangeSizes,
const void **pRanges) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(),
args);
}
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto &hwInfo = commandContainer.getDevice()->getHardwareInfo();
bool supportL3Control = hwInfo.capabilityTable.supportCacheFlushAfterWalker;
if (!supportL3Control) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(),
args);
} else {
NEO::LinearStream *commandStream = commandContainer.getCommandStream();
NEO::SVMAllocsManager *svmAllocsManager =
device->getDriverHandle()->getSvmAllocsManager();
for (uint32_t i = 0; i < numRanges; i++) {
StackVec<NEO::L3Range, NEO::maxFlushSubrangeCount> subranges;
const uint64_t pRange = reinterpret_cast<uint64_t>(pRanges[i]);
const size_t pRangeSize = pRangeSizes[i];
const uint64_t pEndRange = pRange + pRangeSize;
uint64_t pFlushRange;
size_t pFlushRangeSize;
uint64_t postSyncAddressToFlush = 0;
NEO::SvmAllocationData *allocData =
svmAllocsManager->getSVMAllocs()->get(pRanges[i]);
if (allocData == nullptr || pRangeSize > allocData->size) {
continue;
}
pFlushRange = pRange;
pFlushRangeSize = pRangeSize;
if (NEO::L3Range::meetsMinimumAlignment(pRange) == false) {
pFlushRange = alignDown(pRange, MemoryConstants::pageSize);
}
if (NEO::L3Range::meetsMinimumAlignment(pRangeSize) == false) {
pFlushRangeSize = alignUp(pRangeSize, MemoryConstants::pageSize);
}
bool isRangeSharedBetweenTwoPages =
(alignDown(pEndRange, MemoryConstants::pageSize) !=
pFlushRange);
if (isRangeSharedBetweenTwoPages) {
pFlushRangeSize += MemoryConstants::pageSize;
}
coverRangeExact(pFlushRange,
pFlushRangeSize,
subranges,
GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION);
NEO::flushGpuCache<GfxFamily>(commandStream, subranges,
postSyncAddressToFlush,
device->getHwInfo());
}
}
}
} // namespace L0
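The alignment handling in applyMemoryRangesBarrier above (align the start down to a page boundary, align the size up, and add one page when the end of the request lands in a page other than the aligned start page) is easiest to see with concrete numbers. A standalone sketch with hypothetical values:

```cpp
#include <cassert>
#include <cstdint>

// Worked example of the flush-range alignment above: a 4 KiB request that
// starts at a misaligned address on a 4 KiB-page system.
int main() {
    constexpr uint64_t pageSize = 4096;            // MemoryConstants::pageSize
    uint64_t rangeStart = 0x12F9;                  // user pointer, not page aligned
    uint64_t rangeSize  = 4096;                    // already a multiple of pageSize
    uint64_t rangeEnd   = rangeStart + rangeSize;  // 0x22F9

    uint64_t flushStart = rangeStart & ~(pageSize - 1); // alignDown -> 0x1000
    uint64_t flushSize  = rangeSize;                     // alignUp is a no-op here
    if ((rangeEnd & ~(pageSize - 1)) != flushStart) {    // end falls in a later page
        flushSize += pageSize;                           // flush 0x1000..0x3000
    }
    assert(flushStart == 0x1000 && flushSize == 2 * pageSize);
    return 0;
}
```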

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -26,4 +26,4 @@ static CommandListPopulateFactory<IGFX_DG1, CommandListProductFamily<IGFX_DG1>>
static CommandListImmediatePopulateFactory<IGFX_DG1, CommandListImmediateProductFamily<IGFX_DG1>>
populateDG1Immediate;
} // namespace L0
} // namespace L0

View File

@@ -9,10 +9,11 @@ if(TESTS_GEN12LP)
${COMPUTE_RUNTIME_ULT_GEN12LP}
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/enable_l0_mocks_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_device_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_events_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_module_gen12lp.cpp
)
target_include_directories(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_DIR}/level_zero/core/source/gen12lp/definitions${BRANCH_DIR_SUFFIX}/)
target_include_directories(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_DIR}/level_zero/core/source/gen12lp/definitions/)
endif()

View File

@@ -0,0 +1,278 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "test.h"
#include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
namespace L0 {
namespace ult {
using CommandListCreate = Test<DeviceFixture>;
template <PRODUCT_FAMILY productFamily>
struct CommandListAdjustStateComputeMode : public WhiteBox<::L0::CommandListProductFamily<productFamily>> {
CommandListAdjustStateComputeMode() : WhiteBox<::L0::CommandListProductFamily<productFamily>>(1) {}
using ::L0::CommandListProductFamily<productFamily>::applyMemoryRangesBarrier;
};
HWTEST2_F(CommandListCreate, givenAllocationsWhenApplyRangesBarrierThenCheckWhetherL3ControlIsProgrammed, IsGen12LP) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using L3_CONTROL = typename GfxFamily::L3_CONTROL;
auto &hardwareInfo = this->neoDevice->getHardwareInfo();
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::Copy);
uint64_t gpuAddress = 0x1200;
void *buffer = reinterpret_cast<void *>(gpuAddress);
size_t size = 0x1100;
NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
NEO::SvmAllocationData allocData(0);
allocData.size = size;
allocData.gpuAllocations.addAllocation(&mockAllocation);
device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData);
const void *ranges[] = {buffer};
const size_t sizes[] = {size};
commandList->applyMemoryRangesBarrier(1, sizes, ranges);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<L3_CONTROL *>(cmdList.begin(), cmdList.end());
if (hardwareInfo.capabilityTable.supportCacheFlushAfterWalker) {
EXPECT_NE(cmdList.end(), itor);
} else {
EXPECT_EQ(cmdList.end(), itor);
}
}
HWTEST2_F(CommandListCreate, GivenHostMemoryNotInSvmManagerWhenAppendingMemoryBarrierThenAdditionalCommandsNotAdded,
IsDG1) {
ze_result_t result;
uint32_t numRanges = 1;
const size_t pRangeSizes = 1;
const char *_pRanges[pRangeSizes];
const void **pRanges = reinterpret_cast<const void **>(&_pRanges[0]);
auto commandList = new CommandListAdjustStateComputeMode<productFamily>();
bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
ASSERT_FALSE(ret);
auto usedSpaceBefore =
commandList->commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryRangesBarrier(numRanges, &pRangeSizes,
pRanges, nullptr, 0,
nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter =
commandList->commandContainer.getCommandStream()->getUsed();
ASSERT_EQ(usedSpaceAfter, usedSpaceBefore);
commandList->destroy();
}
HWTEST2_F(CommandListCreate, GivenHostMemoryInSvmManagerWhenAppendingMemoryBarrierThenL3CommandsAdded,
IsDG1) {
ze_result_t result;
uint32_t numRanges = 1;
const size_t pRangeSizes = 1;
void *_pRanges;
ze_device_mem_alloc_desc_t deviceDesc = {};
result = context->allocDeviceMem(device->toHandle(),
&deviceDesc,
pRangeSizes,
4096u,
&_pRanges);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
const void **pRanges = const_cast<const void **>(&_pRanges);
auto commandList = new CommandListAdjustStateComputeMode<productFamily>();
bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
ASSERT_FALSE(ret);
auto usedSpaceBefore =
commandList->commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryRangesBarrier(numRanges, &pRangeSizes,
pRanges, nullptr, 0,
nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter =
commandList->commandContainer.getCommandStream()->getUsed();
ASSERT_NE(usedSpaceAfter, usedSpaceBefore);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(
commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
usedSpaceAfter));
using L3_CONTROL = typename FamilyType::L3_CONTROL;
auto itorPC = find<L3_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPC);
{
using L3_FLUSH_EVICTION_POLICY = typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY;
auto cmd = genCmdCast<L3_CONTROL *>(*itorPC);
auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
auto isA0Stepping = (hwHelper.getSteppingFromHwRevId(device->getHwInfo()) == REVISION_A0);
auto maskedAddress = cmd->getL3FlushAddressRange().getAddress(isA0Stepping);
EXPECT_NE(maskedAddress, 0u);
EXPECT_EQ(reinterpret_cast<uint64_t>(*pRanges),
static_cast<uint64_t>(maskedAddress));
EXPECT_EQ(
cmd->getL3FlushAddressRange().getL3FlushEvictionPolicy(isA0Stepping),
L3_FLUSH_EVICTION_POLICY::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION);
}
commandList->destroy();
context->freeMem(_pRanges);
}
HWTEST2_F(CommandListCreate, GivenHostMemoryWhenAppendingMemoryBarrierThenAddressMisalignmentCorrected,
IsDG1) {
ze_result_t result;
uint32_t numRanges = 1;
const size_t misalignment_factor = 761;
const size_t pRangeSizes = 4096;
void *_pRanges;
ze_device_mem_alloc_desc_t deviceDesc = {};
result = context->allocDeviceMem(device->toHandle(),
&deviceDesc,
pRangeSizes,
4096u,
&_pRanges);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
unsigned char *c_pRanges = reinterpret_cast<unsigned char *>(_pRanges);
c_pRanges += misalignment_factor;
_pRanges = static_cast<void *>(c_pRanges);
const void **pRanges = const_cast<const void **>(&_pRanges);
auto commandList = new CommandListAdjustStateComputeMode<productFamily>();
bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
ASSERT_FALSE(ret);
auto usedSpaceBefore =
commandList->commandContainer.getCommandStream()->getUsed();
result = commandList->appendMemoryRangesBarrier(numRanges, &pRangeSizes,
pRanges, nullptr, 0,
nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter =
commandList->commandContainer.getCommandStream()->getUsed();
ASSERT_NE(usedSpaceAfter, usedSpaceBefore);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(
commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
usedSpaceAfter));
using L3_CONTROL = typename FamilyType::L3_CONTROL;
auto itorPC = find<L3_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPC);
{
using L3_FLUSH_EVICTION_POLICY = typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY;
auto cmd = genCmdCast<L3_CONTROL *>(*itorPC);
auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
auto isA0Stepping = (hwHelper.getSteppingFromHwRevId(device->getHwInfo()) == REVISION_A0);
auto maskedAddress = cmd->getL3FlushAddressRange().getAddress(isA0Stepping);
EXPECT_NE(maskedAddress, 0u);
EXPECT_EQ(reinterpret_cast<uint64_t>(*pRanges) - misalignment_factor,
static_cast<uint64_t>(maskedAddress));
EXPECT_EQ(
cmd->getL3FlushAddressRange().getL3FlushEvictionPolicy(isA0Stepping),
L3_FLUSH_EVICTION_POLICY::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION);
}
commandList->destroy();
context->freeMem(_pRanges);
}
HWTEST2_F(CommandListCreate, givenAllocationsWhenApplyRangesBarrierWithInvalidAddressSizeThenL3ControlIsNotProgrammed, IsDG1) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using L3_CONTROL = typename GfxFamily::L3_CONTROL;
ze_result_t result;
const size_t pRangeSizes = 4096;
void *_pRanges;
ze_device_mem_alloc_desc_t deviceDesc = {};
result = context->allocDeviceMem(device->toHandle(),
&deviceDesc,
pRangeSizes,
4096u,
&_pRanges);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto commandList = new CommandListAdjustStateComputeMode<productFamily>();
ASSERT_NE(nullptr, commandList);
bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
ASSERT_FALSE(ret);
const void *ranges[] = {_pRanges};
const size_t sizes[] = {2 * pRangeSizes};
commandList->applyMemoryRangesBarrier(1, sizes, ranges);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<L3_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), itor);
commandList->destroy();
context->freeMem(_pRanges);
}
HWTEST2_F(CommandListCreate, givenAllocationsWhenApplyRangesBarrierWithInvalidAddressThenL3ControlIsNotProgrammed, IsDG1) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using L3_CONTROL = typename GfxFamily::L3_CONTROL;
ze_result_t result;
const size_t pRangeSizes = 4096;
void *_pRanges;
ze_device_mem_alloc_desc_t deviceDesc = {};
result = context->allocDeviceMem(device->toHandle(),
&deviceDesc,
pRangeSizes,
4096u,
&_pRanges);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto commandList = new CommandListAdjustStateComputeMode<productFamily>();
ASSERT_NE(nullptr, commandList);
bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
ASSERT_FALSE(ret);
const void *ranges[] = {nullptr};
const size_t sizes[] = {pRangeSizes};
commandList->applyMemoryRangesBarrier(1, sizes, ranges);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
auto itor = find<L3_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), itor);
commandList->destroy();
context->freeMem(_pRanges);
}
} // namespace ult
} // namespace L0
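These tests drive the driver-internal appendMemoryRangesBarrier and applyMemoryRangesBarrier directly; the corresponding public Level Zero entry point is zeCommandListAppendMemoryRangesBarrier. A minimal usage sketch (hypothetical wrapper, command list assumed to be created elsewhere):

```cpp
#include <level_zero/ze_api.h>

// Hypothetical convenience wrapper around the public API that the code under
// test implements; on DG1 the driver services this with L3_CONTROL commands,
// on other gen12lp parts with a PIPE_CONTROL.
ze_result_t flushDeviceRange(ze_command_list_handle_t cmdList, const void *ptr, size_t size) {
    const void *ranges[] = {ptr};
    const size_t sizes[] = {size};
    return zeCommandListAppendMemoryRangesBarrier(cmdList, 1, sizes, ranges,
                                                  nullptr /* signal event */,
                                                  0, nullptr /* wait events */);
}
```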

View File

@@ -1,11 +1,12 @@
#
# Copyright (C) 2020 Intel Corporation
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cache_flush_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/heap_assigner_l0_tests.cpp
)
add_subdirectories()

View File

@@ -0,0 +1,92 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/cache_flush.inl"
#include "shared/source/helpers/l3_range.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "test.h"
#include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
namespace L0 {
namespace ult {
using CacheFlushTests = Test<DeviceFixture>;
HWTEST2_F(CacheFlushTests, GivenCommandStreamWithSingleL3RangeAndNonZeroPostSyncAddressWhenFlushGpuCacheIsCalledThenPostSyncOperationIsSetForL3Control, IsDG1) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using L3_CONTROL = typename GfxFamily::L3_CONTROL;
auto &hardwareInfo = this->neoDevice->getHardwareInfo();
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::Copy);
LinearStream *cmdStream = commandList->commandContainer.getCommandStream();
uint64_t gpuAddress = 0x1200;
void *buffer = reinterpret_cast<void *>(gpuAddress);
size_t size = 0x1000;
uint64_t postSyncAddress = 0x1200;
NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
NEO::SvmAllocationData allocData(0);
allocData.size = size;
allocData.gpuAllocations.addAllocation(&mockAllocation);
device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData);
L3RangesVec ranges;
ranges.push_back(L3Range::fromAddressSizeWithPolicy(
gpuAddress, size,
GfxFamily::L3_FLUSH_ADDRESS_RANGE::
L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION));
NEO::flushGpuCache<GfxFamily>(cmdStream, ranges, postSyncAddress,
hardwareInfo);
GenCmdList cmdList;
EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(cmdStream->getCpuBase(), 0), cmdStream->getUsed()));
auto itor = find<L3_CONTROL *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), itor);
}
HWTEST2_F(CacheFlushTests, GivenCommandStreamWithMultipleL3RangeAndUsePostSyncIsSetToTrueWhenGetSizeNeededToFlushGpuCacheIsCalledThenCorrectSizeIsReturned, IsDG1) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using L3_CONTROL = typename GfxFamily::L3_CONTROL;
uint64_t gpuAddress = 0x1200;
size_t size = 0x1000;
L3RangesVec ranges;
ranges.push_back(L3Range::fromAddressSizeWithPolicy(
gpuAddress, size,
GfxFamily::L3_FLUSH_ADDRESS_RANGE::
L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION));
EXPECT_NE(0u, ranges.size());
size_t ret = NEO::getSizeNeededToFlushGpuCache<GfxFamily>(ranges, true);
size_t expected = ranges.size() * sizeof(L3_CONTROL);
EXPECT_EQ(ret, expected);
}
HWTEST2_F(CacheFlushTests, GivenCommandStreamWithMultipleL3RangeAndUsePostSyncIsSetToFalseWhenGetSizeNeededToFlushGpuCacheIsCalledThenCorrectSizeIsReturned, IsDG1) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using L3_CONTROL = typename GfxFamily::L3_CONTROL;
uint64_t gpuAddress = 0x1200;
size_t size = 0x1000;
L3RangesVec ranges;
ranges.push_back(L3Range::fromAddressSizeWithPolicy(
gpuAddress, size,
GfxFamily::L3_FLUSH_ADDRESS_RANGE::
L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION));
EXPECT_NE(0u, ranges.size());
size_t ret = NEO::getSizeNeededToFlushGpuCache<GfxFamily>(ranges, false);
size_t expected = ranges.size() * sizeof(L3_CONTROL);
EXPECT_EQ(ret, expected);
}
} // namespace ult
} // namespace L0

View File

@@ -32,6 +32,7 @@ set(IGDRCL_SRCS_tests_helpers
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_filename_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/l3_range_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_management_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_helpers_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mipmap_tests.cpp

View File

@@ -0,0 +1,292 @@
/*
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/l3_range.h"
#include "test.h"
using namespace NEO;
constexpr uint64_t l3RangeMinimumAlignment = MemoryConstants::pageSize;
constexpr uint64_t l3RangeMax = 4 * MemoryConstants::gigaByte;
const uint64_t defaultPolicy = 0;
TEST(L3Range, whenMeetsMinimumAlignmentThenTrueIsReturned) {
EXPECT_TRUE(L3Range::meetsMinimumAlignment(0));
EXPECT_TRUE(L3Range::meetsMinimumAlignment(l3RangeMinimumAlignment));
EXPECT_TRUE(L3Range::meetsMinimumAlignment(l3RangeMinimumAlignment * 2));
EXPECT_TRUE(L3Range::meetsMinimumAlignment(l3RangeMinimumAlignment * 3));
EXPECT_TRUE(L3Range::meetsMinimumAlignment(l3RangeMinimumAlignment * 4));
}
TEST(L3Range, whenDoesNotMeetMinimumAlignmentThenFalseIsReturned) {
EXPECT_FALSE(L3Range::meetsMinimumAlignment(1));
EXPECT_FALSE(L3Range::meetsMinimumAlignment(l3RangeMinimumAlignment - 1));
}
TEST(L3Range, whenValidSizeThenProperMaskFromSizeIsReturned) {
EXPECT_EQ(0U, L3Range::getMaskFromSize(l3RangeMinimumAlignment));
EXPECT_EQ(1U, L3Range::getMaskFromSize(l3RangeMinimumAlignment * 2));
EXPECT_EQ(2U, L3Range::getMaskFromSize(l3RangeMinimumAlignment * 4));
EXPECT_EQ(3U, L3Range::getMaskFromSize(l3RangeMinimumAlignment * 8));
EXPECT_EQ(19U, L3Range::getMaskFromSize(l3RangeMax / 2));
EXPECT_EQ(20U, L3Range::getMaskFromSize(l3RangeMax));
}
TEST(L3Range, whenNonPow2SizeThenMaskCalculationIsAborted) {
EXPECT_THROW(L3Range::getMaskFromSize(l3RangeMinimumAlignment + 1), std::exception);
}
TEST(L3Range, whenTooSmallSizeThenMaskCalculationIsAborted) {
EXPECT_THROW(L3Range::getMaskFromSize(0), std::exception);
}
TEST(L3Range, whenTooBigSizeThenMaskCalculationIsAborted) {
EXPECT_THROW(L3Range::getMaskFromSize(l3RangeMax * 2), std::exception);
}
TEST(L3Range, returnsProperSizeInBytesFromMask) {
L3Range range;
range.setMask(L3Range::getMaskFromSize(l3RangeMinimumAlignment));
EXPECT_EQ(l3RangeMinimumAlignment, range.getSizeInBytes());
range.setMask(L3Range::getMaskFromSize(l3RangeMinimumAlignment * 4));
EXPECT_EQ(l3RangeMinimumAlignment * 4, range.getSizeInBytes());
range.setMask(L3Range::getMaskFromSize(l3RangeMax));
EXPECT_EQ(l3RangeMax, range.getSizeInBytes());
}
TEST(L3Range, whenMaskGetsChangedThenReturnsProperlyMaskedAddress) {
L3Range range;
range.setAddress(l3RangeMinimumAlignment * 4 + l3RangeMinimumAlignment * 3 + 1);
range.setMask(0);
EXPECT_EQ(range.getAddress() & ~(l3RangeMinimumAlignment - 1), range.getMaskedAddress());
range.setMask(1);
EXPECT_EQ(range.getAddress() & ~((l3RangeMinimumAlignment << 1) - 1), range.getMaskedAddress());
range.setMask(2);
EXPECT_EQ(range.getAddress() & ~((l3RangeMinimumAlignment << 2) - 1), range.getMaskedAddress());
range.setMask(3);
EXPECT_EQ(0U, range.getMaskedAddress());
}
TEST(L3Range, whenCreatedFromAddressAndMaskThenAddressAndMaskAreProperlySet) {
{
L3Range range = L3Range::fromAddressMask(0U, 0U);
EXPECT_EQ(0U, range.getAddress());
EXPECT_EQ(0U, range.getMask());
}
{
L3Range range = L3Range::fromAddressMask(l3RangeMinimumAlignment, 1U);
EXPECT_EQ(l3RangeMinimumAlignment, range.getAddress());
EXPECT_EQ(1U, range.getMask());
}
{
L3Range range = L3Range::fromAddressMask(l3RangeMinimumAlignment * 2, 3U);
EXPECT_EQ(l3RangeMinimumAlignment * 2, range.getAddress());
EXPECT_EQ(3U, range.getMask());
}
}
TEST(L3Range, whenCreatedFromAddressAndSizeThenMaskIsProperlySet) {
{
L3Range range = L3Range::fromAddressSize(0, l3RangeMinimumAlignment);
EXPECT_EQ(0U, range.getAddress());
EXPECT_EQ(L3Range::getMaskFromSize(l3RangeMinimumAlignment), range.getMask());
}
{
L3Range range = L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment * 2);
EXPECT_EQ(l3RangeMinimumAlignment, range.getAddress());
EXPECT_EQ(L3Range::getMaskFromSize(l3RangeMinimumAlignment * 2), range.getMask());
}
{
L3Range range = L3Range::fromAddressSize(l3RangeMinimumAlignment * 2, l3RangeMax);
EXPECT_EQ(l3RangeMinimumAlignment * 2, range.getAddress());
EXPECT_EQ(L3Range::getMaskFromSize(l3RangeMax), range.getMask());
}
}
TEST(L3Range, whenComparOpIsEqualThenReturnsTrueOnlyIfSame) {
L3Range a = L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment * 2);
L3Range b = L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment * 2);
L3Range c = L3Range::fromAddressSize(0, l3RangeMinimumAlignment * 2);
L3Range d = L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment);
EXPECT_TRUE(a == b);
EXPECT_FALSE(a == c);
EXPECT_FALSE(c == a);
EXPECT_FALSE(a == d);
EXPECT_FALSE(d == a);
}
TEST(L3Range, whenComparOpIsNotEqualThenReturnsTrueOnlyIfNotSame) {
L3Range a = L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment * 2);
L3Range b = L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment * 2);
L3Range c = L3Range::fromAddressSize(0, l3RangeMinimumAlignment * 2);
L3Range d = L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment);
EXPECT_FALSE(a != b);
EXPECT_TRUE(a != c);
EXPECT_TRUE(c != a);
EXPECT_TRUE(a != d);
EXPECT_TRUE(d != a);
}
TEST(CoverRange, whenNonAlignedThenAbort) {
L3RangesVec ranges;
EXPECT_THROW(coverRangeExact(1, l3RangeMinimumAlignment, ranges, defaultPolicy), std::exception);
EXPECT_THROW(coverRangeExact(l3RangeMinimumAlignment, 1, ranges, defaultPolicy), std::exception);
EXPECT_THROW(coverRangeExact(1, 1, ranges, defaultPolicy), std::exception);
}
L3Range fromAdjacentRange(const L3Range &lhs, uint64_t size) {
L3Range ret;
ret.setAddress(lhs.getMaskedAddress() + lhs.getSizeInBytes());
ret.setMask(L3Range::getMaskFromSize(size));
return ret;
}
TEST(CoverRange, whenAlignedThenCoverWithProperSubranges) {
{
L3RangesVec actualRanges;
coverRangeExact(0, l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(0, l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "1 page, offset 0";
}
{
L3RangesVec actualRanges;
coverRangeExact(l3RangeMinimumAlignment, l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "1 page, offset 1";
}
// 2 pages
{
L3RangesVec actualRanges;
coverRangeExact(0, 2 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(0, 2 * l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "2 pages, offset 0";
}
{
L3RangesVec actualRanges;
coverRangeExact(l3RangeMinimumAlignment, 2 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment),
L3Range::fromAddressSize(2 * l3RangeMinimumAlignment, l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "2 pages, offset 1";
}
{
L3RangesVec actualRanges;
coverRangeExact(0, 3 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(0, 2 * l3RangeMinimumAlignment),
L3Range::fromAddressSize(2 * l3RangeMinimumAlignment, l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "3 pages, offset 0";
}
{
L3RangesVec actualRanges;
coverRangeExact(l3RangeMinimumAlignment, 3 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment),
L3Range::fromAddressSize(2 * l3RangeMinimumAlignment, 2 * l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "3 pages, offset 1";
}
{
L3RangesVec actualRanges;
coverRangeExact(2 * l3RangeMinimumAlignment, 3 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(2 * l3RangeMinimumAlignment, 2 * l3RangeMinimumAlignment),
L3Range::fromAddressSize(4 * l3RangeMinimumAlignment, l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "3 pages, offset 2";
}
{
L3RangesVec actualRanges;
coverRangeExact(0, 4 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(0, 4 * l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "4 pages, offset 0";
}
{
L3RangesVec actualRanges;
coverRangeExact(l3RangeMinimumAlignment, 4 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(l3RangeMinimumAlignment, l3RangeMinimumAlignment),
L3Range::fromAddressSize(2 * l3RangeMinimumAlignment, 2 * l3RangeMinimumAlignment),
L3Range::fromAddressSize(4 * l3RangeMinimumAlignment, l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "4 pages, offset 1";
}
{
L3RangesVec actualRanges;
coverRangeExact(2 * l3RangeMinimumAlignment, 4 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(2 * l3RangeMinimumAlignment, 2 * l3RangeMinimumAlignment),
L3Range::fromAddressSize(4 * l3RangeMinimumAlignment, 2 * l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "4 pages, offset 2";
}
{
L3RangesVec actualRanges;
coverRangeExact(3 * l3RangeMinimumAlignment, 4 * l3RangeMinimumAlignment, actualRanges, defaultPolicy);
L3RangesVec expectedRanges{L3Range::fromAddressSize(3 * l3RangeMinimumAlignment, l3RangeMinimumAlignment),
L3Range::fromAddressSize(4 * l3RangeMinimumAlignment, 2 * l3RangeMinimumAlignment),
L3Range::fromAddressSize(6 * l3RangeMinimumAlignment, l3RangeMinimumAlignment)};
EXPECT_EQ(expectedRanges, actualRanges) << "4 pages, offset 3";
}
{
uint64_t address = 3 * 4096;
uint64_t size = 1024 * 1024;
L3RangesVec actualRanges;
coverRangeExact(address, size, actualRanges, 0);
L3RangesVec expectedRanges;
expectedRanges.push_back(L3Range::fromAddressSize(address, 4096));
expectedRanges.push_back(fromAdjacentRange(*expectedRanges.rbegin(), 4 * 4096));
expectedRanges.push_back(fromAdjacentRange(*expectedRanges.rbegin(), 8 * 4096));
expectedRanges.push_back(fromAdjacentRange(*expectedRanges.rbegin(), 16 * 4096));
expectedRanges.push_back(fromAdjacentRange(*expectedRanges.rbegin(), 32 * 4096));
expectedRanges.push_back(fromAdjacentRange(*expectedRanges.rbegin(), 64 * 4096));
expectedRanges.push_back(fromAdjacentRange(*expectedRanges.rbegin(), 128 * 4096));
expectedRanges.push_back(fromAdjacentRange(*expectedRanges.rbegin(), 2 * 4096));
expectedRanges.push_back(fromAdjacentRange(*expectedRanges.rbegin(), 1 * 4096));
EXPECT_EQ(expectedRanges, actualRanges) << "1MB, offset 3 pages";
}
}
TEST(CoverRange, whenRangeCreatedWithPolicyThenAllParamsSetCorrectly) {
L3Range range = L3Range::fromAddressSizeWithPolicy(0, l3RangeMinimumAlignment, defaultPolicy);
EXPECT_EQ(0U, range.getAddress());
EXPECT_EQ(L3Range::getMaskFromSize(l3RangeMinimumAlignment), range.getMask());
EXPECT_EQ(range.getPolicy(), defaultPolicy);
auto policy = defaultPolicy + 1;
L3Range range2 = L3Range::fromAddressSizeWithPolicy(0, l3RangeMinimumAlignment, policy);
EXPECT_EQ(range2.getPolicy(), policy);
}

View File

@@ -6,6 +6,7 @@
set(IGDRCL_SRCS_tests_kernel
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/cache_flush_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/clone_kernel_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_transformer_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_accelerator_arg_tests.cpp

View File

@@ -0,0 +1,421 @@
/*
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/l3_range.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/embargo/resource_barrier.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
#include "opencl/test/unit_test/helpers/cmd_buffer_validator.h"
#include "opencl/test/unit_test/helpers/hardware_commands_helper_tests.h"
#include "opencl/test/unit_test/helpers/static_size3.h"
#include "opencl/test/unit_test/mocks/mock_allocation_properties.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "test.h"
using namespace NEO;
template <typename FamilyType>
struct L3ControlPolicy : CmdValidator {
L3ControlPolicy(typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY expectedPolicy, bool isA0Stepping)
: expectedPolicy(expectedPolicy), isA0Stepping(isA0Stepping) {
}
bool operator()(GenCmdList::iterator it, size_t numInScetion, const std::string &member, std::string &outReason) override {
using L3_CONTROL = typename FamilyType::L3_CONTROL;
auto l3ControlAddress = genCmdCast<L3_CONTROL *>(*it)->getL3FlushAddressRange();
if (l3ControlAddress.getL3FlushEvictionPolicy(isA0Stepping) != expectedPolicy) {
outReason = "Invalid L3_FLUSH_EVICTION_POLICY - expected: " + std::to_string(expectedPolicy) + ", got :" + std::to_string(l3ControlAddress.getL3FlushEvictionPolicy(isA0Stepping));
return false;
}
l3RangesParsed.push_back(L3Range::fromAddressMask(l3ControlAddress.getAddress(isA0Stepping), l3ControlAddress.getAddressMask(isA0Stepping)));
return true;
}
L3RangesVec l3RangesParsed;
typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY expectedPolicy;
bool isA0Stepping;
};
template <typename FamilyType>
class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand : public HardwareCommandsTest {
public:
void TestBodyImpl() {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL;
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
CommandQueueHw<FamilyType> cmdQ(nullptr, pClDevice, 0, false);
auto &commandStream = cmdQ.getCS(1024);
void *allocPtr = reinterpret_cast<void *>(static_cast<uintptr_t>(6 * MemoryConstants::pageSize));
MockGraphicsAllocation svmAllocation{allocPtr, MemoryConstants::pageSize * 2};
svmAllocation.setFlushL3Required(true);
this->mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation);
this->mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true;
size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(L3_CONTROL_WITHOUT_POST_SYNC);
size_t actualSize = HardwareCommandsHelper<FamilyType>::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, 0U);
EXPECT_EQ(expectedSize, actualSize);
HardwareCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U);
std::string err;
auto cmdBuffOk = expectCmdBuff<FamilyType>(cmdQ.getCS(0), 0,
std::vector<MatchCmd *>({
new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
new MatchHwCmd<FamilyType, L3_CONTROL_WITHOUT_POST_SYNC>(AtLeastOne),
}),
&err);
EXPECT_TRUE(cmdBuffOk) << err;
}
};
template <typename FamilyType>
class GivenCacheFlushAfterWalkerEnabledAndProperSteppingIsSetWhenKernelArgIsSetAsCacheFlushRequiredAndA0SteppingIsDisabledThenExpectCacheFlushCommand : public HardwareCommandsTest {
public:
void TestBodyImpl(bool isA0Stepping) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE;
using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL;
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto stepping = (isA0Stepping ? REVISION_A0 : REVISION_A1);
hardwareInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(stepping, hardwareInfo);
pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&hardwareInfo);
CommandQueueHw<FamilyType> cmdQ(nullptr, pClDevice, 0, false);
auto &commandStream = cmdQ.getCS(1024);
addSpaceForSingleKernelArg();
this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2);
void *allocPtr = reinterpret_cast<void *>(static_cast<uintptr_t>(6 * MemoryConstants::pageSize));
MockGraphicsAllocation cacheRequiringAllocation{allocPtr, MemoryConstants::pageSize * 7};
this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation;
L3RangesVec rangesExpected;
coverRangeExact(cacheRequiringAllocation.getGpuAddress(), cacheRequiringAllocation.getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION);
size_t expectedSize = sizeof(PIPE_CONTROL) + rangesExpected.size() * sizeof(L3_CONTROL_WITHOUT_POST_SYNC);
size_t actualSize = HardwareCommandsHelper<FamilyType>::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, 0U);
EXPECT_EQ(expectedSize, actualSize);
HardwareCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U);
L3ControlPolicy<FamilyType> validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping};
std::string err;
auto cmdBuffOk = expectCmdBuff<FamilyType>(cmdQ.getCS(0), 0,
std::vector<MatchCmd *>{
new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
new MatchHwCmd<FamilyType, L3_CONTROL_WITHOUT_POST_SYNC>(AtLeastOne, {&validateL3ControlPolicy}),
},
&err);
EXPECT_TRUE(cmdBuffOk) << err;
EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed);
}
};
template <typename FamilyType>
class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand : public HardwareCommandsTest {
public:
void TestBodyImpl() {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL;
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
CommandQueueHw<FamilyType> cmdQ(nullptr, pClDevice, 0, false);
auto &commandStream = cmdQ.getCS(1024);
void *allocPtr = reinterpret_cast<void *>(static_cast<uintptr_t>(6 * MemoryConstants::pageSize));
MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2};
this->mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation);
size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(L3_CONTROL_WITHOUT_POST_SYNC);
size_t actualSize = HardwareCommandsHelper<FamilyType>::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, 0U);
EXPECT_EQ(expectedSize, actualSize);
HardwareCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U);
std::string err;
auto cmdBuffOk = expectCmdBuff<FamilyType>(cmdQ.getCS(0), 0,
std::vector<MatchCmd *>{
new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
new MatchHwCmd<FamilyType, L3_CONTROL_WITHOUT_POST_SYNC>(AtLeastOne)},
&err);
EXPECT_TRUE(cmdBuffOk) << err;
this->mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr);
}
};
template <typename FamilyType>
class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentAndPostSyncRequiredThenExpectProperCacheFlushCommand : public HardwareCommandsTest {
public:
void TestBodyImpl() {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using L3_CONTROL_WITH_POST_SYNC = typename FamilyType::L3_CONTROL;
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
CommandQueueHw<FamilyType> cmdQ(nullptr, pClDevice, 0, false);
auto &commandStream = cmdQ.getCS(1024);
void *allocPtr = reinterpret_cast<void *>(static_cast<uintptr_t>(6 * MemoryConstants::pageSize));
MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2};
this->mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation);
constexpr uint64_t postSyncAddress = 1024;
size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(L3_CONTROL_WITH_POST_SYNC);
size_t actualSize = HardwareCommandsHelper<FamilyType>::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, postSyncAddress);
EXPECT_EQ(expectedSize, actualSize);
HardwareCommandsHelper<FamilyType>::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, postSyncAddress);
std::string err;
auto cmdBuffOk = expectCmdBuff<FamilyType>(cmdQ.getCS(0), 0,
std::vector<MatchCmd *>{
new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
new MatchHwCmd<FamilyType, L3_CONTROL_WITH_POST_SYNC>(1, Expects{EXPECT_MEMBER(L3_CONTROL_WITH_POST_SYNC, getPostSyncAddress, postSyncAddress),
EXPECT_MEMBER(L3_CONTROL_WITH_POST_SYNC, getPostSyncImmediateData, 0)})},
&err);
EXPECT_TRUE(cmdBuffOk) << err;
this->mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr);
}
};
using EnqueueKernelFixture = HelloWorldFixture<HelloWorldFixtureFactory>;
using EnqueueKernelTest = Test<EnqueueKernelFixture>;
template <typename FamilyType>
class GivenCacheFlushAfterWalkerEnabledAndProperSteppingIsSetWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest {
public:
void TestBodyImpl(bool isA0Stepping) {
using WALKER = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE;
using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL;
DebugManagerStateRestore restore;
DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
DebugManager.flags.EnableTimestampPacket.set(0);
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto stepping = (isA0Stepping ? REVISION_A0 : REVISION_A1);
hardwareInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(stepping, hardwareInfo);
pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&hardwareInfo);
MockKernelWithInternals mockKernel(*pClDevice, context, true);
mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false;
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false;
auto memoryManager = pDevice->getUltCommandStreamReceiver<FamilyType>().getMemoryManager();
SVMAllocsManager svmManager(memoryManager, false);
void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
ASSERT_NE(nullptr, svm);
auto svmData = svmManager.getSVMAlloc(svm);
ASSERT_NE(nullptr, svmData);
auto svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
ASSERT_NE(nullptr, svmAllocation);
svmAllocation->setFlushL3Required(true);
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::INTERNAL_HEAP));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false;
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
L3RangesVec rangesExpected;
coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION);
L3ControlPolicy<FamilyType> validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping};
std::string err;
auto cmdBuffOk = expectCmdBuff<FamilyType>(cmdQ->getCS(0), 0,
std::vector<MatchCmd *>{new MatchAnyCmd(AnyNumber),
new MatchHwCmd<FamilyType, WALKER>(1),
new MatchAnyCmd(AnyNumber),
new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
new MatchHwCmd<FamilyType, L3_CONTROL_WITHOUT_POST_SYNC>(AtLeastOne, Expects{&validateL3ControlPolicy}),
new MatchAnyCmd(AnyNumber)},
&err);
EXPECT_TRUE(cmdBuffOk) << err;
EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed);
memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation);
svmManager.freeSVMAlloc(svm);
}
};
template <typename FamilyType>
class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest {
public:
void TestBodyImpl() {
using WALKER = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE;
using L3_CONTROL_WITH_POST_SYNC = typename FamilyType::L3_CONTROL;
DebugManagerStateRestore restore;
DebugManager.flags.EnableCacheFlushAfterWalker.set(1);
DebugManager.flags.EnableTimestampPacket.set(1);
MockKernelWithInternals mockKernel(*pDevice, context, true);
mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false;
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
auto memoryManager = pDevice->getUltCommandStreamReceiver<FamilyType>().getMemoryManager();
SVMAllocsManager svmManager(memoryManager, false);
void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
ASSERT_NE(nullptr, svm);
auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
svmAllocation->setFlushL3Required(true);
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::INTERNAL_HEAP));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
L3RangesVec rangesExpected;
coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION);
L3ControlPolicy<FamilyType> validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION};
std::string err;
auto cmdBuffOk = expectCmdBuff<FamilyType>(cmdQ->getCS(0), 0,
std::vector<MatchCmd *>{new MatchAnyCmd(AnyNumber),
new MatchHwCmd<FamilyType, WALKER>(1),
new MatchAnyCmd(AnyNumber),
new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
new MatchAnyCmd(AnyNumber),
new MatchHwCmd<FamilyType, L3_CONTROL_WITH_POST_SYNC>(AtLeastOne, Expects{&validateL3ControlPolicy}),
new MatchAnyCmd(AnyNumber)},
&err);
EXPECT_TRUE(cmdBuffOk) << err;
auto expectedRangeWithPostSync = rangesExpected[rangesExpected.size() - 1];
auto l3ParsedRangeWithPostSync = validateL3ControlPolicy.l3RangesParsed[validateL3ControlPolicy.l3RangesParsed.size() - 1];
EXPECT_EQ(expectedRangeWithPostSync, l3ParsedRangeWithPostSync);
memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation);
svmManager.freeSVMAlloc(svm);
}
};
template <typename FamilyType>
class GivenCacheFlushAfterWalkerDisabledAndProperSteppingIsSetWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker : public EnqueueKernelTest {
public:
void TestBodyImpl(bool isA0Stepping) {
using WALKER = typename FamilyType::WALKER_TYPE;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE;
using L3_CONTROL_BASE = typename FamilyType::L3_CONTROL_BASE;
DebugManagerStateRestore restore;
DebugManager.flags.EnableCacheFlushAfterWalker.set(0);
MockKernelWithInternals mockKernel(*pClDevice, context, true);
mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false;
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
auto memoryManager = pDevice->getUltCommandStreamReceiver<FamilyType>().getMemoryManager();
SVMAllocsManager svmManager(memoryManager, false);
void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
ASSERT_NE(nullptr, svm);
auto svmData = svmManager.getSVMAlloc(svm);
ASSERT_NE(nullptr, svmData);
auto svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex());
ASSERT_NE(nullptr, svmAllocation);
svmAllocation->setFlushL3Required(true);
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::INTERNAL_HEAP));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
L3ControlPolicy<FamilyType> validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping};
std::string err;
auto cmdBuffOk = expectCmdBuff<FamilyType>(cmdQ->getCS(0), 0,
std::vector<MatchCmd *>{
new MatchAnyCmd(AnyNumber),
new MatchHwCmd<FamilyType, WALKER>(1),
new MatchAnyCmd(AnyNumber),
new MatchHwCmd<FamilyType, L3_CONTROL_BASE>(0),
new MatchAnyCmd(AnyNumber),
},
&err);
EXPECT_TRUE(cmdBuffOk) << err;
memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation);
svmManager.freeSVMAlloc(svm);
}
};
template <typename FamilyType>
class GivenCacheResourceSurfacesWhenprocessingCacheFlushThenExpectProperCacheFlushCommand : public EnqueueKernelTest {
public:
void TestBodyImpl() {
using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL;
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, 0);
auto &commandStream = cmdQ.getCS(1024);
cl_resource_barrier_descriptor_intel descriptor{};
cl_resource_barrier_descriptor_intel descriptor2{};
SVMAllocsManager *svmManager = cmdQ.getContext().getSVMAllocsManager();
void *svm = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
auto retVal = CL_INVALID_VALUE;
size_t bufferSize = MemoryConstants::pageSize;
std::unique_ptr<Buffer> buffer(Buffer::create(
context,
CL_MEM_READ_WRITE,
bufferSize,
nullptr,
retVal));
descriptor.svm_allocation_pointer = svm;
descriptor2.mem_object = buffer.get();
const cl_resource_barrier_descriptor_intel descriptors[] = {descriptor, descriptor2};
BarrierCommand bCmd(&cmdQ, descriptors, 2);
CsrDependencies csrDeps;
cmdQ.processDispatchForCacheFlush(bCmd.surfacePtrs.begin(), bCmd.numSurfaces, &commandStream, csrDeps);
std::string err;
auto cmdBuffOk = expectCmdBuff<FamilyType>(cmdQ.getCS(0), 0,
std::vector<MatchCmd *>{
new MatchHwCmd<FamilyType, L3_CONTROL_WITHOUT_POST_SYNC>(AtLeastOne)},
&err);
EXPECT_TRUE(cmdBuffOk) << err;
svmManager->freeSVMAlloc(svm);
}
};

View File

@@ -21,6 +21,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/blit_commands_helper_extra.cpp
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/cache_flush.inl
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cache_policy.h
${CMAKE_CURRENT_SOURCE_DIR}/common_types.h
@@ -66,6 +67,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/kernel_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/l3_range.h
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.cpp
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.h
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.inl

View File

@@ -0,0 +1,62 @@
/*
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/l3_range.h"
#include "shared/source/utilities/range.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/kernel/kernel.h"
#include "hw_cmds.h"
namespace NEO {
template <typename GfxFamily>
inline void flushGpuCache(LinearStream *commandStream, const Range<L3Range> &ranges, uint64_t postSyncAddress, const HardwareInfo &hwInfo) {
using L3_FLUSH_ADDRESS_RANGE = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE;
using L3_FLUSH_EVICTION_POLICY = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY;
auto templ = GfxFamily::cmdInitL3ControlWithPostSync;
templ.getBase().setHdcPipelineFlush(true);
HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto isA0Stepping = hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo);
for (const L3Range *it = &*ranges.begin(), *last = &*ranges.rbegin(), *end = &*ranges.end(); it != end; ++it) {
if ((it == last) && (postSyncAddress != 0)) {
auto l3Control = commandStream->getSpaceForCmd<typename GfxFamily::L3_CONTROL>();
auto cmd = GfxFamily::cmdInitL3ControlWithPostSync;
cmd.getBase().setHdcPipelineFlush(templ.getBase().getHdcPipelineFlush());
cmd.getL3FlushAddressRange().setL3FlushEvictionPolicy(L3_FLUSH_EVICTION_POLICY::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping);
cmd.getL3FlushAddressRange().setAddress(it->getMaskedAddress(), isA0Stepping);
cmd.getL3FlushAddressRange().setAddressMask(it->getMask(), isA0Stepping);
cmd.getBase().setPostSyncOperation(GfxFamily::L3_CONTROL_BASE::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
cmd.getPostSyncData().setAddress(postSyncAddress);
cmd.getPostSyncData().setImmediateData(0);
*l3Control = cmd;
} else {
auto l3Control = commandStream->getSpaceForCmd<typename GfxFamily::L3_CONTROL>();
templ.getL3FlushAddressRange().setAddress(it->getMaskedAddress(), isA0Stepping);
templ.getL3FlushAddressRange().setAddressMask(it->getMask(), isA0Stepping);
templ.getL3FlushAddressRange().setL3FlushEvictionPolicy(L3_FLUSH_EVICTION_POLICY::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping);
*l3Control = templ;
}
}
}
template <typename GfxFamily>
inline size_t getSizeNeededToFlushGpuCache(const Range<L3Range> &ranges, bool usePostSync) {
size_t size = ranges.size() * sizeof(typename GfxFamily::L3_CONTROL);
if (usePostSync) {
UNRECOVERABLE_IF(ranges.size() == 0);
}
return size;
}
} // namespace NEO
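A note on sizing, restating what getSizeNeededToFlushGpuCache above computes: each subrange costs exactly one L3_CONTROL, and the post-sync write rides on the last command rather than adding an extra one, so usePostSync does not change the byte count (it only requires the range list to be non-empty). A trivial standalone restatement (hypothetical helper):

```cpp
#include <cstddef>

// Hypothetical restatement of getSizeNeededToFlushGpuCache: one L3_CONTROL
// per subrange; the optional post-sync operation adds no extra space.
size_t estimateL3FlushBytes(size_t subrangeCount, size_t sizeofL3Control) {
    return subrangeCount * sizeofL3Control;
}
```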

View File

@@ -0,0 +1,138 @@
/*
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/utilities/stackvec.h"
#include <cstdint>
#include <limits>
namespace NEO {
static const size_t maxFlushSubrangeCount = 126;
struct L3Range {
static constexpr uint64_t minAlignment = MemoryConstants::pageSize;
static constexpr uint64_t minAlignmentMask = minAlignment - 1ULL;
static constexpr uint64_t minAlignmentBitOffset = Math::ffs(minAlignment);
static constexpr uint64_t maxSingleRange = 4 * MemoryConstants::gigaByte;
static constexpr uint64_t maxMaskValue = Math::ffs(maxSingleRange / minAlignment);
static const uint64_t policySize = 2;
L3Range() = default;
uint64_t getMask() const {
return data.common.mask;
}
void setMask(uint64_t mask) {
data.common.mask = mask;
}
uint64_t getAddress() const {
return data.common.address << L3Range::minAlignmentBitOffset;
}
void setAddress(uint64_t address) {
data.common.address = address >> L3Range::minAlignmentBitOffset;
}
void setPolicy(uint64_t policy) {
data.common.policy = policy;
}
uint64_t getPolicy() const {
return data.common.policy;
}
static constexpr bool meetsMinimumAlignment(uint64_t v) {
return (0 == (v & minAlignmentMask));
}
static uint32_t getMaskFromSize(uint64_t size) {
UNRECOVERABLE_IF(false == Math::isPow2(size));
UNRECOVERABLE_IF((size < minAlignment) || (size > maxSingleRange));
auto ret = Math::ffs(size >> minAlignmentBitOffset);
static_assert(maxMaskValue < std::numeric_limits<uint32_t>::max(), "");
return static_cast<uint32_t>(ret);
}
uint64_t getSizeInBytes() const {
return (1ULL << (minAlignmentBitOffset + getMask()));
}
uint64_t getMaskedAddress() const {
return getAddress() & (~maxNBitValue(minAlignmentBitOffset + getMask()));
}
static L3Range fromAddressSize(uint64_t address, uint64_t size) {
L3Range ret;
ret.setAddress(address);
ret.setMask(getMaskFromSize(size));
return ret;
}
static L3Range fromAddressSizeWithPolicy(uint64_t address, uint64_t size, uint64_t policy) {
L3Range ret = fromAddressSize(address, size);
ret.setPolicy(policy);
return ret;
}
static L3Range fromAddressMask(uint64_t address, uint64_t mask) {
L3Range ret;
ret.setAddress(address);
ret.setMask(mask);
return ret;
}
protected:
union Data {
struct {
uint64_t mask : minAlignmentBitOffset;
uint64_t address : sizeof(uint64_t) * 8 - minAlignmentBitOffset - policySize;
uint64_t policy : policySize;
} common;
uint64_t raw;
} data;
static_assert(sizeof(Data) == sizeof(uint64_t), "");
};
inline bool operator==(const L3Range &lhs, const L3Range &rhs) {
return (lhs.getAddress() == rhs.getAddress()) && (lhs.getMask() == rhs.getMask());
}
inline bool operator!=(const L3Range &lhs, const L3Range &rhs) {
return (false == (lhs == rhs));
}
template <typename ContainerT>
inline void coverRangeExactImpl(uint64_t address, uint64_t size, ContainerT &ret, uint64_t policy) {
UNRECOVERABLE_IF(false == L3Range::meetsMinimumAlignment(address));
UNRECOVERABLE_IF(false == L3Range::meetsMinimumAlignment(size));
const uint64_t end = address + size;
uint64_t offset = address;
while (offset < end) {
uint64_t maxRangeSizeBySize = Math::prevPowerOfTwo(end - offset);
uint64_t maxRangeSizeByOffset = offset ? (1ULL << Math::ffs(offset)) : L3Range::maxSingleRange;
uint64_t rangeSize = std::min(maxRangeSizeBySize, maxRangeSizeByOffset);
rangeSize = std::min(rangeSize, +L3Range::maxSingleRange);
ret.push_back(L3Range::fromAddressSizeWithPolicy(offset, rangeSize, policy));
offset += rangeSize;
}
}
using L3RangesVec = StackVec<L3Range, 32>;
template <typename RetVecT>
inline void coverRangeExact(uint64_t address, uint64_t size, RetVecT &ret, uint64_t policy) {
coverRangeExactImpl(address, size, ret, policy);
}
} // namespace NEO
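A usage sketch of the decomposition helper above (assumes the NEO headers added in this commit are on the include path): coverRangeExact splits a region into subranges that are power-of-two sized and whose start is aligned to that size, which is why a 3-page range starting one page into the address space comes back as a 1-page piece followed by a 2-page piece, matching the l3_range unit tests in this commit.

```cpp
#include "shared/source/helpers/l3_range.h"

void exampleCoverRange() {
    using namespace NEO;
    const uint64_t page = MemoryConstants::pageSize;
    L3RangesVec subranges;
    coverRangeExact(page, 3 * page, subranges, 0 /* eviction policy */);
    // Expected per the tests above:
    //   subranges[0] == L3Range::fromAddressSize(page, page)
    //   subranges[1] == L3Range::fromAddressSize(2 * page, 2 * page)
}
```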

View File

@@ -10,6 +10,7 @@ set(NEO_SHARED_TESTS_CMD_PARSE
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_base_mi_arb.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_compute_mode.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_gpgpu_walker.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_l3_control.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_mi_arb.inl
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_sip.inl
${CMAKE_CURRENT_SOURCE_DIR}/gen_cmd_parse.h

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
using namespace NEO;
using L3_CONTROL_BASE = GenStruct::L3_CONTROL_BASE;
using L3_CONTROL = GenStruct::L3_CONTROL;
template <>
L3_CONTROL_BASE *genCmdCast<L3_CONTROL_BASE *>(void *buffer) {
auto pCmd = reinterpret_cast<L3_CONTROL_BASE *>(buffer);
return L3_CONTROL_BASE::TYPE_GFXPIPE == pCmd->TheStructure.Common.Type &&
L3_CONTROL_BASE::COMMAND_SUBTYPE_GFXPIPE_3D == pCmd->TheStructure.Common.CommandSubtype &&
L3_CONTROL_BASE::_3D_COMMAND_OPCODE_L3_CONTROL == pCmd->TheStructure.Common._3DCommandOpcode &&
L3_CONTROL_BASE::_3D_COMMAND_SUB_OPCODE_L3_CONTROL == pCmd->TheStructure.Common._3DCommandSubOpcode
? pCmd
: nullptr;
}
template <>
L3_CONTROL *genCmdCast<L3_CONTROL *>(void *buffer) {
auto pCmd = genCmdCast<L3_CONTROL_BASE *>(buffer);
if (pCmd == nullptr) {
return nullptr;
}
return reinterpret_cast<L3_CONTROL *>(pCmd);
}

View File

@@ -1,14 +1,38 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/cmd_parse/cmd_parse_l3_control.inl"
size_t getAdditionalCommandLengthHwSpecific(void *cmd) {
using L3_CONTROL_WITH_POST_SYNC = typename GenGfxFamily::L3_CONTROL;
using L3_CONTROL_WITHOUT_POST_SYNC = typename GenGfxFamily::L3_CONTROL;
auto pCmdWithPostSync = genCmdCast<L3_CONTROL_WITH_POST_SYNC *>(cmd);
if (pCmdWithPostSync)
return pCmdWithPostSync->getBase().TheStructure.Common.Length + 2;
auto pCmdWithoutPostSync = genCmdCast<L3_CONTROL_WITHOUT_POST_SYNC *>(cmd);
if (pCmdWithoutPostSync)
return pCmdWithoutPostSync->getBase().TheStructure.Common.Length + 2;
return 0;
}
const char *getAdditionalCommandNameHwSpecific(void *cmd) {
using L3_CONTROL_WITH_POST_SYNC = typename GenGfxFamily::L3_CONTROL;
using L3_CONTROL_WITHOUT_POST_SYNC = typename GenGfxFamily::L3_CONTROL;
if (nullptr != genCmdCast<L3_CONTROL_WITH_POST_SYNC *>(cmd)) {
return "L3_CONTROL(POST_SYNC)";
}
if (nullptr != genCmdCast<L3_CONTROL_WITHOUT_POST_SYNC *>(cmd)) {
return "L3_CONTROL(NO_POST_SYNC)";
}
return "UNKNOWN";
}

View File

@@ -1098,6 +1098,7 @@ using IsAtLeastGen12lp = IsAtLeastGfxCore<IGFX_GEN12LP_CORE>;
using IsBXT = IsProduct<IGFX_BROXTON>;
using IsCFL = IsProduct<IGFX_COFFEELAKE>;
using IsDG1 = IsProduct<IGFX_DG1>;
using IsEHL = IsProduct<IGFX_ELKHARTLAKE>;
using IsGLK = IsProduct<IGFX_GEMINILAKE>;
using IsICLLP = IsProduct<IGFX_ICELAKE_LP>;