Add OCL xe_hp_core unit tests

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2021-08-10 11:20:40 +00:00
committed by Compute-Runtime-Automation
parent b5d222f6cb
commit 14c93a6432
22 changed files with 5482 additions and 6 deletions

View File

@ -94,6 +94,9 @@ set(IGDRCL_SRCS_tests_command_queue
# Append the *_xehp_plus command queue test sources to IGDRCL_SRCS_tests_command_queue,
# but only when the TESTS_XEHP_PLUS option is set.
if(TESTS_XEHP_PLUS)
list(APPEND IGDRCL_SRCS_tests_command_queue
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests_xehp_plus.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_xehp_plus.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_resource_barier_tests_xehp_plus.cpp
${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_tests_xehp_plus.cpp
)
endif()

View File

@ -8,6 +8,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_manager.h"
@ -30,6 +31,7 @@
#include "opencl/test/unit_test/fixtures/dispatch_flags_fixture.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/fixtures/memory_management_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_tile_fixture.h"
#include "opencl/test/unit_test/helpers/raii_hw_helper.h"
#include "opencl/test/unit_test/libult/ult_command_stream_receiver.h"
#include "opencl/test/unit_test/mocks/mock_allocation_properties.h"
@ -1684,3 +1686,229 @@ HWTEST_F(CommandQueueOnSpecificEngineTests, givenNotInitializedCcsOsContextWhenC
ASSERT_EQ(&osContext, queue.gpgpuEngine->osContext);
EXPECT_TRUE(osContext.isInitialized());
}
// Creates a context and a queue through the CL API for sub-device 0 and checks that the
// resulting queue reports that very sub-device.
TEST_F(MultiTileFixture, givenSubDeviceWhenQueueIsCreatedThenItContainsProperDevice) {
    auto tile0 = platform()->getClDevice(0)->getDeviceById(0);

    const cl_device_id deviceId = tile0;
    auto returnStatus = CL_SUCCESS;
    auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, &returnStatus);
    // Nothing else can be verified without a context, so stop here on failure.
    ASSERT_EQ(CL_SUCCESS, returnStatus);
    ASSERT_NE(nullptr, context);

    auto commandQueue = clCreateCommandQueueWithProperties(context, tile0, nullptr, &returnStatus);
    EXPECT_EQ(CL_SUCCESS, returnStatus);
    EXPECT_NE(nullptr, commandQueue);

    // Guard the dereference so that a failed queue creation is reported by the expectations
    // above instead of crashing the test binary; the context is released on both paths.
    auto neoQueue = castToObject<CommandQueue>(commandQueue);
    if (neoQueue != nullptr) {
        EXPECT_EQ(&tile0->getDevice(), &neoQueue->getDevice());
        clReleaseCommandQueue(commandQueue);
    }

    clReleaseContext(context);
}
// Creates a context and a queue through the CL API for sub-device 1 and checks that the
// resulting queue reports that very sub-device.
TEST_F(MultiTileFixture, givenTile1WhenQueueIsCreatedThenItContainsTile1Device) {
    auto tile1 = platform()->getClDevice(0)->getDeviceById(1);

    const cl_device_id deviceId = tile1;
    auto returnStatus = CL_SUCCESS;
    auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, &returnStatus);
    // Nothing else can be verified without a context, so stop here on failure.
    ASSERT_EQ(CL_SUCCESS, returnStatus);
    ASSERT_NE(nullptr, context);

    auto commandQueue = clCreateCommandQueueWithProperties(context, tile1, nullptr, &returnStatus);
    EXPECT_EQ(CL_SUCCESS, returnStatus);
    EXPECT_NE(nullptr, commandQueue);

    // Guard the dereference so that a failed queue creation is reported by the expectations
    // above instead of crashing the test binary; the context is released on both paths.
    auto neoQueue = castToObject<CommandQueue>(commandQueue);
    if (neoQueue != nullptr) {
        EXPECT_EQ(&tile1->getDevice(), &neoQueue->getDevice());
        clReleaseCommandQueue(commandQueue);
    }

    clReleaseContext(context);
}
// Fixture that rebuilds a mock device so it exposes exactly two engines: one created from the
// (hardware-remapped) RCS type and one BCS engine. `properties` is prepared so that queues
// created from it select the engine family reported for EngineGroupType::Copy.
// The test is skipped when the default device has no copy engine group.
struct CopyOnlyQueueTests : ::testing::Test {
    void SetUp() override {
        typeUsageRcs.first = EngineHelpers::remapEngineTypeToHwSpecific(typeUsageRcs.first, *defaultHwInfo);

        // Keep the device in a unique_ptr until it is handed to MockClDevice, so the early
        // GTEST_SKIP() return below does not leak it.
        auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
        if (device->engineGroups[static_cast<uint32_t>(EngineGroupType::Copy)].empty()) {
            GTEST_SKIP();
        }

        // Drop the default engines and recreate only the two this fixture needs.
        device->engineGroups.clear();
        device->engineGroups.resize(static_cast<uint32_t>(EngineGroupType::MaxEngineGroups));
        device->engines.clear();

        device->createEngine(0, typeUsageRcs);
        device->createEngine(1, typeUsageBcs);
        bcsEngine = &device->getEngines().back();

        // From here on the device is reached through the MockClDevice wrapper, as before.
        auto rawDevice = device.release();
        clDevice = std::make_unique<MockClDevice>(rawDevice);

        context = std::make_unique<MockContext>(clDevice.get());

        properties[1] = rawDevice->getIndexOfNonEmptyEngineGroup(EngineGroupType::Copy);
    }

    EngineTypeUsage typeUsageBcs = EngineTypeUsage{aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular};
    EngineTypeUsage typeUsageRcs = EngineTypeUsage{aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular};

    std::unique_ptr<MockClDevice> clDevice{};
    std::unique_ptr<MockContext> context{};
    std::unique_ptr<MockCommandQueue> queue{};
    const EngineControl *bcsEngine = nullptr;

    // {family key, family index (filled in SetUp), index key, index within family, terminator}
    cl_queue_properties properties[5] = {CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0};
};
// A queue built from the fixture's copy-family properties must be flagged copy-only, own a
// timestamp packet container and report the fixture's BCS command stream receiver.
TEST_F(CopyOnlyQueueTests, givenBcsSelectedWhenCreatingCommandQueueThenItIsCopyOnly) {
    MockCommandQueue copyQueue{context.get(), clDevice.get(), properties, false};

    auto expectedBcsCsr = bcsEngine->commandStreamReceiver;
    EXPECT_EQ(expectedBcsCsr, copyQueue.getBcsCommandStreamReceiver());

    EXPECT_NE(nullptr, copyQueue.timestampPacketContainer);
    EXPECT_TRUE(copyQueue.isCopyOnly);
}
// Enqueuing a buffer copy on a copy-family queue must consume space in the BCS command stream.
HWTEST_F(CopyOnlyQueueTests, givenBcsSelectedWhenEnqueuingCopyThenBcsIsUsed) {
    std::unique_ptr<Buffer> source{BufferHelper<>::create(context.get())};
    std::unique_ptr<Buffer> destination{BufferHelper<>::create(context.get())};
    MockCommandQueueHw<FamilyType> copyQueue{context.get(), clDevice.get(), properties};

    // Remember how much of the BCS stream was in use before the enqueue.
    auto &bcsStream = bcsEngine->commandStreamReceiver->getCS(1024);
    auto usedBeforeEnqueue = bcsStream.getUsed();

    cl_int enqueueStatus = copyQueue.enqueueCopyBuffer(source.get(), destination.get(),
                                                       0, 0, 1,
                                                       0, nullptr, nullptr);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);

    EXPECT_NE(usedBeforeEnqueue, bcsStream.getUsed());
}
// With EnableBlitterOperationsSupport forced to 1, creating and releasing a queue on the copy
// engine family through the CL API must succeed.
HWTEST_F(CopyOnlyQueueTests, givenBlitterEnabledWhenCreatingBcsCommandQueueThenReturnSuccess) {
    DebugManagerStateRestore stateRestore{};
    DebugManager.flags.EnableBlitterOperationsSupport.set(1);

    cl_int creationStatus{};
    auto bcsQueue = clCreateCommandQueueWithProperties(context.get(), clDevice.get(), properties, &creationStatus);
    EXPECT_EQ(CL_SUCCESS, creationStatus);
    EXPECT_NE(nullptr, bcsQueue);

    auto releaseStatus = clReleaseCommandQueue(bcsQueue);
    EXPECT_EQ(CL_SUCCESS, releaseStatus);
}
using MultiEngineQueueHwTests = ::testing::Test;

// For each engine family/index pair that is available on the device, creates a queue with
// CL_QUEUE_FAMILY_INTEL / CL_QUEUE_INDEX_INTEL properties and verifies that the queue's
// gpgpu command stream receiver runs on the expected engine type.
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiEngineQueueHwTests, givenQueueFamilyPropertyWhenQueueIsCreatedThenSelectValidEngine) {
    initPlatform();
    HardwareInfo localHwInfo = *defaultHwInfo;

    localHwInfo.featureTable.ftrCCSNode = true;

    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&localHwInfo));
    MockContext context(device.get());
    context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;

    // CCS0 is only tested when the device actually created an engine of that type.
    bool ccsFound = false;
    for (auto &engine : device->engines) {
        if (engine.osContext->getEngineType() == aub_stream::EngineType::ENGINE_CCS) {
            ccsFound = true;
            break;
        }
    }

    // One test case: the queue properties to pass in and the engine type expected back.
    struct CommandQueueTestValues {
        CommandQueueTestValues() = delete;
        CommandQueueTestValues(cl_queue_properties engineFamily, cl_queue_properties engineIndex, aub_stream::EngineType expectedEngine)
            : expectedEngine(expectedEngine) {
            properties[1] = engineFamily;
            properties[3] = engineIndex;
        }

        cl_command_queue clCommandQueue = nullptr;
        CommandQueue *commandQueueObj = nullptr;
        cl_queue_properties properties[5] = {CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0};
        aub_stream::EngineType expectedEngine;
    };

    // Registers a test case only when the corresponding command streamer is enabled.
    auto addTestValueIfAvailable = [&](std::vector<CommandQueueTestValues> &vec, EngineGroupType engineGroup, cl_queue_properties queueIndex, aub_stream::EngineType engineType, bool csEnabled) {
        if (csEnabled) {
            const auto familyIndex = device->getDevice().getIndexOfNonEmptyEngineGroup(engineGroup);
            vec.push_back(CommandQueueTestValues(static_cast<cl_queue_properties>(familyIndex), queueIndex, engineType));
        }
    };

    auto retVal = CL_SUCCESS;
    const auto &ccsInstances = localHwInfo.gtSystemInfo.CCSInfo.Instances.Bits;
    std::vector<CommandQueueTestValues> commandQueueTestValues;
    addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::RenderCompute, 0, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_RCS, device->getHardwareInfo()), true);
    addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::Compute, 0, aub_stream::ENGINE_CCS, ccsFound);
    addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::Compute, 1, aub_stream::ENGINE_CCS1, ccsInstances.CCS1Enabled);
    addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::Compute, 2, aub_stream::ENGINE_CCS2, ccsInstances.CCS2Enabled);
    addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::Compute, 3, aub_stream::ENGINE_CCS3, ccsInstances.CCS3Enabled);

    for (auto &commandQueueTestValue : commandQueueTestValues) {
        if (commandQueueTestValue.properties[1] >= HwHelper::getGpgpuEnginesCount(device->getHardwareInfo())) {
            continue;
        }
        commandQueueTestValue.clCommandQueue = clCreateCommandQueueWithProperties(&context, device.get(),
                                                                                  &commandQueueTestValue.properties[0], &retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        commandQueueTestValue.commandQueueObj = castToObject<CommandQueue>(commandQueueTestValue.clCommandQueue);
        // The queue object is dereferenced right below; fail cleanly rather than crash.
        ASSERT_NE(nullptr, commandQueueTestValue.commandQueueObj);

        auto &cmdQueueEngine = commandQueueTestValue.commandQueueObj->getGpgpuCommandStreamReceiver().getOsContext().getEngineType();
        EXPECT_EQ(commandQueueTestValue.expectedEngine, cmdQueueEngine);

        // Release through the same CL handle that the create call returned.
        clReleaseCommandQueue(commandQueueTestValue.clCommandQueue);
    }
}
// A queue created on the root device inside a default-type context must end up on the root
// device's default command stream receiver and share its multi-OS-context capability.
TEST_F(MultiTileFixture, givenDefaultContextWithRootDeviceWhenQueueIsCreatedThenQueueIsMultiEngine) {
    auto rootClDevice = platform()->getClDevice(0);
    MockContext defaultContext(rootClDevice);
    defaultContext.contextType = ContextType::CONTEXT_TYPE_DEFAULT;

    auto expectedCsr = rootClDevice->getDefaultEngine().commandStreamReceiver;

    MockCommandQueue rootQueue(&defaultContext, rootClDevice, nullptr, false);
    ASSERT_NE(nullptr, rootQueue.gpgpuEngine);

    auto &queueCsr = rootQueue.getGpgpuCommandStreamReceiver();
    EXPECT_EQ(expectedCsr->isMultiOsContextCapable(), queueCsr.isMultiOsContextCapable());
    EXPECT_EQ(expectedCsr, rootQueue.gpgpuEngine->commandStreamReceiver);
}
// A queue created on sub-device 0 inside a default-type context must use a command stream
// receiver that is not multi-OS-context capable.
TEST_F(MultiTileFixture, givenDefaultContextWithSubdeviceWhenQueueIsCreatedThenQueueIsNotMultiEngine) {
    auto tile0 = platform()->getClDevice(0)->getDeviceById(0);
    MockContext defaultContext(tile0);
    defaultContext.contextType = ContextType::CONTEXT_TYPE_DEFAULT;

    MockCommandQueue tileQueue(&defaultContext, tile0, nullptr, false);
    ASSERT_NE(nullptr, tileQueue.gpgpuEngine);

    auto &queueCsr = tileQueue.getGpgpuCommandStreamReceiver();
    EXPECT_FALSE(queueCsr.isMultiOsContextCapable());
}
// Same check as for the default context type, but with an unrestrictive context: the queue must
// use the root device's default command stream receiver.
TEST_F(MultiTileFixture, givenUnrestrictiveContextWithRootDeviceWhenQueueIsCreatedThenQueueIsMultiEngine) {
    auto rootClDevice = platform()->getClDevice(0);
    MockContext unrestrictiveContext(rootClDevice);
    unrestrictiveContext.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;

    auto expectedCsr = rootClDevice->getDefaultEngine().commandStreamReceiver;

    MockCommandQueue rootQueue(&unrestrictiveContext, rootClDevice, nullptr, false);
    ASSERT_NE(nullptr, rootQueue.gpgpuEngine);

    auto &queueCsr = rootQueue.getGpgpuCommandStreamReceiver();
    EXPECT_EQ(expectedCsr->isMultiOsContextCapable(), queueCsr.isMultiOsContextCapable());
    EXPECT_EQ(expectedCsr, rootQueue.gpgpuEngine->commandStreamReceiver);
}
// Takes the expected command stream receiver from the underlying RootDevice (rather than from
// the ClDevice wrapper) and checks that a queue created in an unrestrictive context uses it.
TEST_F(MultiTileFixture, givenNotDefaultContextWithRootDeviceAndTileIdMaskWhenQueueIsCreatedThenQueueIsMultiEngine) {
    auto clRoot = platform()->getClDevice(0);
    auto neoRoot = static_cast<RootDevice *>(&clRoot->getDevice());

    MockContext unrestrictiveContext(clRoot);
    unrestrictiveContext.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;

    auto expectedCsr = neoRoot->getDefaultEngine().commandStreamReceiver;

    MockCommandQueue rootQueue(&unrestrictiveContext, clRoot, nullptr, false);
    ASSERT_NE(nullptr, rootQueue.gpgpuEngine);

    auto &queueCsr = rootQueue.getGpgpuCommandStreamReceiver();
    EXPECT_EQ(expectedCsr->isMultiOsContextCapable(), queueCsr.isMultiOsContextCapable());
    EXPECT_EQ(expectedCsr, rootQueue.gpgpuEngine->commandStreamReceiver);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,398 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/pipeline_select_helper.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "opencl/test/unit_test/fixtures/media_kernel_fixture.h"
#include "opencl/test/unit_test/libult/ult_command_stream_receiver.h"
#include "test.h"
using namespace NEO;
using MediaKernelTest = MediaKernelFixture<HelloWorldFixtureFactory>;

// Enqueues the VME kernel blocked on a user event, unblocks it, and checks the PIPELINE_SELECT
// that was programmed: full mask, GPGPU pipeline, DOP clock gating off, systolic mode off.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueBlockedVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    cl_uint dimensions = 1;
    size_t offsets[3] = {0, 0, 0};
    size_t sizes[3] = {1, 1, 1};

    // The user event keeps the enqueue blocked until its status is set to complete.
    UserEvent blockingEvent(context);
    cl_event waitList = &blockingEvent;

    auto enqueueStatus = pCmdQ->enqueueKernel(pVmeKernel, dimensions, offsets, sizes, nullptr,
                                              1, &waitList, nullptr);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);

    blockingEvent.setStatus(CL_COMPLETE);

    parseCommands<FamilyType>(*pCmdQ);
    ASSERT_NE(cmdPipelineSelect, nullptr);

    auto *pipelineSelect = genCmdCast<PIPELINE_SELECT *>(cmdPipelineSelect);
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;
    auto gpgpuSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;

    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_FALSE(pipelineSelect->getMediaSamplerDopClockGateEnable());
    EXPECT_FALSE(pipelineSelect->getSystolicModeEnable());

    pCmdQ->releaseVirtualEvent();
}
// Enqueues the regular (non-VME) kernel blocked on a user event, unblocks it, and checks the
// PIPELINE_SELECT that was programmed: full mask, GPGPU pipeline, DOP clock gating on,
// systolic mode off.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueBlockedNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    cl_uint dimensions = 1;
    size_t offsets[3] = {0, 0, 0};
    size_t sizes[3] = {1, 1, 1};

    // The user event keeps the enqueue blocked until its status is set to complete.
    UserEvent blockingEvent(context);
    cl_event waitList = &blockingEvent;

    auto enqueueStatus = pCmdQ->enqueueKernel(pKernel, dimensions, offsets, sizes, nullptr,
                                              1, &waitList, nullptr);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);

    blockingEvent.setStatus(CL_COMPLETE);

    parseCommands<FamilyType>(*pCmdQ);
    ASSERT_NE(cmdPipelineSelect, nullptr);

    auto *pipelineSelect = genCmdCast<PIPELINE_SELECT *>(cmdPipelineSelect);
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;
    auto gpgpuSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;

    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_TRUE(pipelineSelect->getMediaSamplerDopClockGateEnable());
    EXPECT_FALSE(pipelineSelect->getSystolicModeEnable());

    pCmdQ->releaseVirtualEvent();
}
// A single non-blocked VME kernel enqueue must emit one COMPUTE_WALKER and exactly one
// PIPELINE_SELECT with DOP clock gating and systolic mode both disabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pVmeKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);

    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    auto pipelineSelectCount = getCommandsList<PIPELINE_SELECT>().size();
    EXPECT_EQ(1u, pipelineSelectCount);

    auto pipelineSelect = getCommand<PIPELINE_SELECT>();
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;
    auto gpgpuSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;

    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_FALSE(pipelineSelect->getMediaSamplerDopClockGateEnable());
    EXPECT_FALSE(pipelineSelect->getSystolicModeEnable());
}
// A single non-blocked enqueue of the regular kernel must emit one COMPUTE_WALKER and exactly
// one PIPELINE_SELECT with DOP clock gating enabled and systolic mode disabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);

    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    auto pipelineSelectCount = getCommandsList<PIPELINE_SELECT>().size();
    EXPECT_EQ(1u, pipelineSelectCount);

    auto pipelineSelect = getCommand<PIPELINE_SELECT>();
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;
    auto gpgpuSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;

    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_TRUE(pipelineSelect->getMediaSamplerDopClockGateEnable());
    EXPECT_FALSE(pipelineSelect->getSystolicModeEnable());
}
// Enqueues the VME kernel two times in a row and verifies that only one PIPELINE_SELECT ends up
// in the parsed command streams.
// The previous body enqueued the kernel a single time, so the "twice" scenario from the test
// name was never exercised.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) {
    typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;

    auto retVal = EnqueueKernelHelper<>::enqueueKernel(
        pCmdQ,
        pVmeKernel);
    ASSERT_EQ(CL_SUCCESS, retVal);

    retVal = EnqueueKernelHelper<>::enqueueKernel(
        pCmdQ,
        pVmeKernel);
    ASSERT_EQ(CL_SUCCESS, retVal);

    // Parse once, after both enqueues, so the count below covers both submissions.
    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<typename FamilyType::COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    auto numCommands = getCommandsList<PIPELINE_SELECT>().size();
    EXPECT_EQ(1u, numCommands);
}
// Enqueues the regular (non-VME) kernel two times in a row and verifies that only one
// PIPELINE_SELECT ends up in the parsed command streams.
// The previous body was a copy of the VME variant: it enqueued pVmeKernel, and only once, so the
// non-VME path named by this test was never exercised.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) {
    typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;

    auto retVal = EnqueueKernelHelper<>::enqueueKernel(
        pCmdQ,
        pKernel);
    ASSERT_EQ(CL_SUCCESS, retVal);

    retVal = EnqueueKernelHelper<>::enqueueKernel(
        pCmdQ,
        pKernel);
    ASSERT_EQ(CL_SUCCESS, retVal);

    // Parse once, after both enqueues, so the count below covers both submissions.
    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<typename FamilyType::COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    auto numCommands = getCommandsList<PIPELINE_SELECT>().size();
    EXPECT_EQ(1u, numCommands);
}
// Regular kernel first, VME kernel second: two PIPELINE_SELECT commands are expected and the
// last one must have DOP clock gating and systolic mode disabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    // First submission: regular kernel.
    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    // Second submission: VME kernel.
    enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pVmeKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    auto pipelineSelects = getCommandsList<PIPELINE_SELECT>();
    EXPECT_EQ(2u, pipelineSelects.size());

    auto lastPipelineSelect = static_cast<PIPELINE_SELECT *>(pipelineSelects.back());
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;

    EXPECT_EQ(fullMask, lastPipelineSelect->getMaskBits());
    EXPECT_FALSE(lastPipelineSelect->getMediaSamplerDopClockGateEnable());
    EXPECT_FALSE(lastPipelineSelect->getSystolicModeEnable());
}
// VME kernel first, regular kernel second: two PIPELINE_SELECT commands are expected and the
// last one must have DOP clock gating enabled and systolic mode disabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    // First submission: VME kernel.
    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pVmeKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    // Second submission: regular kernel.
    enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    auto pipelineSelects = getCommandsList<PIPELINE_SELECT>();
    EXPECT_EQ(2u, pipelineSelects.size());

    auto lastPipelineSelect = static_cast<PIPELINE_SELECT *>(pipelineSelects.back());
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;

    EXPECT_EQ(fullMask, lastPipelineSelect->getMaskBits());
    EXPECT_TRUE(lastPipelineSelect->getMediaSamplerDopClockGateEnable());
    EXPECT_FALSE(lastPipelineSelect->getSystolicModeEnable());
}
// lastVmeSubslicesConfig preset to true must still be true after a VME kernel enqueue.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToFalse) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(pDevice->getGpgpuCommandStreamReceiver());
    ultCsr.lastVmeSubslicesConfig = true;

    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pVmeKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);

    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    EXPECT_TRUE(ultCsr.lastVmeSubslicesConfig);
}
// lastVmeSubslicesConfig preset to false must still be false after a VME kernel enqueue.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusCsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToTrue) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(pDevice->getGpgpuCommandStreamReceiver());
    ultCsr.lastVmeSubslicesConfig = false;

    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pVmeKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);

    parseCommands<FamilyType>(*pCmdQ);
    itorWalker1 = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker1);

    EXPECT_FALSE(ultCsr.lastVmeSubslicesConfig);
}
// getCmdSizeForMediaSampler must return 0 for both argument values, whichever value
// lastVmeSubslicesConfig currently holds.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, WhenGettingCmdSizeForVmeThenZeroIsReturned) {
    auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(pDevice->getGpgpuCommandStreamReceiver());

    const bool lastConfigs[] = {false, true};
    for (const bool lastConfig : lastConfigs) {
        ultCsr.lastVmeSubslicesConfig = lastConfig;
        EXPECT_EQ(0u, ultCsr.getCmdSizeForMediaSampler(false));
        EXPECT_EQ(0u, ultCsr.getCmdSizeForMediaSampler(true));
    }
}
// A kernel that requests the special pipeline select mode must produce a single PIPELINE_SELECT
// with systolic mode enabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusWhenEnqueueSystolicKernelThenPipelineSelectEnablesSystolicMode) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    MockKernelWithInternals systolicKernel(*pClDevice, context);
    systolicKernel.mockKernel->setSpecialPipelineSelectMode(true);

    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, systolicKernel.mockKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    EXPECT_TRUE(systolicKernel.mockKernel->requiresSpecialPipelineSelectMode());

    parseCommands<FamilyType>(*pCmdQ);

    auto pipelineSelectCount = getCommandCount<PIPELINE_SELECT>();
    EXPECT_EQ(1u, pipelineSelectCount);

    auto pipelineSelect = getCommand<PIPELINE_SELECT>();
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;
    auto gpgpuSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;

    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_TRUE(pipelineSelect->getSystolicModeEnable());
}
// A kernel that does not request the special pipeline select mode must produce a single
// PIPELINE_SELECT with systolic mode disabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusWhenEnqueueNonSystolicKernelThenPipelineSelectDisablesSystolicMode) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    MockKernelWithInternals regularKernel(*pClDevice, context);
    regularKernel.mockKernel->setSpecialPipelineSelectMode(false);

    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, regularKernel.mockKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    EXPECT_FALSE(regularKernel.mockKernel->requiresSpecialPipelineSelectMode());

    parseCommands<FamilyType>(*pCmdQ);

    auto pipelineSelectCount = getCommandCount<PIPELINE_SELECT>();
    EXPECT_EQ(1u, pipelineSelectCount);

    auto pipelineSelect = getCommand<PIPELINE_SELECT>();
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;
    auto gpgpuSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;

    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_FALSE(pipelineSelect->getSystolicModeEnable());
}
// Two consecutive kernels that both request the special pipeline select mode must result in only
// one PIPELINE_SELECT, with systolic mode enabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusWhenEnqueueTwoSystolicKernelsThenPipelineSelectEnablesSystolicModeOnce) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    MockKernelWithInternals firstKernel(*pClDevice, context);
    firstKernel.mockKernel->setSpecialPipelineSelectMode(true);
    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, firstKernel.mockKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    EXPECT_TRUE(firstKernel.mockKernel->requiresSpecialPipelineSelectMode());

    MockKernelWithInternals secondKernel(*pClDevice, context);
    secondKernel.mockKernel->setSpecialPipelineSelectMode(true);
    enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, secondKernel.mockKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    EXPECT_TRUE(secondKernel.mockKernel->requiresSpecialPipelineSelectMode());

    parseCommands<FamilyType>(*pCmdQ);

    auto pipelineSelectCount = getCommandCount<PIPELINE_SELECT>();
    EXPECT_EQ(1u, pipelineSelectCount);

    auto pipelineSelect = getCommand<PIPELINE_SELECT>();
    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;
    auto gpgpuSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;

    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_TRUE(pipelineSelect->getSystolicModeEnable());
}
// A non-systolic kernel followed by a systolic one must produce two PIPELINE_SELECT commands:
// the first with systolic mode disabled, the second with it enabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, MediaKernelTest, givenXeHPPlusWhenEnqueueTwoKernelsThenPipelineSelectEnablesSystolicModeWhenNeeded) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    MockKernelWithInternals regularKernel(*pClDevice, context);
    regularKernel.mockKernel->setSpecialPipelineSelectMode(false);
    auto enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, regularKernel.mockKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    EXPECT_FALSE(regularKernel.mockKernel->requiresSpecialPipelineSelectMode());

    MockKernelWithInternals systolicKernel(*pClDevice, context);
    systolicKernel.mockKernel->setSpecialPipelineSelectMode(true);
    enqueueStatus = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, systolicKernel.mockKernel);
    ASSERT_EQ(CL_SUCCESS, enqueueStatus);
    EXPECT_TRUE(systolicKernel.mockKernel->requiresSpecialPipelineSelectMode());

    parseCommands<FamilyType>(*pCmdQ);

    auto pipelineSelectCount = getCommandCount<PIPELINE_SELECT>();
    EXPECT_EQ(2u, pipelineSelectCount);

    auto fullMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectSystolicModeEnableMaskBits;
    auto gpgpuSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;

    // First PIPELINE_SELECT in the parsed list.
    auto selectIterator = find<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), selectIterator);
    auto pipelineSelect = genCmdCast<PIPELINE_SELECT *>(*selectIterator);
    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_FALSE(pipelineSelect->getSystolicModeEnable());

    // Continue the search right after the first hit to reach the second PIPELINE_SELECT.
    ++selectIterator;
    selectIterator = find<PIPELINE_SELECT *>(selectIterator, cmdList.end());
    ASSERT_NE(cmdList.end(), selectIterator);
    pipelineSelect = genCmdCast<PIPELINE_SELECT *>(*selectIterator);
    EXPECT_EQ(fullMask, pipelineSelect->getMaskBits());
    EXPECT_EQ(gpgpuSelection, pipelineSelect->getPipelineSelection());
    EXPECT_TRUE(pipelineSelect->getSystolicModeEnable());
}

View File

@ -0,0 +1,121 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/os_context.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "opencl/source/command_queue/resource_barrier.h"
#include "opencl/source/event/event_builder.h"
#include "opencl/source/event/user_event.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/memory_manager/resource_surface.h"
#include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_timestamp_container.h"
#include "test.h"
namespace NEO {
// Mock queue whose isCacheFlushCommand() answer is driven by a test-settable flag instead of
// the command type passed in.
template <typename GfxFamily>
class MockCommandQueueWithCacheFlush : public MockCommandQueueHw<GfxFamily> {
    // Reuse the constructors of the base mock.
    using MockCommandQueueHw<GfxFamily>::MockCommandQueueHw;

  public:
    // Value returned by isCacheFlushCommand() for every command type.
    bool commandRequireCacheFlush = false;

    bool isCacheFlushCommand(uint32_t commandType) const override {
        const bool flushRequired = commandRequireCacheFlush;
        return flushRequired;
    }
};
using EnqueueResourceBarrierTestXeHpCorePlus = EnqueueHandlerTest;

// With timestamp packets enabled, a resource barrier enqueued with an output event must attach
// exactly one timestamp packet node to that event.
HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueResourceBarrierTestXeHpCorePlus, GivenCommandStreamWithoutKernelAndTimestampPacketEnabledWhenEnqueuedResourceBarrierWithEventThenTimestampAddedToEvent) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableTimestampPacket.set(1);

    pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
    std::unique_ptr<MockCommandQueueWithCacheFlush<FamilyType>> mockCmdQ(new MockCommandQueueWithCacheFlush<FamilyType>(context, pClDevice, 0));
    mockCmdQ->commandRequireCacheFlush = true;

    auto retVal = CL_INVALID_VALUE;
    size_t bufferSize = MemoryConstants::pageSize;
    std::unique_ptr<Buffer> buffer(Buffer::create(
        context,
        CL_MEM_READ_WRITE,
        bufferSize,
        nullptr,
        retVal));
    // The buffer is dereferenced below; stop cleanly if it could not be created.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());

    auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    std::unique_ptr<ResourceSurface> surface(new ResourceSurface(allocation, CL_RESOURCE_BARRIER_TYPE_RELEASE, CL_MEMORY_SCOPE_DEVICE));

    // Input event carrying one timestamp packet node, used as the barrier's wait list.
    MockTimestampPacketContainer timestamp1(*pDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);

    Event event1(mockCmdQ.get(), 0, 0, 0);
    cl_event event2 = nullptr;
    event1.addTimestampPacketNodes(timestamp1);

    cl_event waitlist[] = {&event1};

    cl_resource_barrier_descriptor_intel descriptor{};
    descriptor.mem_object = buffer.get();
    descriptor.svm_allocation_pointer = nullptr;

    BarrierCommand barrierCommand(mockCmdQ.get(), &descriptor, 1);

    retVal = mockCmdQ->enqueueResourceBarrier(
        &barrierCommand,
        1,
        waitlist,
        &event2);
    // Verify the enqueue result before converting the output event handle.
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto eventObj = castToObjectOrAbort<Event>(event2);
    EXPECT_EQ(1u, eventObj->getTimestampPacketNodes()->peekNodes().size());
    eventObj->release();
}
// With timestamp packets disabled (debug flag, CSR flag and the queue's container all cleared),
// a resource barrier enqueued with an output event must not attach timestamp nodes to it.
HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueResourceBarrierTestXeHpCorePlus, GivenCommandStreamWithoutKernelAndTimestampPacketDisabledWhenEnqueuedResourceBarrierWithEventThenTimestampNotAddedToEvent) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableTimestampPacket.set(0);

    // Same CSR accessor as the timestamp-enabled sibling test uses.
    pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
    std::unique_ptr<MockCommandQueueWithCacheFlush<FamilyType>> mockCmdQ(new MockCommandQueueWithCacheFlush<FamilyType>(context, pClDevice, 0));
    mockCmdQ->commandRequireCacheFlush = true;
    mockCmdQ->timestampPacketContainer.reset();

    auto retVal = CL_INVALID_VALUE;
    size_t bufferSize = MemoryConstants::pageSize;
    std::unique_ptr<Buffer> buffer(Buffer::create(
        context,
        CL_MEM_READ_WRITE,
        bufferSize,
        nullptr,
        retVal));
    // The buffer is dereferenced below; stop cleanly if it could not be created.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());

    auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    std::unique_ptr<ResourceSurface> surface(new ResourceSurface(allocation, CL_RESOURCE_BARRIER_TYPE_RELEASE, CL_MEMORY_SCOPE_DEVICE));

    Event event1(mockCmdQ.get(), 0, 0, 0);
    cl_event event2 = nullptr;

    cl_event waitlist[] = {&event1};

    cl_resource_barrier_descriptor_intel descriptor{};
    descriptor.mem_object = buffer.get();
    descriptor.svm_allocation_pointer = nullptr;

    BarrierCommand barrierCommand(mockCmdQ.get(), &descriptor, 1);

    retVal = mockCmdQ->enqueueResourceBarrier(
        &barrierCommand,
        1,
        waitlist,
        &event2);
    // Verify the enqueue result before converting the output event handle.
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto eventObj = castToObjectOrAbort<Event>(event2);
    EXPECT_EQ(nullptr, eventObj->getTimestampPacketNodes());
    eventObj->release();
}
} // namespace NEO

View File

@ -5,12 +5,14 @@
*
*/
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/context_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "gtest/gtest.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "test.h"
using namespace NEO;
@ -122,7 +124,9 @@ INSTANTIATE_TEST_CASE_P(
GetCommandQueueInfoTest,
::testing::ValuesIn(DefaultCommandQueueProperties));
TEST(GetCommandQueueFamilyInfoTest, givenQueueFamilyNotSelectedWhenGettingFamilyAndQueueIndexThenValuesAreReturned) {
using GetCommandQueueFamilyInfoTests = ::testing::Test;
TEST_F(GetCommandQueueFamilyInfoTests, givenQueueFamilyNotSelectedWhenGettingFamilyAndQueueIndexThenValuesAreReturned) {
MockContext context{};
MockCommandQueue queue{context};
queue.queueFamilySelected = false;
@ -153,7 +157,7 @@ TEST(GetCommandQueueFamilyInfoTest, givenQueueFamilyNotSelectedWhenGettingFamily
EXPECT_EQ(0u, queueIndex);
}
TEST(GetCommandQueueFamilyInfoTest, givenQueueFamilySelectedWhenGettingFamilyAndQueueIndexThenValuesAreReturned) {
TEST_F(GetCommandQueueFamilyInfoTests, givenQueueFamilySelectedWhenGettingFamilyAndQueueIndexThenValuesAreReturned) {
MockCommandQueue queue;
queue.queueFamilySelected = true;
queue.queueFamilyIndex = 12u;
@ -178,3 +182,87 @@ TEST(GetCommandQueueFamilyInfoTest, givenQueueFamilySelectedWhenGettingFamilyAnd
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(queue.queueIndexWithinFamily, queueIndex);
}
// Creates a queue with explicit CL_QUEUE_FAMILY_INTEL / CL_QUEUE_INDEX_INTEL properties and
// checks that getCommandQueueInfo reports the same family and index back.
HWCMDTEST_F(IGFX_XE_HP_CORE, GetCommandQueueFamilyInfoTests, givenFamilyIdWhenGettingCommandQueueInfoThenCorrectValueIsReturned) {
    HardwareInfo hwInfo = *defaultHwInfo.get();
    hwInfo.featureTable.ftrCCSNode = true;

    MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0)};
    const cl_device_id deviceId = &mockClDevice;
    auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, nullptr);

    auto ccsFamily = mockClDevice.getDevice().getIndexOfNonEmptyEngineGroup(EngineGroupType::Compute);
    cl_command_queue_properties properties[] = {CL_QUEUE_FAMILY_INTEL, ccsFamily, CL_QUEUE_INDEX_INTEL, 0, 0};
    EXPECT_EQ(1u, mockClDevice.getNumAvailableDevices());

    auto commandQueue = clCreateCommandQueueWithProperties(context, deviceId, properties, nullptr);
    auto neoQueue = castToObject<CommandQueue>(commandQueue);
    if (neoQueue == nullptr) {
        // Fail this test instead of dereferencing a null queue below; release what was created so far.
        clReleaseContext(context);
        FAIL() << "command queue creation failed";
    }

    // Sentinel values: if the query does not write the output, the comparison fails
    // deterministically instead of reading an indeterminate value.
    cl_uint familyParameter = ~0u;
    auto retVal = neoQueue->getCommandQueueInfo(
        CL_QUEUE_FAMILY_INTEL,
        sizeof(familyParameter),
        &familyParameter,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(ccsFamily, familyParameter);

    cl_uint indexParameter = ~0u;
    retVal = neoQueue->getCommandQueueInfo(
        CL_QUEUE_INDEX_INTEL,
        sizeof(indexParameter),
        &indexParameter,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, indexParameter);

    clReleaseCommandQueue(commandQueue);
    clReleaseContext(context);
}
// With CreateMultipleSubDevices=2, creating a queue on the root device with an explicit
// queue family must fail with CL_INVALID_QUEUE_PROPERTIES and return no queue.
HWCMDTEST_F(IGFX_XE_HP_CORE, GetCommandQueueFamilyInfoTests, givenFamilyIdWhenCreatingCommandQueueForRootDeviceWithMultipleSubDevicesThenInvalidValueIsReturned) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.CreateMultipleSubDevices.set(2);
    initPlatform();

    auto pRootClDevice = platform()->getClDevice(0);
    const cl_device_id rootDeviceId = pRootClDevice;
    auto clContext = clCreateContext(nullptr, 1, &rootDeviceId, nullptr, nullptr, nullptr);

    cl_command_queue_properties queueProperties[] = {
        CL_QUEUE_FAMILY_INTEL, static_cast<uint32_t>(EngineGroupType::Compute),
        CL_QUEUE_INDEX_INTEL, 0,
        0};
    EXPECT_EQ(2u, pRootClDevice->getNumAvailableDevices());

    cl_int status;
    auto createdQueue = clCreateCommandQueueWithProperties(clContext, pRootClDevice, queueProperties, &status);
    EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, status);
    EXPECT_EQ(nullptr, createdQueue);

    clReleaseContext(clContext);
}
using MultiEngineQueueHwTests = ::testing::Test;

// Device is created with four CCS instances enabled; for each queue index the enable mask is
// then narrowed to that single instance and queue creation is expected to succeed.
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiEngineQueueHwTests, givenLimitedNumberOfCcsWhenCreatingCmdQueueThenFailOnNotSupportedCcs) {
    constexpr uint32_t ccsCount = 4;

    HardwareInfo localHwInfo = *defaultHwInfo;
    auto &ccsInfo = localHwInfo.gtSystemInfo.CCSInfo;
    ccsInfo.IsValid = true;
    ccsInfo.NumberOfCCSEnabled = 4;
    ccsInfo.Instances.CCSEnableMask = 0b1111;
    localHwInfo.featureTable.ftrCCSNode = true;

    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&localHwInfo));
    MockContext context(clDevice.get());
    context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;

    auto ccsEngine = clDevice->getDevice().getIndexOfNonEmptyEngineGroup(EngineGroupType::Compute);
    cl_queue_properties properties[5] = {CL_QUEUE_FAMILY_INTEL, ccsEngine, CL_QUEUE_INDEX_INTEL, 0, 0};

    auto mutableHwInfo = clDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
    for (uint32_t ccsIndex = 0; ccsIndex < ccsCount; ccsIndex++) {
        // properties[3] is the value paired with CL_QUEUE_INDEX_INTEL
        properties[3] = ccsIndex;
        mutableHwInfo->gtSystemInfo.CCSInfo.Instances.CCSEnableMask = (1 << ccsIndex);

        cl_int status = CL_SUCCESS;
        cl_command_queue queueHandle = clCreateCommandQueueWithProperties(&context, clDevice.get(), properties, &status);
        EXPECT_EQ(CL_SUCCESS, status);
        clReleaseCommandQueue(queueHandle);
    }
}

View File

@ -29,6 +29,7 @@ set(IGDRCL_SRCS_tests_command_stream
${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_receiver_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/get_devices_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling_ocl_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/linear_stream_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/linear_stream_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator_tests.cpp
@ -41,5 +42,16 @@ set(IGDRCL_SRCS_tests_command_stream
${CMAKE_CURRENT_SOURCE_DIR}/compute_mode_tests.h
)
# The *_xehp_plus.cpp sources below are appended to the command stream test list
# only when TESTS_XEHP_PLUS is set.
if(TESTS_XEHP_PLUS)
list(APPEND IGDRCL_SRCS_tests_command_stream
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_tests_xehp_plus.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_tests_xehp_plus.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_xehp_plus.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_tests_xehp_plus.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compute_mode_tests_xehp_plus.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_tests_xehp_plus.cpp
)
endif()
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_command_stream})
add_subdirectories()

View File

@ -0,0 +1,380 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/memory_manager/memory_banks.h"
#include "shared/source/memory_manager/memory_pool.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
#include "opencl/source/helpers/memory_properties_helpers.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/helpers/hw_helper_tests.h"
#include "opencl/test/unit_test/libult/ult_aub_command_stream_receiver.h"
#include "opencl/test/unit_test/mocks/mock_aub_csr.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_os_context.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "test.h"
#include <set>
#include <vector>
using namespace NEO;
struct XeHPPlusAubCommandStreamReceiverTests : ClDeviceFixture, ::testing::Test {
template <typename FamilyType>
void setUpImpl() {
hardwareInfo = *defaultHwInfo;
hardwareInfoSetup[hardwareInfo.platform.eProductFamily](&hardwareInfo, true, 0);
hardwareInfo.gtSystemInfo.MultiTileArchInfo.IsValid = true;
ClDeviceFixture::SetUpImpl(&hardwareInfo);
}
void SetUp() override {
}
void TearDown() override {
ClDeviceFixture::TearDown();
}
};
// AUB CSR mock: re-exports a few base-class members through using-declarations and
// returns the test-controlled deviceIndex from getDeviceIndex().
template <typename FamilyType>
class MockAubCsrXeHPPlus : public AUBCommandStreamReceiverHw<FamilyType> {
    using BaseClass = AUBCommandStreamReceiverHw<FamilyType>;

  public:
    using AUBCommandStreamReceiverHw<FamilyType>::getAddressSpace;
    using CommandStreamReceiverHw<FamilyType>::localMemoryEnabled;
    using CommandStreamReceiverSimulatedHw<FamilyType>::createPhysicalAddressAllocator;

    // Value reported by getDeviceIndex(); tests overwrite it directly.
    uint32_t deviceIndex{0u};

    MockAubCsrXeHPPlus(const std::string &fileName,
                       bool standalone, ExecutionEnvironment &executionEnvironment,
                       uint32_t rootDeviceIndex,
                       const DeviceBitfield deviceBitfield)
        : BaseClass(fileName, standalone, executionEnvironment, rootDeviceIndex, deviceBitfield) {}

    uint32_t getDeviceIndex() const override {
        return deviceIndex;
    }
};
// Every CCS context is expected to yield header 0x00030401, the RCS context 0x00030001.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGetGUCWorkQueueItemHeaderIsCalledThenAppropriateValueDependingOnEngineTypeIsReturned) {
    setUpImpl<FamilyType>();

    MockOsContext rcsOsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    MockOsContext ccs0OsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    MockOsContext ccs1OsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS1, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    MockOsContext ccs2OsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS2, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    MockOsContext ccs3OsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS3, EngineUsage::Regular}, PreemptionMode::Disabled, false);

    auto aubCsr = std::make_unique<AUBCommandStreamReceiverHw<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    MockOsContext *ccsContexts[] = {&ccs0OsContext, &ccs1OsContext, &ccs2OsContext, &ccs3OsContext};
    for (auto *ccsContext : ccsContexts) {
        aubCsr->setupContext(*ccsContext);
        EXPECT_EQ(0x00030401u, aubCsr->getGUCWorkQueueItemHeader());
    }

    aubCsr->setupContext(rcsOsContext);
    EXPECT_EQ(0x00030001u, aubCsr->getGUCWorkQueueItemHeader());
}
// With AUBDumpForceAllToLocalMemory disabled and a default mock allocation,
// getPPGTTAdditionalBits is expected to return 3.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenGraphicsAlloctionWithNonLocalMemoryPoolWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.AUBDumpForceAllToLocalMemory.set(false);

    auto aubCsr = std::make_unique<AUBCommandStreamReceiverHw<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockGraphicsAllocation mockAllocation(nullptr, 0);

    auto additionalBits = aubCsr->getPPGTTAdditionalBits(&mockAllocation);
    EXPECT_EQ(3u, additionalBits);
}
// An allocation whose memory pool is overridden to LocalMemory is expected to get bit 11
// in addition to the value 3.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenGraphicsAlloctionWithLocalMemoryPoolWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.AUBDumpForceAllToLocalMemory.set(false);

    auto aubCsr = std::make_unique<AUBCommandStreamReceiverHw<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockGraphicsAllocation mockAllocation(nullptr, 0);
    mockAllocation.overrideMemoryPool(MemoryPool::LocalMemory);

    auto additionalBits = aubCsr->getPPGTTAdditionalBits(&mockAllocation);
    EXPECT_EQ(3u | (1 << 11), additionalBits);
}
// With AUBDumpForceAllToLocalMemory enabled, bit 11 is expected even though the
// allocation's memory pool is left untouched.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenAubDumpForceAllToLocalMemoryPoolWhenGetPPGTTAdditionalBitsIsCalledThenLocalBitIsReturned) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.AUBDumpForceAllToLocalMemory.set(true);

    auto aubCsr = std::make_unique<AUBCommandStreamReceiverHw<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockGraphicsAllocation mockAllocation(nullptr, 0);

    auto additionalBits = aubCsr->getPPGTTAdditionalBits(&mockAllocation);
    EXPECT_EQ(3u | (1 << 11), additionalBits);
}
// AUBDumpForceAllToLocalMemory enabled: getAddressSpace(TraceNotype) is expected to return TraceLocal.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenAubDumpForceAllToLocalMemoryEnabledWhenGetAddressSpaceIsCalledThenTraceLocalIsReturned) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.AUBDumpForceAllToLocalMemory.set(true);

    auto aubCsr = std::make_unique<MockAubCsrXeHPPlus<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto mockStream = std::make_unique<MockAubFileStreamMockMmioWrite>();
    aubCsr->stream = mockStream.get();

    const auto returnedSpace = aubCsr->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype);
    EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceLocal, returnedSpace);
}
// AUBDumpForceAllToLocalMemory disabled: getAddressSpace(TraceNotype) is expected to return TraceNonlocal.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenAubDumpForceAllToLocalMemoryDisabledWhenGetAddressSpaceIsCalledThenTraceNonlocalIsReturned) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.AUBDumpForceAllToLocalMemory.set(false);

    auto aubCsr = std::make_unique<MockAubCsrXeHPPlus<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto mockStream = std::make_unique<MockAubFileStreamMockMmioWrite>();
    aubCsr->stream = mockStream.get();

    const auto returnedSpace = aubCsr->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype);
    EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceNonlocal, returnedSpace);
}
// After initEngineMMIO on a CCS context, the pair (0xB234, 0xA0000000) must be on the stream's MMIO list.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenCCSEnabledWhenEngineMmiosAreInitializedThenExpectL3ConfigMmioIsWritten) {
    setUpImpl<FamilyType>();

    MockOsContext ccsOsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    AUBCommandStreamReceiverHw<FamilyType> aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    aubCsr.setupContext(ccsOsContext);

    auto mockStream = std::make_unique<MockAubFileStreamMockMmioWrite>();
    aubCsr.stream = mockStream.get();
    aubCsr.initEngineMMIO();

    const bool mmioWritten = mockStream->isOnMmioList(MMIOPair(0xB234, 0xA0000000u));
    EXPECT_TRUE(mmioWritten);
}
// After initEngineMMIO on an RCS context, the pair (0xB134, 0xA0000000) must be on the stream's MMIO list.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenRCSEnabledWhenEngineMmiosAreInitializedThenExpectL3ConfigMmioIsWritten) {
    setUpImpl<FamilyType>();

    MockOsContext rcsOsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    AUBCommandStreamReceiverHw<FamilyType> aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    aubCsr.setupContext(rcsOsContext);

    auto mockStream = std::make_unique<MockAubFileStreamMockMmioWrite>();
    aubCsr.stream = mockStream.get();
    aubCsr.initEngineMMIO();

    const bool mmioWritten = mockStream->isOnMmioList(MMIOPair(0xB134, 0xA0000000u));
    EXPECT_TRUE(mmioWritten);
}
// PTE bits with present, writable and local-memory bits set are expected to map to TraceLocal.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenLocaLMemoryBitWhenGetAddressSpaceFromPTEBitsIsCalledThenTraceLocalIsReturned) {
    setUpImpl<FamilyType>();

    auto aubCsr = std::make_unique<MockAubCsrXeHPPlus<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto mockStream = std::make_unique<MockAubFileStreamMockMmioWrite>();
    aubCsr->stream = mockStream.get();

    uint64_t pteBits = BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::localMemoryBit);

    const auto returnedSpace = aubCsr->getAddressSpaceFromPTEBits(pteBits);
    EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceLocal, returnedSpace);
}
// With local memory enabled, getMemoryBankForGtt must follow the mock's deviceIndex (0..3).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenLocalMemoryEnabledWhenGetMemoryBankForGttIsCalledThenCorrectBankForDeviceIsReturned) {
    setUpImpl<FamilyType>();

    auto aubCsr = std::make_unique<MockAubCsrXeHPPlus<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    aubCsr->localMemoryEnabled = true;

    for (uint32_t index = 0u; index < 4u; ++index) {
        // index 0 equals the mock's default, so the first assignment changes nothing
        aubCsr->deviceIndex = index;
        const auto bank = aubCsr->getMemoryBankForGtt();
        EXPECT_EQ(MemoryBanks::getBankForLocalMemory(index), bank);
    }
}
// Default configuration: the allocator is expected to report one bank of 32 GB.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, whenPhysicalAllocatorIsCreatedThenItHasCorrectBankSzieAndNumberOfBanks) {
    setUpImpl<FamilyType>();

    auto aubCsr = std::make_unique<MockAubCsrXeHPPlus<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    // Take ownership of the returned allocator so it is released at the end of the test.
    std::unique_ptr<PhysicalAddressAllocator> allocatorOwner(aubCsr->createPhysicalAddressAllocator(&pDevice->getHardwareInfo()));
    auto allocatorHw = reinterpret_cast<PhysicalAddressAllocatorHw<FamilyType> *>(allocatorOwner.get());

    EXPECT_EQ(32 * MemoryConstants::gigaByte, allocatorHw->getBankSize());
    EXPECT_EQ(1u, allocatorHw->getNumberOfBanks());
}
// CreateMultipleSubDevices=4 (set before device creation): the allocator is expected to
// report four banks of 8 GB each.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, whenPhysicalAllocatorIsCreatedWith4TileConfigThenItHasCorrectBankSzieAndNumberOfBanks) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.CreateMultipleSubDevices.set(4);
    setUpImpl<FamilyType>();

    auto aubCsr = std::make_unique<MockAubCsrXeHPPlus<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    // Take ownership of the returned allocator so it is released at the end of the test.
    std::unique_ptr<PhysicalAddressAllocator> allocatorOwner(aubCsr->createPhysicalAddressAllocator(&pDevice->getHardwareInfo()));
    auto allocatorHw = reinterpret_cast<PhysicalAddressAllocatorHw<FamilyType> *>(allocatorOwner.get());

    EXPECT_EQ(8 * MemoryConstants::gigaByte, allocatorHw->getBankSize());
    EXPECT_EQ(4u, allocatorHw->getNumberOfBanks());
}
// For RCS and each CCS instance: set up the context, run initEngineMMIO and check that two
// engine-specific MMIO pairs are present on the mock stream's list.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenInitEngineMMIOIsCalledForGivenEngineTypeThenCorrespondingMmiosAreInitialized) {
    setUpImpl<FamilyType>();
    DebugManagerStateRestore debugRestorer;

    MockOsContext rcsOsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    MockOsContext ccs0OsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    MockOsContext ccs1OsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS1, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    MockOsContext ccs2OsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS2, EngineUsage::Regular}, PreemptionMode::Disabled, false);
    MockOsContext ccs3OsContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_CCS3, EngineUsage::Regular}, PreemptionMode::Disabled, false);

    auto aubCsr = std::make_unique<AUBCommandStreamReceiverHw<FamilyType>>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    EXPECT_NE(nullptr, aubCsr);

    auto mockStream = std::make_unique<MockAubFileStreamMockMmioWrite>();
    aubCsr->stream = mockStream.get();

    // Shared sequence for one engine: bind the context, initialize MMIOs, verify both pairs.
    auto initAndExpectMmios = [&](MockOsContext &engineContext, const MMIOPair &firstPair, const MMIOPair &secondPair) {
        aubCsr->setupContext(engineContext);
        aubCsr->initEngineMMIO();
        EXPECT_TRUE(mockStream->isOnMmioList(firstPair));
        EXPECT_TRUE(mockStream->isOnMmioList(secondPair));
    };

    initAndExpectMmios(rcsOsContext, MMIOPair(0x0002000 + 0x000058, 0x00000000), MMIOPair(0x0002000 + 0x00029c, 0xffff8280));
    initAndExpectMmios(ccs0OsContext, MMIOPair(0x0000ce90, 0x00030003), MMIOPair(0x1a000 + 0x000029c, 0xffff8280));
    initAndExpectMmios(ccs1OsContext, MMIOPair(0x0000ce90, 0x00030003), MMIOPair(0x1c000 + 0x000029c, 0xffff8280));
    initAndExpectMmios(ccs2OsContext, MMIOPair(0x0000ce90, 0x00030003), MMIOPair(0x1e000 + 0x000029c, 0xffff8280));
    initAndExpectMmios(ccs3OsContext, MMIOPair(0x0000ce90, 0x00030003), MMIOPair(0x26000 + 0x000029c, 0xffff8280));
}
// Checks the first 17 entries of the per-engine MMIO list for the given CCS engine.
// Entries 3..15 are relative to mmioBase; entries 4..15 are twelve consecutive
// 4-byte-spaced offsets starting at 0x4d0, all carrying the value 0xe000.
template <class FamilyType>
static void checkCcsEngineMMIO(aub_stream::EngineType engineType, uint32_t mmioBase) {
    const auto &engineMmios = *AUBFamilyMapper<FamilyType>::perEngineMMIO[engineType];

    EXPECT_EQ(engineMmios[0], MMIOPair(0x0000ce90, 0x00030003));
    EXPECT_EQ(engineMmios[1], MMIOPair(0x0000b170, 0x00030003));
    EXPECT_EQ(engineMmios[2], MMIOPair(0x00014800, 0xFFFF0001));
    EXPECT_EQ(engineMmios[3], MMIOPair(mmioBase + 0x000029c, 0xffff8280));

    constexpr uint32_t firstRangeIndex = 4u;
    constexpr uint32_t rangeLength = 12u;
    for (uint32_t i = 0u; i < rangeLength; ++i) {
        EXPECT_EQ(engineMmios[firstRangeIndex + i], MMIOPair(mmioBase + 0x00004d0 + i * 4u, 0x0000e000));
    }

    EXPECT_EQ(engineMmios[16], MMIOPair(0x0000B234, 0xA0000000));
}
// Runs checkCcsEngineMMIO for each of the four CCS engines with its MMIO base.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenCcsEngineMmioListForSpecificCcsInstanceIsReadThenItIsInitializedWithProperValues) {
    setUpImpl<FamilyType>();

    struct CcsEngineBase {
        aub_stream::EngineType engineType;
        uint32_t mmioBase;
    };
    const CcsEngineBase ccsEngines[] = {
        {aub_stream::ENGINE_CCS, 0x1a000},
        {aub_stream::ENGINE_CCS1, 0x1c000},
        {aub_stream::ENGINE_CCS2, 0x1e000},
        {aub_stream::ENGINE_CCS3, 0x26000},
    };

    for (const auto &ccsEngine : ccsEngines) {
        checkCcsEngineMMIO<FamilyType>(ccsEngine.engineType, ccsEngine.mmioBase);
    }
}
// Checks the first 22 entries of the RCS per-engine MMIO list. Entries 8..18 are eleven
// consecutive 4-byte-spaced offsets starting at mmioBase + 0x4d4, all carrying 0xe000.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenRcsEngineMmioListIsReadThenItIsInitializedWithProperValues) {
    setUpImpl<FamilyType>();

    const auto &rcsMmios = *AUBFamilyMapper<FamilyType>::perEngineMMIO[aub_stream::ENGINE_RCS];
    auto mmioBase = 0x002000;

    EXPECT_EQ(rcsMmios[0], MMIOPair(mmioBase + 0x000058, 0x00000000));
    EXPECT_EQ(rcsMmios[1], MMIOPair(mmioBase + 0x0000a8, 0x00000000));
    EXPECT_EQ(rcsMmios[2], MMIOPair(mmioBase + 0x000029c, 0xffff8280));

    EXPECT_EQ(rcsMmios[3], MMIOPair(0x00002090, 0xffff0000));
    EXPECT_EQ(rcsMmios[4], MMIOPair(0x000020e0, 0xffff4000));
    EXPECT_EQ(rcsMmios[5], MMIOPair(0x000020e4, 0xffff0000));
    EXPECT_EQ(rcsMmios[6], MMIOPair(0x000020ec, 0xffff0051));

    EXPECT_EQ(rcsMmios[7], MMIOPair(mmioBase + 0x00004d0, 0x00007014));
    for (int i = 0; i < 11; ++i) {
        EXPECT_EQ(rcsMmios[8 + i], MMIOPair(mmioBase + 0x00004d4 + i * 4, 0x0000e000));
    }

    EXPECT_EQ(rcsMmios[19], MMIOPair(0x00002580, 0xffff0005));
    EXPECT_EQ(rcsMmios[20], MMIOPair(0x0000e194, 0xffff0002));
    EXPECT_EQ(rcsMmios[21], MMIOPair(0x0000B134, 0xA0000000));
}
using XeHPPlusAubCommandStreamReceiverTests2 = HwHelperTest;

// With EnableLocalMemory=1 and ftrLocalMemory set, the CSR is expected to have local memory
// enabled and getGTTData is expected to set data.localMemory.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusAubCommandStreamReceiverTests2, givenLocalMemoryEnabledInCSRWhenGetGTTDataIsCalledThenLocalMemoryIsSet) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.EnableLocalMemory.set(1);
    hardwareInfo.featureTable.ftrLocalMemory = true;

    std::unique_ptr<MockDevice> mockDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hardwareInfo));
    auto aubCsr = std::make_unique<MockAubCsrXeHPPlus<FamilyType>>("", true, *mockDevice->executionEnvironment, mockDevice->getRootDeviceIndex(), mockDevice->getDeviceBitfield());
    EXPECT_TRUE(aubCsr->localMemoryEnabled);

    AubGTTData gttData = {false, false};
    aubCsr->getGTTData(nullptr, gttData);
    EXPECT_TRUE(gttData.localMemory);
}

View File

@ -0,0 +1,685 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
#include "opencl/test/unit_test/libult/ult_command_stream_receiver.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_csr.h"
#include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h"
#include "test.h"
using namespace NEO;
using CommandStreamReceiverFlushTaskXeHPPlusTests = UltCommandStreamReceiverTest;

// After a flushTask, a _3DSTATE_BINDING_TABLE_POOL_ALLOC command must be present and must
// describe the ssh heap (base address, size in pages) with the state-heap MOCS value.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, whenReprogrammingSshThenBindingTablePoolIsProgrammed) {
    using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    flushTask(csr);
    parseCommands<FamilyType>(csr.getCS(0));

    auto poolAllocCmd = getCommand<_3DSTATE_BINDING_TABLE_POOL_ALLOC>();
    ASSERT_NE(nullptr, poolAllocCmd);

    EXPECT_EQ(reinterpret_cast<uintptr_t>(ssh.getCpuBase()), poolAllocCmd->getBindingTablePoolBaseAddress());
    EXPECT_EQ(ssh.getHeapSizeInPages(), poolAllocCmd->getBindingTablePoolBufferSize());
    EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER),
              poolAllocCmd->getSurfaceObjectControlStateIndexToMocsTables());
}
// With DisableCachingForHeaps=1 the binding table pool command is expected to carry the
// MOCS value of GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, whenReprogrammingSshThenBindingTablePoolIsProgrammedWithCachingOffWhenDebugKeyPresent) {
    using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DisableCachingForHeaps.set(1);

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    flushTask(csr);
    parseCommands<FamilyType>(csr.getCS(0));

    auto poolAllocCmd = getCommand<_3DSTATE_BINDING_TABLE_POOL_ALLOC>();
    ASSERT_NE(nullptr, poolAllocCmd);

    EXPECT_EQ(reinterpret_cast<uintptr_t>(ssh.getCpuBase()), poolAllocCmd->getBindingTablePoolBaseAddress());
    EXPECT_EQ(ssh.getHeapSizeInPages(), poolAllocCmd->getBindingTablePoolBufferSize());
    EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED),
              poolAllocCmd->getSurfaceObjectControlStateIndexToMocsTables());
}
// First flush programs both STATE_BASE_ADDRESS and the binding table pool. The second flush
// must emit STATE_BASE_ADDRESS again but no new binding table pool command.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, whenNotReprogrammingSshThenBindingTablePoolIsNotProgrammed) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    flushTask(csr);
    parseCommands<FamilyType>(csr.getCS(0));

    auto sbaCmd = getCommand<STATE_BASE_ADDRESS>();
    EXPECT_NE(nullptr, sbaCmd);

    auto poolAllocCmd = getCommand<_3DSTATE_BINDING_TABLE_POOL_ALLOC>();
    ASSERT_NE(nullptr, poolAllocCmd);
    EXPECT_EQ(reinterpret_cast<uintptr_t>(ssh.getCpuBase()), poolAllocCmd->getBindingTablePoolBaseAddress());
    EXPECT_EQ(ssh.getHeapSizeInPages(), poolAllocCmd->getBindingTablePoolBufferSize());
    EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER),
              poolAllocCmd->getSurfaceObjectControlStateIndexToMocsTables());

    // Only commands emitted after this point belong to the second flush.
    const auto secondFlushOffset = csr.getCS(0).getUsed();

    // make SBA dirty (using ioh as dsh and dsh as ioh just to force SBA reprogramming)
    csr.flushTask(commandStream, 0, ioh, dsh, ssh, taskLevel, flushTaskFlags, *pDevice);

    HardwareParse secondFlushParser;
    secondFlushParser.parseCommands<FamilyType>(csr.getCS(0), secondFlushOffset);

    sbaCmd = secondFlushParser.getCommand<STATE_BASE_ADDRESS>();
    EXPECT_NE(nullptr, sbaCmd);

    poolAllocCmd = secondFlushParser.getCommand<_3DSTATE_BINDING_TABLE_POOL_ALLOC>();
    EXPECT_EQ(nullptr, poolAllocCmd);
}
// When STATE_BASE_ADDRESS has to be programmed, a PIPE_CONTROL must precede it with texture
// cache invalidation and HDC pipeline flush set, and DC flush matching isDcFlushAllowed().
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToItWithTextureCacheFlushAndHdc) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    configureCSRtoNonDirtyState<FamilyType>(false);

    // Swap in a different ioh buffer before flushing; per the test name this is what makes
    // STATE_BASE_ADDRESS required again.
    ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3);

    flushTask(commandStreamReceiver);
    parseCommands<FamilyType>(commandStreamReceiver.getCS(0));

    auto stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    // Fatal: without a STATE_BASE_ADDRESS the search range below would be the whole list.
    ASSERT_NE(cmdList.end(), stateBaseAddressItor);

    auto pipeControlItor = find<PIPE_CONTROL *>(cmdList.begin(), stateBaseAddressItor);
    // Fatal: the iterator is dereferenced below, so it must point at a real PIPE_CONTROL.
    ASSERT_NE(stateBaseAddressItor, pipeControlItor);

    auto pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(*pipeControlItor);
    EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable());
    EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControlCmd->getDcFlushEnable());
    EXPECT_TRUE(pipeControlCmd->getHdcPipelineFlush());
}
// After initProgrammingFlags(), a second flush must emit both STATE_BASE_ADDRESS and the
// binding table pool command again.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, whenNotReprogrammingSshButInitProgrammingFlagsThenBindingTablePoolIsProgrammed) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    flushTask(csr);
    parseCommands<FamilyType>(csr.getCS(0));

    auto sbaCmd = getCommand<STATE_BASE_ADDRESS>();
    EXPECT_NE(nullptr, sbaCmd);

    auto poolAllocCmd = getCommand<_3DSTATE_BINDING_TABLE_POOL_ALLOC>();
    ASSERT_NE(nullptr, poolAllocCmd);
    EXPECT_EQ(reinterpret_cast<uintptr_t>(ssh.getCpuBase()), poolAllocCmd->getBindingTablePoolBaseAddress());
    EXPECT_EQ(ssh.getHeapSizeInPages(), poolAllocCmd->getBindingTablePoolBufferSize());
    EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER),
              poolAllocCmd->getSurfaceObjectControlStateIndexToMocsTables());

    // Only commands emitted after this point belong to the second flush.
    const auto secondFlushOffset = csr.getCS(0).getUsed();

    csr.initProgrammingFlags();
    flushTask(csr);

    HardwareParse secondFlushParser;
    secondFlushParser.parseCommands<FamilyType>(csr.getCS(0), secondFlushOffset);

    sbaCmd = secondFlushParser.getCommand<STATE_BASE_ADDRESS>();
    EXPECT_NE(nullptr, sbaCmd);

    poolAllocCmd = secondFlushParser.getCommand<_3DSTATE_BINDING_TABLE_POOL_ALLOC>();
    EXPECT_NE(nullptr, poolAllocCmd);
}
// programStateBaseAddress is called with all three heap pointers null: the dynamic-state,
// indirect-object, surface-state and bindless fields must stay unprogrammed, while the
// instruction and general-state fields are programmed from the supplied base addresses.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenSbaProgrammingWhenHeapsAreNotProvidedThenDontProgram) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    uint64_t instructionHeapBase = 0x10000;
    uint64_t internalHeapBase = 0x10000;
    uint64_t generalStateBase = 0x30000;

    STATE_BASE_ADDRESS stateBaseAddressCmd;
    StateBaseAddressHelper<FamilyType>::programStateBaseAddress(&stateBaseAddressCmd,
                                                                nullptr,
                                                                nullptr,
                                                                nullptr,
                                                                generalStateBase,
                                                                true,
                                                                0,
                                                                internalHeapBase,
                                                                instructionHeapBase,
                                                                0,
                                                                true,
                                                                false,
                                                                pDevice->getGmmHelper(),
                                                                false,
                                                                MemoryCompressionState::NotApplicable,
                                                                false,
                                                                1u);

    // Dynamic state: not programmed
    EXPECT_FALSE(stateBaseAddressCmd.getDynamicStateBaseAddressModifyEnable());
    EXPECT_FALSE(stateBaseAddressCmd.getDynamicStateBufferSizeModifyEnable());
    EXPECT_EQ(0u, stateBaseAddressCmd.getDynamicStateBaseAddress());
    EXPECT_EQ(0u, stateBaseAddressCmd.getDynamicStateBufferSize());

    // Indirect object: not programmed
    EXPECT_FALSE(stateBaseAddressCmd.getIndirectObjectBaseAddressModifyEnable());
    EXPECT_FALSE(stateBaseAddressCmd.getIndirectObjectBufferSizeModifyEnable());
    EXPECT_EQ(0u, stateBaseAddressCmd.getIndirectObjectBaseAddress());
    EXPECT_EQ(0u, stateBaseAddressCmd.getIndirectObjectBufferSize());

    // Surface state: not programmed
    EXPECT_FALSE(stateBaseAddressCmd.getSurfaceStateBaseAddressModifyEnable());
    EXPECT_EQ(0u, stateBaseAddressCmd.getSurfaceStateBaseAddress());

    // Instruction heap: programmed
    EXPECT_TRUE(stateBaseAddressCmd.getInstructionBaseAddressModifyEnable());
    EXPECT_EQ(instructionHeapBase, stateBaseAddressCmd.getInstructionBaseAddress());
    EXPECT_TRUE(stateBaseAddressCmd.getInstructionBufferSizeModifyEnable());
    EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, stateBaseAddressCmd.getInstructionBufferSize());

    // General state: programmed; the expected base differs between 64-bit and 32-bit builds
    EXPECT_TRUE(stateBaseAddressCmd.getGeneralStateBaseAddressModifyEnable());
    EXPECT_TRUE(stateBaseAddressCmd.getGeneralStateBufferSizeModifyEnable());
    if constexpr (is64bit) {
        EXPECT_EQ(GmmHelper::decanonize(internalHeapBase), stateBaseAddressCmd.getGeneralStateBaseAddress());
    } else {
        EXPECT_EQ(generalStateBase, stateBaseAddressCmd.getGeneralStateBaseAddress());
    }
    EXPECT_EQ(0xfffffu, stateBaseAddressCmd.getGeneralStateBufferSize());

    // Bindless surface state: not programmed
    EXPECT_EQ(0u, stateBaseAddressCmd.getBindlessSurfaceStateBaseAddress());
    EXPECT_FALSE(stateBaseAddressCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
    EXPECT_EQ(0u, stateBaseAddressCmd.getBindlessSurfaceStateSize());
}
// Product matcher alias built from IsAtLeastProduct with IGFX_XE_HP_SDV as the lower bound;
// passed as the last argument of the HWTEST2_F test that follows.
using isXeHPOrAbove = IsAtLeastProduct<IGFX_XE_HP_SDV>;
// With the FlushAllCaches debug flag set, a PIPE_CONTROL added through
// MemorySynchronizationCommands must have every flush/invalidate bit enabled.
HWTEST2_F(CommandStreamReceiverFlushTaskXeHPPlusTests, whenFlushAllCachesVariableIsSetAndAddPipeControlIsCalledThenFieldsAreProperlySet, isXeHPOrAbove) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.FlushAllCaches.set(true);

    // Backing storage sized for three PIPE_CONTROL commands.
    constexpr size_t storageSize = sizeof(PIPE_CONTROL) * 3;
    char buff[storageSize];
    LinearStream stream(buff, storageSize);

    PipeControlArgs args;
    MemorySynchronizationCommands<FamilyType>::addPipeControl(stream, args);

    parseCommands<FamilyType>(stream, 0);

    PIPE_CONTROL *pipeControl = getCommand<PIPE_CONTROL>();
    ASSERT_NE(nullptr, pipeControl);

    // WA pipeControl added: when two commands were parsed, inspect the second one.
    if (cmdList.size() == 2) {
        ++pipeControl;
    }

    EXPECT_TRUE(pipeControl->getDcFlushEnable());
    EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable());
    EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable());
    EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable());
    EXPECT_TRUE(pipeControl->getPipeControlFlushEnable());
    EXPECT_TRUE(pipeControl->getVfCacheInvalidationEnable());
    EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable());
    EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable());

    // XeHP+ only field
    EXPECT_TRUE(pipeControl->getCompressionControlSurfaceCcsFlush());
}
// A CSR put into the non-dirty state must not emit anything into its own stream on flushTask.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenconfigureCSRtoNonDirtyStateWhenFlushTaskIsCalledThenNoCommandsAreAdded) {
    configureCSRtoNonDirtyState<FamilyType>(true);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    flushTask(csr);

    const auto usedByCsr = csr.commandStream.getUsed();
    EXPECT_EQ(0u, usedByCsr);
}
// For a multi-OS-context capable CSR, flushTask is expected to consume one cache line of the
// CSR stream, starting with an MI_BATCH_BUFFER_START.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenMultiOsContextCommandStreamReceiverWhenFlushTaskIsCalledThenCommandStreamReceiverStreamIsUsed) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    configureCSRtoNonDirtyState<FamilyType>(true);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.multiOsContextCapable = true;

    // Consume a few bytes of the task stream before flushing.
    commandStream.getSpace(4);

    flushTask(csr);

    EXPECT_EQ(MemoryConstants::cacheLineSize, csr.commandStream.getUsed());

    auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(csr.commandStream.getCpuBase());
    EXPECT_NE(nullptr, bbStart);
}
// In batched dispatch mode, fills the CSR stream so that only a pipe control and a BB end fit
// in the first cache line, then checks that flushTask ends up using two full cache lines.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCsrThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    csr.timestampPacketWriteEnabled = false;

    configureCSRtoNonDirtyState<FamilyType>(true);

    csr.getCS(1024u);
    auto &csrStream = csr.commandStream;

    // Bump the task level so a pipe control is emitted, and pre-fill the stream so that only
    // the pipe control and the BB end still fit into the first cache line.
    taskLevel++;
    const auto pipeControlSize = MemorySynchronizationCommands<FamilyType>::getSizeForSinglePipeControl();
    const auto paddingSize = MemoryConstants::cacheLineSize - pipeControlSize - sizeof(MI_BATCH_BUFFER_END);
    csrStream.getSpace(paddingSize);

    const auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize;

    flushTask(csr);

    EXPECT_EQ(expectedUsedSize, csr.commandStream.getUsed());
}
// Flushing at an unchanged task level from a non-dirty CSR must leave the CSR stream empty.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, GivenSameTaskLevelThenDontSendPipeControl) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Put the CSR into a state where no state or commands need to be submitted.
    configureCSRtoNonDirtyState<FamilyType>(true);

    flushTask(csr);

    EXPECT_EQ(taskLevel, csr.taskLevel);
    EXPECT_EQ(csr.commandStream.getUsed(), 0u);
}
// With ThreadGroup preemption forced on both the debug flag and the device, a non-dirty CSR
// must not add anything to its command stream on flushTask.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenDeviceWithThreadGroupPreemptionSupportThenDontSendMediaVfeStateIfNotDirty) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForcePreemptionMode.set(static_cast<int32_t>(PreemptionMode::ThreadGroup));

    // The raw pointer is handed to the device below (presumably transferring ownership).
    auto mockCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    pDevice->resetCommandStreamReceiver(mockCsr);

    // Put the CSR into a state where no state or commands need to be submitted.
    configureCSRtoNonDirtyState<FamilyType>(true);

    flushTask(*mockCsr);

    EXPECT_EQ(taskLevel, mockCsr->peekTaskLevel());
    EXPECT_EQ(0u, mockCsr->commandStream.getUsed());
}
// Registers an instruction cache flush on the CSR and checks that the next flushTask emits a
// PIPE_CONTROL with instruction cache invalidation enabled and clears the pending request.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenCommandStreamReceiverWithInstructionCacheRequestWhenFlushTaskIsCalledThenPipeControlWithInstructionCacheIsEmitted) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    configureCSRtoNonDirtyState<FamilyType>(true);

    commandStreamReceiver.registerInstructionCacheFlush();
    EXPECT_EQ(1u, commandStreamReceiver.recursiveLockCounter);

    flushTask(commandStreamReceiver);

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is dereferenced right below, and dereferencing end()
    // after a non-fatal failure would be undefined behavior.
    ASSERT_NE(cmdList.end(), itorPC);

    auto pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
    EXPECT_TRUE(pipeControlCmd->getInstructionCacheInvalidateEnable());
    EXPECT_FALSE(commandStreamReceiver.requiresInstructionCacheFlush);
}
// When timestamp packet writes are enabled, submitting with a higher task level must not
// put a PIPE_CONTROL into the CSR stream.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenHigherTaskLevelWhenTimestampPacketWriteIsEnabledThenDontAddPipeControl) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.timestampPacketWriteEnabled = true;
    csr.isPreambleSent = true;
    configureCSRtoNonDirtyState<FamilyType>(true);

    // Record the current level on the CSR, then submit with a higher one.
    csr.taskLevel = taskLevel;
    taskLevel++;

    flushTask(csr);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);
}
// With ForcePipeControlPriorToWalker and FlushAllCaches set, flushTask is expected to emit
// exactly two PIPE_CONTROLs: a plain CS stall first, then one with every flush enabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, WhenForcePipeControlPriorToWalkerIsSetThenAddExtraPipeControls) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManagerStateRestore stateRestore;
    DebugManager.flags.ForcePipeControlPriorToWalker.set(true);
    DebugManager.flags.FlushAllCaches.set(true);

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;
    configureCSRtoNonDirtyState<FamilyType>(true);
    commandStreamReceiver.taskLevel = taskLevel;

    flushTask(commandStreamReceiver);

    parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);

    // Walk every parsed command, validating PIPE_CONTROLs by their position in the stream.
    int counterPC = 0;
    for (void *cmd : cmdList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(cmd);
        if (!pipeControl) {
            continue;
        }

        switch (counterPC) {
        case 0: // First pipe control with CS Stall
            EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
            EXPECT_FALSE(pipeControl->getDcFlushEnable());
            EXPECT_FALSE(pipeControl->getRenderTargetCacheFlushEnable());
            EXPECT_FALSE(pipeControl->getInstructionCacheInvalidateEnable());
            EXPECT_FALSE(pipeControl->getTextureCacheInvalidationEnable());
            EXPECT_FALSE(pipeControl->getPipeControlFlushEnable());
            EXPECT_FALSE(pipeControl->getVfCacheInvalidationEnable());
            EXPECT_FALSE(pipeControl->getConstantCacheInvalidationEnable());
            EXPECT_FALSE(pipeControl->getStateCacheInvalidationEnable());
            break;
        case 1: // Second pipe control with all flushes
            EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
            EXPECT_TRUE(pipeControl->getDcFlushEnable());
            EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable());
            EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable());
            EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable());
            EXPECT_TRUE(pipeControl->getPipeControlFlushEnable());
            EXPECT_TRUE(pipeControl->getVfCacheInvalidationEnable());
            EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable());
            EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable());
            break; // explicit: the original silently fell through into default
        default:
            // Any further pipe control is only counted; the final count check reports it.
            break;
        }
        counterPC++;
    }

    EXPECT_EQ(counterPC, 2);
}
// Even with the sampler cache flush workaround enabled, a CSR whose state says "flush not
// required" must emit nothing and must keep that state after flushTask.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, whenSamplerCacheFlushNotRequiredThenDontSendPipecontrol) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    NEO::WorkaroundTable *workaroundTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable;

    csr.isPreambleSent = true;
    csr.lastPreemptionMode = pDevice->getPreemptionMode();
    csr.setSamplerCacheFlushRequired(SamplerCacheFlushState::samplerCacheFlushNotRequired);
    configureCSRtoNonDirtyState<FamilyType>(true);
    csr.taskLevel = taskLevel;

    workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    flushTask(csr);

    EXPECT_EQ(csr.commandStream.getUsed(), 0u);
    EXPECT_EQ(SamplerCacheFlushState::samplerCacheFlushNotRequired, csr.samplerCacheFlushRequired);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);
}
// With the sampler cache flush workaround disabled, a pending "flush before" state must not
// produce a PIPE_CONTROL and must remain unchanged after flushTask.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, whenSamplerCacheFlushBeforeAndWaSamplerCacheFlushBetweenRedescribedSurfaceReadsDasabledThenDontSendPipecontrol) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using SamplerCacheFlushState = CommandStreamReceiver::SamplerCacheFlushState;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.isPreambleSent = true;
    csr.setSamplerCacheFlushRequired(SamplerCacheFlushState::samplerCacheFlushBefore);
    configureCSRtoNonDirtyState<FamilyType>(true);
    csr.taskLevel = taskLevel;

    NEO::WorkaroundTable *workaroundTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable;
    workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = false;

    flushTask(csr);

    EXPECT_EQ(csr.commandStream.getUsed(), 0u);
    EXPECT_EQ(SamplerCacheFlushState::samplerCacheFlushBefore, csr.samplerCacheFlushRequired);

    parseCommands<FamilyType>(csr.commandStream, 0);
    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), pipeControlIt);
}
// After flushTask, the STATE_BASE_ADDRESS found in the CSR stream must point at the fixture's
// heaps and carry the MOCS values obtained from the GMM helper.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, WhenFlushingTaskThenStateBaseAddressProgrammingShouldMatchTracking) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto gmmHelper = pDevice->getGmmHelper();
    auto stateHeapMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER);
    auto l1CacheOnMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    flushTask(csr);

    auto &csrStream = csr.commandStream;
    HardwareParse::parseCommands<FamilyType>(csrStream, 0);
    HardwareParse::findHardwareCommands<FamilyType>();

    ASSERT_NE(nullptr, cmdStateBaseAddress);
    auto &sba = *reinterpret_cast<STATE_BASE_ADDRESS *>(cmdStateBaseAddress);

    EXPECT_EQ(dsh.getCpuBase(), reinterpret_cast<void *>(sba.getDynamicStateBaseAddress()));

    // Expected instruction base: the internal heap base for the pool the IOH allocation lives in.
    const auto iohInLocalMemory = ioh.getGraphicsAllocation()->isAllocatedInLocalMemoryPool();
    EXPECT_EQ(csr.getMemoryManager()->getInternalHeapBaseAddress(csr.rootDeviceIndex, iohInLocalMemory), sba.getInstructionBaseAddress());

    EXPECT_EQ(ioh.getCpuBase(), reinterpret_cast<void *>(sba.getIndirectObjectBaseAddress()));
    EXPECT_EQ(ssh.getCpuBase(), reinterpret_cast<void *>(sba.getSurfaceStateBaseAddress()));

    EXPECT_EQ(l1CacheOnMocs, sba.getStatelessDataPortAccessMemoryObjectControlState());
    EXPECT_EQ(stateHeapMocs, sba.getInstructionMemoryObjectControlState());
}
// A blocking flushTask guarded by a pipe control must add the PIPE_CONTROL to the task
// command stream and leave the CSR's own stream empty.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, GivenBlockingWhenFlushingTaskThenPipeControlProgrammedCorrectly) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
    auto mockCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(mockCsr);

    // Put the CSR into a state where no state or commands need to be submitted.
    configureCSRtoNonDirtyState<FamilyType>(true);

    auto &taskStream = commandQueue.getCS(1024);
    auto &csrStream = mockCsr->getCS();

    mockCsr->lastSentCoherencyRequest = 0;

    // The blocking flag is what forces the PIPE_CONTROL.
    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
    dispatchFlags.blocking = true;
    dispatchFlags.guardCommandBufferWithPipeControl = true;

    mockCsr->flushTask(
        taskStream,
        0,
        dsh,
        ioh,
        ssh,
        taskLevel,
        dispatchFlags,
        *pDevice);

    // The task stream must have grown while the CSR stream stayed untouched.
    EXPECT_GT(taskStream.getUsed(), 0u);
    EXPECT_EQ(0u, csrStream.getUsed());

    // The PIPE_CONTROL must be present in the task stream...
    cmdList.clear();
    parseCommands<FamilyType>(taskStream, 0);
    auto taskStreamPipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), taskStreamPipeControlIt);

    // ...and absent from the CSR stream.
    cmdList.clear();
    parseCommands<FamilyType>(csrStream, 0);
    auto pipeControlCountInCsrStream = getCommandsList<PIPE_CONTROL>().size();
    EXPECT_EQ(0u, pipeControlCountInCsrStream);
}
// A non-dirty CSR given an empty task stream must not call flush at all.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
    auto &taskStream = commandQueue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(csr);

    configureCSRtoNonDirtyState<FamilyType>(true);

    DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();
    flags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());

    csr->flushTask(taskStream,
                   0,
                   dsh,
                   ioh,
                   ssh,
                   taskLevel,
                   flags,
                   *pDevice);

    EXPECT_EQ(0, csr->flushCalledCount);
}
// In batched dispatch mode, a non-dirty CSR given an empty task stream must neither flush nor
// record a command buffer in the submissions aggregator, and no allocation is made resident.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenCsrInNonDirtyStateAndBatchingModeWhenflushTaskIsCalledWithDisabledPreemptionThenSubmissionIsNotRecorded) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
    auto &taskStream = commandQueue.getCS(4096u);

    auto csr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(csr);
    csr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    auto aggregator = new mockSubmissionsAggregator();
    csr->overrideSubmissionAggregator(aggregator);

    configureCSRtoNonDirtyState<FamilyType>(true);

    DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();
    flags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());

    csr->flushTask(taskStream,
                   0,
                   dsh,
                   ioh,
                   ssh,
                   taskLevel,
                   flags,
                   *pDevice);

    EXPECT_EQ(0, csr->flushCalledCount);
    EXPECT_TRUE(aggregator->peekCmdBufferList().peekIsEmpty());

    // surfaces are non resident
    auto &residencyAllocations = csr->getResidencyAllocations();
    EXPECT_EQ(0u, residencyAllocations.size());
}
// In batched dispatch mode, flushTask must only record the command buffer (with its surfaces
// marked resident); the actual flush happens in flushBatchedSubmissions, which must submit the
// recorded buffer and drop residency again.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenCsrInBatchingModeWhenRecordedBatchBufferIsBeingSubmittedThenFlushIsCalledWithRecordedCommandBuffer) {
    CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    configureCSRtoNonDirtyState<FamilyType>(true);

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    dispatchFlags.requiresCoherency = true;

    mockCsr->lastSentCoherencyRequest = 1;

    // Consume 4 bytes so the task starts at a non-zero offset (verified as startOffset below).
    commandStream.getSpace(4);

    mockCsr->flushTask(commandStream,
                       4,
                       dsh,
                       ioh,
                       ssh,
                       taskLevel,
                       dispatchFlags,
                       *pDevice);

    // Nothing has been flushed yet; the submission was only recorded.
    EXPECT_EQ(0, mockCsr->flushCalledCount);

    auto &surfacesForResidency = mockCsr->getResidencyAllocations();
    EXPECT_EQ(0u, surfacesForResidency.size());

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers();
    // Fatal on purpose: the list head is dereferenced below, which would be a null
    // dereference if nothing had been recorded.
    ASSERT_FALSE(cmdBufferList.peekIsEmpty());
    auto cmdBuffer = cmdBufferList.peekHead();

    //preemption allocation + sip kernel
    size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
    csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0;
    csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0;

    EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size());

    // Copy the surfaces so they can still be inspected after the recorded buffer is submitted.
    std::vector<GraphicsAllocation *> residentSurfaces = cmdBuffer->surfaces;

    for (auto &graphicsAllocation : residentSurfaces) {
        EXPECT_TRUE(graphicsAllocation->isResident(mockCsr->getOsContext().getContextId()));
        EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(mockCsr->getOsContext().getContextId()));
    }

    mockCsr->flushBatchedSubmissions();

    EXPECT_FALSE(mockCsr->recordedCommandBuffer->batchBuffer.low_priority);
    EXPECT_TRUE(mockCsr->recordedCommandBuffer->batchBuffer.requiresCoherency);
    EXPECT_EQ(mockCsr->recordedCommandBuffer->batchBuffer.commandBufferAllocation, commandStream.getGraphicsAllocation());
    EXPECT_EQ(4u, mockCsr->recordedCommandBuffer->batchBuffer.startOffset);
    EXPECT_EQ(1, mockCsr->flushCalledCount);

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCommandBuffers().peekIsEmpty());

    EXPECT_EQ(0u, surfacesForResidency.size());

    for (auto &graphicsAllocation : residentSurfaces) {
        EXPECT_FALSE(graphicsAllocation->isResident(mockCsr->getOsContext().getContextId()));
    }
}
// When flushTask has nothing to submit, the flush stamp must stay unchanged, the returned
// completion stamp must carry that same value, and flush must not be called.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenNothingToFlushWhenFlushTaskCalledThenDontFlushStamp) {
    auto csr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(csr);

    configureCSRtoNonDirtyState<FamilyType>(true);

    EXPECT_EQ(0, csr->flushCalledCount);
    auto stampBeforeFlush = csr->flushStamp->peekStamp();

    auto completionStamp = flushTask(*csr);

    EXPECT_EQ(csr->flushStamp->peekStamp(), stampBeforeFlush);
    EXPECT_EQ(stampBeforeFlush, completionStamp.flushStamp);
    EXPECT_EQ(0, csr->flushCalledCount);
}
// With epilogueRequired set, the task stream must end with an MI_BATCH_BUFFER_START that jumps
// to the CSR stream, and the CSR stream must begin with the MI_BATCH_BUFFER_END.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPPlusTests, givenEpilogueRequiredFlagWhenTaskIsSubmittedDirectlyThenItPointsBackToCsr) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    configureCSRtoNonDirtyState<FamilyType>(true);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    // The epilogue size is zero by default and one cache line once the epilogue is requested.
    EXPECT_EQ(0u, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags));

    dispatchFlags.epilogueRequired = true;
    dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());

    EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags));

    // Pre-fill one zeroed cache line in the task stream.
    auto data = commandStream.getSpace(MemoryConstants::cacheLineSize);
    memset(data, 0, MemoryConstants::cacheLineSize);

    commandStreamReceiver.storeMakeResidentAllocations = true;
    commandStreamReceiver.flushTask(commandStream,
                                    0,
                                    dsh,
                                    ioh,
                                    ssh,
                                    taskLevel,
                                    dispatchFlags,
                                    *pDevice);
    auto &commandStreamReceiverStream = commandStreamReceiver.getCS(0u);

    EXPECT_EQ(MemoryConstants::cacheLineSize * 2, commandStream.getUsed());
    EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiverStream.getUsed());

    // Task stream: no BB end, but a BB start pointing at the CSR stream.
    parseCommands<FamilyType>(commandStream, 0);

    auto itBBend = find<MI_BATCH_BUFFER_END *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(itBBend, cmdList.end());

    auto itBatchBufferStart = find<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the iterator is dereferenced on the next line.
    ASSERT_NE(itBatchBufferStart, cmdList.end());

    auto batchBufferStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itBatchBufferStart);
    EXPECT_EQ(batchBufferStart->getBatchBufferStartAddressGraphicsaddress472(), commandStreamReceiverStream.getGraphicsAllocation()->getGpuAddress());

    // CSR stream: cmdList is not cleared before this second parse, so presumably it now holds
    // the commands of both streams. The task stream was just shown to contain no BB end, so
    // the first match is expected to be the one at the start of the CSR stream.
    parseCommands<FamilyType>(commandStreamReceiverStream, 0);

    itBBend = find<MI_BATCH_BUFFER_END *>(cmdList.begin(), cmdList.end());
    // Fatal on purpose: the original dereferenced this iterator without any check.
    ASSERT_NE(itBBend, cmdList.end());
    void *bbEndAddress = *itBBend;

    EXPECT_EQ(commandStreamReceiverStream.getCpuBase(), bbEndAddress);

    EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiverStream.getGraphicsAllocation()));
}

View File

@ -0,0 +1,852 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/command_stream/scratch_space_controller_xehp_plus.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/resource_barrier.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_csr.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "opencl/test/unit_test/mocks/mock_scratch_space_controller_xehp_plus.h"
#include "opencl/test/unit_test/mocks/mock_timestamp_container.h"
#include "test.h"
#include "gtest/gtest.h"
#include "reg_configs_common.h"
using namespace NEO;
namespace NEO {
// Declaration-only view of the ImplicitFlushSettings class template, parameterized by the
// GPU family type. No member is defined in this file.
// NOTE(review): presumably this repeats a declaration from the production sources so that the
// tests can reach the settings - confirm, and keep the two declarations in sync.
template <typename GfxFamily>
class ImplicitFlushSettings {
public:
// Accessors returning mutable references to the two boolean settings.
static bool &getSettingForNewResource();
static bool &getSettingForGpuIdle();
private:
// Static boolean members holding the default value of each setting.
static bool defaultSettingForNewResource;
static bool defaultSettingForGpuIdle;
};
} // namespace NEO
// Test fixture combining ClDeviceFixture and HardwareParse with the GoogleTest base class.
struct CommandStreamReceiverHwTestXeHPPlus : public ClDeviceFixture,
public HardwareParse,
public ::testing::Test {
// Sets up the device fixture first, then the command parser.
void SetUp() override {
ClDeviceFixture::SetUp();
HardwareParse::SetUp();
}
// Tears down in the reverse order of SetUp: parser first, then the device fixture.
void TearDown() override {
HardwareParse::TearDown();
ClDeviceFixture::TearDown();
}
};
// With the preamble already sent and the last L3 config reset, enqueueing a kernel must only
// put an MI_LOAD_REGISTER_IMM into the CSR stream when preemption programming requires it.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenPreambleSentWhenL3ConfigRequestChangedThenDontProgramL3Register) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    size_t GWS = 1;
    MockContext ctx(pClDevice);
    MockKernelWithInternals kernel(*pClDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);

    auto mockCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(mockCsr);
    auto &csrStream = mockCsr->getCS();

    // Capture both preemption modes up front; they decide which outcome is expected below.
    PreemptionMode initialPreemptionMode = mockCsr->lastPreemptionMode;
    PreemptionMode devicePreemptionMode = pDevice->getPreemptionMode();

    mockCsr->isPreambleSent = true;
    mockCsr->lastSentL3Config = 0;

    commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);

    parseCommands<FamilyType>(csrStream, 0);
    auto loadRegisterImmIt = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());

    const bool preemptionNeedsCommands = PreemptionHelper::getRequiredCmdStreamSize<FamilyType>(initialPreemptionMode, devicePreemptionMode) > 0u;
    if (preemptionNeedsCommands) {
        ASSERT_NE(cmdList.end(), loadRegisterImmIt);
    } else {
        EXPECT_EQ(cmdList.end(), loadRegisterImmIt);
    }
}
// The GPGPU command stream receiver must report a default SSH size of 2 MB.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, WhenCommandStreamReceiverHwIsCreatedThenDefaultSshSizeIs2MB) {
    auto &csr = pDevice->getGpgpuCommandStreamReceiver();

    EXPECT_EQ(2 * MB, csr.defaultSshSize);
}
// After scratch space has been requested, the CSR must hold a scratch allocation, flag the
// CFE state dirty and return a non-zero scratch patch address.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, WhenScratchSpaceExistsThenReturnNonZeroGpuAddressToPatch) {
    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);

    // RAII owner for the surface state heap: the fatal assertion below returns early on
    // failure, which would otherwise leak the buffer.
    auto sshDeleter = [](void *ptr) { alignedFree(ptr); };
    std::unique_ptr<void, decltype(sshDeleter)> ssh(alignedMalloc(512, 4096), sshDeleter);

    uint32_t perThreadScratchSize = 0x400;

    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;
    commandStreamReceiver->getScratchSpaceController()->setRequiredScratchSpace(ssh.get(), 0u, perThreadScratchSize, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    ASSERT_NE(nullptr, commandStreamReceiver->getScratchAllocation());
    EXPECT_TRUE(cfeStateDirty);

    auto scratchSpaceAddr = commandStreamReceiver->getScratchPatchAddress();
    constexpr uint64_t notExpectedScratchGpuAddr = 0;
    EXPECT_NE(notExpectedScratchGpuAddr, scratchSpaceAddr);
}
// With an OS context whose device bitfield covers two tiles, the scratch allocation's memory
// banks must match that tile mask.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, WhenOsContextSupportsMultipleDevicesThenScratchSpaceAllocationIsPlacedOnEachSupportedDevice) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(2u);
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    executionEnvironment->memoryManager.reset(new MockMemoryManager(false, true, *executionEnvironment));
    uint32_t tileMask = 0b11;
    std::unique_ptr<OsContext> osContext(OsContext::create(nullptr, 0u, tileMask, EngineTypeUsage{aub_stream::ENGINE_CCS, EngineUsage::Regular}, PreemptionMode::MidThread,
                                                           false));
    auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*executionEnvironment, 0, tileMask);
    initPlatform();

    // RAII owner for the surface state heap, so the fatal assertion below cannot leak it.
    auto sshDeleter = [](void *ptr) { alignedFree(ptr); };
    std::unique_ptr<void, decltype(sshDeleter)> ssh(alignedMalloc(512, 4096), sshDeleter);

    uint32_t perThreadScratchSize = 0x400;

    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;
    commandStreamReceiver->getScratchSpaceController()->setRequiredScratchSpace(ssh.get(), 0u, perThreadScratchSize, 0u, 0u, *osContext, stateBaseAddressDirty, cfeStateDirty);

    auto allocation = commandStreamReceiver->getScratchAllocation();
    // Fatal on purpose: the allocation is dereferenced on the next line.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(tileMask, static_cast<uint32_t>(allocation->storageInfo.memoryBanks.to_ulong()));
}
// A freshly constructed CSR that never had scratch space requested must report a zero
// scratch patch address.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, WhenScratchSpaceNotExistThenReturnZeroGpuAddressToPatch) {
    MockCsrHw<FamilyType> csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    constexpr uint64_t expectedScratchGpuAddr = 0;
    auto scratchPatchAddress = csr.getScratchPatchAddress();

    EXPECT_EQ(expectedScratchGpuAddr, scratchPatchAddress);
}
// The family's default-initialized MI_SEMAPHORE_WAIT must use the memory poll mode.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, whenProgrammingMiSemaphoreWaitThenSetRegisterPollModeMemoryPoll) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using REGISTER_POLL_MODE = typename MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE;

    MI_SEMAPHORE_WAIT semaphoreCmd = FamilyType::cmdInitMiSemaphoreWait;

    EXPECT_EQ(REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, semaphoreCmd.getRegisterPollMode());
}
// Enqueues a kernel that needs per-thread scratch and checks that the CFE_STATE references the
// scratch surface state slot, and that the slot describes the scratch allocation.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenSratchAllocationRequestedThenProgramCfeStateWithScratchAllocation) {
    using CFE_STATE = typename FamilyType::CFE_STATE;
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    const HardwareInfo &hwInfo = *defaultHwInfo;
    size_t GWS = 1;
    MockContext ctx(pClDevice);
    MockKernelWithInternals kernel(*pClDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);

    auto mockCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPPlus *>(mockCsr->getScratchSpaceController());
    scratchController->slotId = 2u;
    pDevice->resetCommandStreamReceiver(mockCsr);
    auto &csrStream = mockCsr->getCS();

    // Shorthand for the kernel attribute that every expectation below refers to.
    auto &kernelScratchSize = kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
    kernelScratchSize = 0x1000;

    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    uint32_t computeUnits = hwHelper.getComputeUnitsUsedForScratch(&hwInfo);
    size_t scratchSpaceSize = kernelScratchSize * computeUnits;

    commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);
    commandQueue.flush();

    parseCommands<FamilyType>(csrStream, 0);
    findHardwareCommands<FamilyType>();

    // Scratch bookkeeping on the CSR and its controller.
    EXPECT_EQ(kernelScratchSize, mockCsr->requiredScratchSize);
    EXPECT_EQ(scratchSpaceSize, scratchController->scratchSizeBytes);
    EXPECT_EQ(scratchSpaceSize, scratchController->getScratchSpaceAllocation()->getUnderlyingBufferSize());

    // CFE_STATE must point at the expected surface state offset.
    ASSERT_NE(nullptr, cmdMediaVfeState);
    auto cfeState = static_cast<CFE_STATE *>(cmdMediaVfeState);
    uint32_t bufferOffset = static_cast<uint32_t>(scratchController->slotId * scratchController->singleSurfaceStateSize * 2);
    EXPECT_EQ(bufferOffset, cfeState->getScratchSpaceBuffer());

    // The surface state at that offset must describe the scratch allocation.
    RENDER_SURFACE_STATE *scratchState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchController->surfaceStateHeap + bufferOffset);
    EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), scratchState->getSurfaceBaseAddress());
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, scratchState->getSurfaceType());

    // Encode (computeUnits - 1) as a buffer length and compare its fields (+1) with the state.
    SURFACE_STATE_BUFFER_LENGTH length = {0};
    length.Length = static_cast<uint32_t>(computeUnits - 1);
    EXPECT_EQ(length.SurfaceState.Depth + 1u, scratchState->getDepth());
    EXPECT_EQ(length.SurfaceState.Width + 1u, scratchState->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, scratchState->getHeight());
    EXPECT_EQ(kernelScratchSize, scratchState->getSurfacePitch());
}
// Providing a new SSH pointer while requesting no scratch space must update the controller's
// surface state heap pointer without marking the CFE state dirty.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndNoScratchAllocationExistThenNoDirtyBitSet) {
    auto csr = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPPlus *>(csr->getScratchSpaceController());

    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;

    // Fake addresses: the heap pointers are only compared here, never dereferenced by the test.
    scratchController->surfaceStateHeap = reinterpret_cast<char *>(0x1000);
    scratchController->setRequiredScratchSpace(reinterpret_cast<void *>(0x2000), 0u, 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);

    EXPECT_EQ(scratchController->surfaceStateHeap, reinterpret_cast<char *>(0x2000));
    EXPECT_FALSE(cfeStateDirty);
}
// Requests a 1-byte per-thread scratch size and expects the controller to
// record it rounded up to 64 bytes.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenRequiredScratchSpaceIsSetThenPerThreadScratchSizeIsAlignedTo64) {
    auto testedCsr = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr->getScratchSpaceController());

    const uint32_t requestedPerThreadSize = 1;
    const uint32_t alignedPerThreadSize = 1u << 6; // 64

    uint8_t sshStorage[1000];
    bool sbaDirty = false;
    bool cfeDirty = false;
    scratchCtrl->setRequiredScratchSpace(sshStorage, 0u, requestedPerThreadSize, 0u, testedCsr->taskCount,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);

    EXPECT_EQ(alignedPerThreadSize, scratchCtrl->perThreadScratchSize);
}
// Verifies that swapping the surface state heap (SSH) while a scratch
// allocation already exists marks the CFE state dirty, keeps the same slot id
// and scratch allocation, and leaves a scratch surface state in the new heap.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndScratchAllocationExistsThenSetDirtyBitCopyCurrentState) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
auto scratchController = static_cast<MockScratchSpaceControllerXeHPPlus *>(commandStreamReceiver->getScratchSpaceController());
scratchController->slotId = 0;
bool stateBaseAddressDirty = false;
bool cfeStateDirty = false;
// First request: 0x1000 bytes of per-thread scratch against the "old" heap.
void *oldSurfaceHeap = alignedMalloc(0x1000, 0x1000);
scratchController->setRequiredScratchSpace(oldSurfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
// Expected outcome of the first request: dirty flag set, slot advanced from 0 to 1.
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(1u, scratchController->slotId);
EXPECT_EQ(scratchController->surfaceStateHeap, oldSurfaceHeap);
// The test locates the programmed state at slotId * 2 surface states into the heap.
char *surfaceStateBuf = static_cast<char *>(oldSurfaceHeap) + scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2;
// Remember the allocation so the second request can be checked for reuse.
GraphicsAllocation *scratchAllocation = scratchController->scratchAllocation;
RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, surfaceState->getSurfaceType());
// Second request: identical size, but a different heap pointer.
void *newSurfaceHeap = alignedMalloc(0x1000, 0x1000);
scratchController->setRequiredScratchSpace(newSurfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
// Note: cfeStateDirty was not cleared after the first request, so this check
// alone does not prove the second call set it.
EXPECT_TRUE(cfeStateDirty);
// Slot id and allocation are expected to be unchanged; only the heap moved.
EXPECT_EQ(1u, scratchController->slotId);
EXPECT_EQ(scratchController->surfaceStateHeap, newSurfaceHeap);
EXPECT_EQ(scratchAllocation, scratchController->scratchAllocation);
// The same slot offset in the new heap must now describe the scratch surface.
surfaceStateBuf = static_cast<char *>(newSurfaceHeap) + scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2;
surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, surfaceState->getSurfaceType());
alignedFree(oldSurfaceHeap);
alignedFree(newSurfaceHeap);
}
// Verifies that asking for a larger per-thread scratch size on the same heap
// advances the slot id, yields a scratch allocation with a different GPU
// address, and programs the surface state of the new slot with that address.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenBiggerScratchSpaceRequiredThenReplaceAllocation) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
auto scratchController = static_cast<MockScratchSpaceControllerXeHPPlus *>(commandStreamReceiver->getScratchSpaceController());
// Start from an arbitrary non-zero slot so the increments below are visible.
scratchController->slotId = 6;
// NOTE(review): the raw CSR pointer is handed to the device here and never
// deleted by the test; presumably the device takes ownership - confirm.
pDevice->resetCommandStreamReceiver(commandStreamReceiver);
bool cfeStateDirty = false;
bool stateBaseAddressDirty = false;
void *surfaceHeap = alignedMalloc(0x1000, 0x1000);
// First request: 0x1000 bytes per thread.
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(7u, scratchController->slotId);
// The patch address is expected to equal the byte offset of the current slot.
uint64_t offset = static_cast<uint64_t>(scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2);
EXPECT_EQ(offset, scratchController->getScratchPatchAddress());
EXPECT_EQ(0u, scratchController->calculateNewGSH());
uint64_t gpuVa = scratchController->scratchAllocation->getGpuAddress();
char *surfaceStateBuf = static_cast<char *>(scratchController->surfaceStateHeap) + offset;
RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress());
// Second request: twice the per-thread size on the same heap.
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, 0x2000u, 0u, commandStreamReceiver->taskCount,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
// Note: cfeStateDirty was not cleared after the first request.
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(8u, scratchController->slotId);
offset = static_cast<uint64_t>(scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2);
EXPECT_EQ(offset, scratchController->getScratchPatchAddress());
// A different GPU address shows the scratch allocation was replaced.
EXPECT_NE(gpuVa, scratchController->scratchAllocation->getGpuAddress());
gpuVa = scratchController->scratchAllocation->getGpuAddress();
surfaceStateBuf = static_cast<char *>(scratchController->surfaceStateHeap) + offset;
surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress());
alignedFree(surfaceHeap);
}
// Passes an explicit scratch slot of 1 and expects the controller to use that
// slot id, keep updateSlots enabled, and program the surface state found at
// the matching offset in the heap.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenScratchSlotIsNonZeroThenSlotIdIsUpdatedAndCorrectOffsetIsSet) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    auto *testedCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr->getScratchSpaceController());
    pDevice->resetCommandStreamReceiver(testedCsr);

    void *sshMemory = alignedMalloc(0x1000, 0x1000);
    bool sbaDirty = false;
    bool cfeDirty = false;
    scratchCtrl->setRequiredScratchSpace(sshMemory, 1u, 0x1000u, 0u, testedCsr->taskCount,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);

    EXPECT_TRUE(cfeDirty);
    EXPECT_EQ(1u, scratchCtrl->slotId);
    EXPECT_TRUE(scratchCtrl->updateSlots);

    // Byte offset of the active slot, computed the same way the test's other checks do.
    const uint64_t slotOffset = static_cast<uint64_t>(scratchCtrl->slotId * sizeof(RENDER_SURFACE_STATE) * 2);
    EXPECT_EQ(slotOffset, scratchCtrl->getScratchPatchAddress());
    EXPECT_EQ(0u, scratchCtrl->calculateNewGSH());

    const uint64_t scratchGpuVa = scratchCtrl->scratchAllocation->getGpuAddress();
    char *slotBytes = static_cast<char *>(scratchCtrl->surfaceStateHeap) + slotOffset;
    auto *programmedState = reinterpret_cast<RENDER_SURFACE_STATE *>(slotBytes);
    EXPECT_EQ(scratchGpuVa, programmedState->getSurfaceBaseAddress());

    alignedFree(sshMemory);
}
// Calls programHeaps() with three heaps and expects one call to
// setRequiredScratchSpace() and two calls to programSurfaceState().
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenProgramHeapsThenSetReqScratchSpaceAndProgramSurfaceStateAreCalled) {
    // Test-local mock: both overrides only count their invocations.
    class MockScratchSpaceControllerXeHPPlus : public ScratchSpaceControllerXeHPPlus {
      public:
        MockScratchSpaceControllerXeHPPlus(uint32_t rootDeviceIndex,
                                           ExecutionEnvironment &environment,
                                           InternalAllocationStorage &allocationStorage)
            : ScratchSpaceControllerXeHPPlus(rootDeviceIndex, environment, allocationStorage) {}

        using ScratchSpaceControllerXeHPPlus::scratchAllocation;

        void setRequiredScratchSpace(void *sshBaseAddress,
                                     uint32_t scratchSlot,
                                     uint32_t requiredPerThreadScratchSize,
                                     uint32_t requiredPerThreadPrivateScratchSize,
                                     uint32_t currentTaskCount,
                                     OsContext &osContext,
                                     bool &stateBaseAddressDirty,
                                     bool &vfeStateDirty) override {
            ++requiredScratchSpaceCalledTimes;
        }

        uint32_t requiredScratchSpaceCalledTimes = 0u;
        uint32_t programSurfaceStateCalledTimes = 0u;

      protected:
        void programSurfaceState() override {
            ++programSurfaceStateCalledTimes;
        }
    };

    auto *testedCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(testedCsr);

    // Held through the base type so programHeaps() is invoked via the base interface.
    std::unique_ptr<ScratchSpaceController> scratchController =
        std::make_unique<MockScratchSpaceControllerXeHPPlus>(pDevice->getRootDeviceIndex(),
                                                             *pDevice->executionEnvironment,
                                                             *testedCsr->getInternalAllocationStorage());

    bool cfeDirty = false;
    bool sbaDirty = false;

    // All three heap allocations wrap the same host buffer.
    void *sshMemory = alignedMalloc(0x1000, 0x1000);
    NEO::GraphicsAllocation heapA(1u, NEO::GraphicsAllocation::AllocationType::BUFFER, sshMemory, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);
    NEO::GraphicsAllocation heapB(1u, NEO::GraphicsAllocation::AllocationType::BUFFER, sshMemory, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);
    NEO::GraphicsAllocation heapC(1u, NEO::GraphicsAllocation::AllocationType::BUFFER, sshMemory, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);

    HeapContainer heaps;
    for (NEO::GraphicsAllocation *heap : {&heapA, &heapB, &heapC}) {
        heaps.push_back(heap);
    }

    scratchController->programHeaps(heaps, 0u, 1u, 0u, 0u, testedCsr->getOsContext(), sbaDirty, cfeDirty);

    auto *countingMock = static_cast<MockScratchSpaceControllerXeHPPlus *>(scratchController.get());
    EXPECT_EQ(countingMock->requiredScratchSpaceCalledTimes, 1u);
    EXPECT_EQ(countingMock->programSurfaceStateCalledTimes, 2u);

    alignedFree(sshMemory);
}
// Calls setNewSshPtr() with the third argument set to false and expects the
// slot id to stay at its preset value, programSurfaceState() to run once, and
// the CFE dirty flag to be raised.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchWhenSetNewSshPtrAndChangeIdIsFalseThenSlotIdIsNotChanged) {
    // Test-local mock: exposes internals and counts programSurfaceState() calls.
    class MockScratchSpaceControllerXeHPPlus : public ScratchSpaceControllerXeHPPlus {
      public:
        MockScratchSpaceControllerXeHPPlus(uint32_t rootDeviceIndex,
                                           ExecutionEnvironment &environment,
                                           InternalAllocationStorage &allocationStorage)
            : ScratchSpaceControllerXeHPPlus(rootDeviceIndex, environment, allocationStorage) {}

        using ScratchSpaceControllerXeHPPlus::scratchAllocation;
        using ScratchSpaceControllerXeHPPlus::slotId;

        uint32_t programSurfaceStateCalledTimes = 0u;

      protected:
        void programSurfaceState() override {
            ++programSurfaceStateCalledTimes;
        }
    };

    auto *testedCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(testedCsr);

    std::unique_ptr<ScratchSpaceController> scratchController =
        std::make_unique<MockScratchSpaceControllerXeHPPlus>(pDevice->getRootDeviceIndex(),
                                                             *pDevice->executionEnvironment,
                                                             *testedCsr->getInternalAllocationStorage());

    NEO::GraphicsAllocation stackAllocation(1u, NEO::GraphicsAllocation::AllocationType::BUFFER, nullptr, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);
    bool cfeDirty = false;
    void *sshMemory = alignedMalloc(0x1000, 0x1000);

    auto *mockCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(scratchController.get());
    mockCtrl->slotId = 10;
    mockCtrl->scratchAllocation = &stackAllocation;

    mockCtrl->setNewSshPtr(sshMemory, cfeDirty, false);

    // Detach the stack-owned allocation again.
    // NOTE(review): presumably so the controller does not try to release it on destruction - confirm.
    mockCtrl->scratchAllocation = nullptr;

    EXPECT_EQ(10u, mockCtrl->slotId);
    EXPECT_EQ(mockCtrl->programSurfaceStateCalledTimes, 1u);
    EXPECT_TRUE(cfeDirty);

    alignedFree(sshMemory);
}
// Runs the real programSurfaceState() with updateSlots disabled and expects
// the slot id to keep its preset value.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchWhenProgramSurfaceStateAndUpdateSlotIsFalseThenSlotIdIsNotChanged) {
    // Test-local mock: only widens access to the members the test needs.
    class MockScratchSpaceControllerXeHPPlus : public ScratchSpaceControllerXeHPPlus {
      public:
        MockScratchSpaceControllerXeHPPlus(uint32_t rootDeviceIndex,
                                           ExecutionEnvironment &environment,
                                           InternalAllocationStorage &allocationStorage)
            : ScratchSpaceControllerXeHPPlus(rootDeviceIndex, environment, allocationStorage) {}

        using ScratchSpaceControllerXeHPPlus::programSurfaceState;
        using ScratchSpaceControllerXeHPPlus::scratchAllocation;
        using ScratchSpaceControllerXeHPPlus::slotId;
        using ScratchSpaceControllerXeHPPlus::surfaceStateHeap;
        using ScratchSpaceControllerXeHPPlus::updateSlots;
    };

    auto *testedCsr = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(testedCsr);

    std::unique_ptr<ScratchSpaceController> scratchController =
        std::make_unique<MockScratchSpaceControllerXeHPPlus>(pDevice->getRootDeviceIndex(),
                                                             *pDevice->executionEnvironment,
                                                             *testedCsr->getInternalAllocationStorage());

    NEO::GraphicsAllocation stackAllocation(1u, NEO::GraphicsAllocation::AllocationType::BUFFER, nullptr, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);
    void *sshMemory = alignedMalloc(0x1000, 0x1000);

    auto *mockCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(scratchController.get());
    mockCtrl->surfaceStateHeap = static_cast<char *>(sshMemory);
    mockCtrl->slotId = 10;
    mockCtrl->updateSlots = false;
    mockCtrl->scratchAllocation = &stackAllocation;

    mockCtrl->programSurfaceState();

    // Detach the stack-owned allocation again.
    // NOTE(review): presumably so the controller does not try to release it on destruction - confirm.
    mockCtrl->scratchAllocation = nullptr;

    EXPECT_EQ(10u, mockCtrl->slotId);

    alignedFree(sshMemory);
}
// With EnablePrivateScratchSlot1 set, requests private scratch twice, the
// second time with double the size, and expects a private scratch allocation
// at a different GPU address that is programmed into surface state index 5.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenBiggerPrivateScratchSpaceRequiredThenReplaceAllocation) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);

    // Backing storage used as the surface state heap.
    RENDER_SURFACE_STATE sshStates[6];

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());

    bool sbaDirty = false;
    bool cfeDirty = false;
    const uint32_t privateScratchSize = MemoryConstants::pageSize;

    // First request: the private surface state is expected at index 3.
    scratchCtrl->setRequiredScratchSpace(sshStates, 0u, 0u, privateScratchSize, 0u,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);
    EXPECT_TRUE(cfeDirty);
    const uint64_t firstPrivateGpuVa = scratchCtrl->privateScratchAllocation->getGpuAddress();
    EXPECT_EQ(firstPrivateGpuVa, sshStates[3].getSurfaceBaseAddress());

    // Second, larger request: the private surface state is expected at index 5.
    scratchCtrl->setRequiredScratchSpace(sshStates, 0u, 0u, privateScratchSize * 2, 0u,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);
    EXPECT_TRUE(cfeDirty);
    EXPECT_NE(firstPrivateGpuVa, scratchCtrl->privateScratchAllocation->getGpuAddress());
    EXPECT_EQ(scratchCtrl->privateScratchAllocation->getGpuAddress(), sshStates[5].getSurfaceBaseAddress());
}
// With EnablePrivateScratchSlot1 set and only private scratch requested, the
// regular scratch allocation is expected to stay null while the patch address
// moves from 0 to the size of two surface states.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceControllerWithOnlyPrivateScratchSpaceWhenGettingPatchAddressThenGetCorrectValue) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);

    RENDER_SURFACE_STATE sshStates[6];

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());

    bool sbaDirty = false;
    bool cfeDirty = false;
    const uint32_t privateScratchSize = MemoryConstants::pageSize;

    // Before any request: no allocations and a zero patch address.
    EXPECT_EQ(nullptr, scratchCtrl->getScratchSpaceAllocation());
    EXPECT_EQ(nullptr, scratchCtrl->getPrivateScratchSpaceAllocation());
    EXPECT_EQ(0u, scratchCtrl->getScratchPatchAddress());

    scratchCtrl->setRequiredScratchSpace(sshStates, 0u, 0u, privateScratchSize, 0u,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);
    EXPECT_TRUE(cfeDirty);

    // After the request: only the private allocation exists.
    auto patchAddressAfterRequest = 2 * sizeof(RENDER_SURFACE_STATE);
    EXPECT_EQ(nullptr, scratchCtrl->getScratchSpaceAllocation());
    EXPECT_NE(nullptr, scratchCtrl->getPrivateScratchSpaceAllocation());
    EXPECT_EQ(patchAddressAfterRequest, scratchCtrl->getScratchPatchAddress());
}
// Requests the same private scratch size twice and expects the second request
// to leave the CFE dirty flag cleared and the private allocation unchanged.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenNotBiggerPrivateScratchSpaceRequiredThenCfeStateIsNotDirty) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);

    RENDER_SURFACE_STATE sshStates[4];

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());

    bool sbaDirty = false;
    bool cfeDirty = false;
    const uint32_t privateScratchSize = MemoryConstants::pageSize;

    // First request creates the private allocation.
    scratchCtrl->setRequiredScratchSpace(sshStates, 0u, 0u, privateScratchSize, 0u,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);
    EXPECT_TRUE(cfeDirty);
    const uint64_t privateGpuVa = scratchCtrl->privateScratchAllocation->getGpuAddress();

    // Clear the flag so the second request's effect is observable.
    cfeDirty = false;
    scratchCtrl->setRequiredScratchSpace(sshStates, 0u, 0u, privateScratchSize, 0u,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);

    EXPECT_FALSE(cfeDirty);
    EXPECT_EQ(privateGpuVa, scratchCtrl->privateScratchAllocation->getGpuAddress());
    EXPECT_EQ(privateGpuVa, sshStates[3].getSurfaceBaseAddress());
}
// With EnablePrivateScratchSlot1 set but only regular scratch requested, no
// private allocation is expected and surface state index 3 must carry a zero
// base address.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateWithoutPrivateScratchSpaceWhenDoubleAllocationsScratchSpaceIsUsedThenPrivateScratchAddressIsZero) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);

    RENDER_SURFACE_STATE sshStates[4];

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());

    bool sbaDirty = false;
    bool cfeDirty = false;
    const uint32_t regularScratchSize = MemoryConstants::pageSize;

    scratchCtrl->setRequiredScratchSpace(sshStates, 0u, regularScratchSize, 0u, 0u,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);

    EXPECT_TRUE(cfeDirty);
    EXPECT_EQ(nullptr, scratchCtrl->privateScratchAllocation);
    EXPECT_EQ(0u, sshStates[3].getSurfaceBaseAddress());
}
// With EnablePrivateScratchSlot1 set to 0 the controller is expected to report
// 16 state slots.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceControllerWhenDebugKeyForPrivateScratchIsDisabledThenThereAre16Slots) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(0);

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());

    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());
    EXPECT_EQ(16u, scratchCtrl->stateSlotsCount);
}
// With EnablePrivateScratchSlot1 set to 1 the controller is expected to report
// 32 state slots.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceControllerWhenDebugKeyForPrivateScratchIsEnabledThenThereAre32Slots) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());

    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());
    EXPECT_EQ(32u, scratchCtrl->stateSlotsCount);
}
// Requests a private scratch size of pageSize + 1 and expects the total
// private scratch size to be derived from the 64-byte-aligned value, and the
// private allocation to have exactly that size.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenScratchSpaceSurfaceStateEnabledWhenSizeForPrivateScratchSpaceIsMisalignedThenAlignItTo64) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);

    RENDER_SURFACE_STATE sshStates[4];

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());

    const uint32_t oddPrivateScratchSize = MemoryConstants::pageSize + 1;
    bool sbaDirty = false;
    bool cfeDirty = false;
    scratchCtrl->setRequiredScratchSpace(sshStates, 0u, 0u, oddPrivateScratchSize, 0u,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);

    // Totals are computed after the call, from the controller's own compute-unit count.
    const auto totalFromRawSize = oddPrivateScratchSize * scratchCtrl->computeUnitsUsedForScratch;
    const auto totalFromAlignedSize = alignUp(oddPrivateScratchSize, 64) * scratchCtrl->computeUnitsUsedForScratch;

    EXPECT_NE(scratchCtrl->privateScratchSizeBytes, totalFromRawSize);
    EXPECT_EQ(scratchCtrl->privateScratchSizeBytes, totalFromAlignedSize);
    EXPECT_EQ(scratchCtrl->privateScratchSizeBytes, scratchCtrl->getPrivateScratchSpaceAllocation()->getUnderlyingBufferSize());
}
// With EnablePrivateScratchSlot1 set to 0, a non-zero private scratch request
// is expected to be ignored: no size recorded and no allocation created.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenDisabledPrivateScratchSpaceWhenSizeForPrivateScratchSpaceIsProvidedThenItIsNotCreated) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(0);

    RENDER_SURFACE_STATE sshStates[4];

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());

    bool sbaDirty = false;
    bool cfeDirty = false;
    // Both regular and private scratch are requested with one page each.
    scratchCtrl->setRequiredScratchSpace(sshStates, 0u, MemoryConstants::pageSize, MemoryConstants::pageSize, 0u,
                                         *pDevice->getDefaultEngine().osContext, sbaDirty, cfeDirty);

    EXPECT_EQ(0u, scratchCtrl->privateScratchSizeBytes);
    EXPECT_EQ(nullptr, scratchCtrl->getPrivateScratchSpaceAllocation());
}
// With EnablePrivateScratchSlot1 set to 0, the offset of slot 1 is expected to
// equal the size of a single surface state.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenDisabledPrivateScratchSpaceWhenGettingOffsetForSlotThenEachSlotContainsOnlyOneSurfaceState) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.EnablePrivateScratchSlot1.set(0);

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto *scratchCtrl = static_cast<MockScratchSpaceControllerXeHPPlus *>(testedCsr.getScratchSpaceController());

    EXPECT_EQ(sizeof(RENDER_SURFACE_STATE), scratchCtrl->getOffsetToSurfaceState(1u));
}
// Enqueues a resource barrier on cmdQ0 that waits on three events: one from
// the same queue, one from a queue bound to a different engine, and a user
// event that keeps the enqueue blocked. After the user event completes, the
// test inspects the stored task stream and the CSR command stream for the
// expected MI_SEMAPHORE_WAIT commands and checks the dispatch flags passed to
// the CSR.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenBlockedCacheFlushCmdWhenSubmittingThenDispatchBlockedCommands) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    MockContext context(pClDevice);

    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->timestampPacketWriteEnabled = true;
    mockCsr->storeFlushedTaskStream = true;

    auto cmdQ0 = clUniquePtr(new MockCommandQueueHw<FamilyType>(&context, pClDevice, nullptr));

    // Second queue is redirected to the low-priority engine so that it runs on a different CSR.
    auto &secondEngine = pDevice->getEngine(pDevice->getHardwareInfo().capabilityTable.defaultEngineType, EngineUsage::LowPriority);
    static_cast<UltCommandStreamReceiver<FamilyType> *>(secondEngine.commandStreamReceiver)->timestampPacketWriteEnabled = true;

    auto cmdQ1 = clUniquePtr(new MockCommandQueueHw<FamilyType>(&context, pClDevice, nullptr));
    cmdQ1->gpgpuEngine = &secondEngine;
    cmdQ1->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
    EXPECT_NE(&cmdQ0->getGpgpuCommandStreamReceiver(), &cmdQ1->getGpgpuCommandStreamReceiver());

    MockTimestampPacketContainer node0(*pDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer node1(*pDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);

    Event event0(cmdQ0.get(), 0, 0, 0); // on the same CSR
    event0.addTimestampPacketNodes(node0);
    Event event1(cmdQ1.get(), 0, 0, 0); // on different CSR
    event1.addTimestampPacketNodes(node1);

    uint32_t numEventsOnWaitlist = 3;
    UserEvent userEvent;
    cl_event waitlist[] = {&event0, &event1, &userEvent};

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr(Buffer::create(&context, 0, MemoryConstants::pageSize, nullptr, retVal));
    // Fail fast if the buffer could not be created instead of enqueueing a
    // barrier on a null memory object.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());

    cl_resource_barrier_descriptor_intel descriptor = {};
    descriptor.mem_object = buffer.get();
    BarrierCommand barrierCommand(cmdQ0.get(), &descriptor, 1);

    cmdQ0->enqueueResourceBarrier(&barrierCommand, numEventsOnWaitlist, waitlist, nullptr);

    // Completing the user event lets the blocked barrier proceed.
    userEvent.setStatus(CL_COMPLETE);

    HardwareParse hwParserCsr;
    HardwareParse hwParserCmdQ;

    // Wrap the task stream captured by the mock CSR and mark it fully used so it can be parsed.
    LinearStream taskStream(mockCsr->storedTaskStream.get(), mockCsr->storedTaskStreamSize);
    taskStream.getSpace(mockCsr->storedTaskStreamSize);

    hwParserCsr.parseCommands<FamilyType>(mockCsr->commandStream, 0);
    hwParserCmdQ.parseCommands<FamilyType>(taskStream, 0);

    {
        // Task stream: expected to hold the wait on the same-CSR event (node0).
        auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
        auto expectedQueueSemaphoresCount = 1u;
        if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) {
            expectedQueueSemaphoresCount += 2;
        }
        // Fatal check: the first element is dereferenced right below.
        ASSERT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());

        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0]));
        EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
        EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());

        auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node0.getNode(0));
        EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
    }
    {
        // CSR stream: expected to hold the wait on the other-CSR event (node1).
        auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
        // Fatal check: the first element is dereferenced right below.
        ASSERT_EQ(1u, csrSemaphores.size());

        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0]));
        EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
        EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());

        auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node1.getNode(0));
        EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
    }

    EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
    EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
    EXPECT_EQ(pDevice->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode);

    // Return value intentionally unused.
    // NOTE(review): presumably called for its side effect on the queue's blocked state - confirm.
    cmdQ0->isQueueBlocked();
}
// Builds OS contexts and CSRs from a two-bit and a one-bit device mask and
// expects only the two-bit variant to report multi-OS-context capability.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, WhenOsContextSupportsMultipleDevicesThenCommandStreamReceiverIsMultiOsContextCapable) {
    const uint32_t dualDeviceMask = 0b11;
    const uint32_t soloDeviceMask = 0b10;

    std::unique_ptr<OsContext> dualDeviceContext(
        OsContext::create(nullptr, 0u, dualDeviceMask, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::MidThread, false));
    std::unique_ptr<OsContext> soloDeviceContext(
        OsContext::create(nullptr, 0u, soloDeviceMask, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::MidThread, false));

    EXPECT_EQ(2u, dualDeviceContext->getNumSupportedDevices());
    EXPECT_EQ(1u, soloDeviceContext->getNumSupportedDevices());

    // callBaseIsMultiOsContextCapable is switched on so the query is not answered by the ULT stub.
    // NOTE(review): inferred from the flag name - confirm.
    UltCommandStreamReceiver<FamilyType> dualDeviceCsr(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), dualDeviceMask);
    dualDeviceCsr.callBaseIsMultiOsContextCapable = true;
    EXPECT_TRUE(dualDeviceCsr.isMultiOsContextCapable());
    EXPECT_EQ(2u, dualDeviceCsr.deviceBitfield.count());

    UltCommandStreamReceiver<FamilyType> soloDeviceCsr(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), soloDeviceMask);
    soloDeviceCsr.callBaseIsMultiOsContextCapable = true;
    EXPECT_FALSE(soloDeviceCsr.isMultiOsContextCapable());
    EXPECT_EQ(1u, soloDeviceCsr.deviceBitfield.count());
}
// With the per-family default for new-resource implicit flush enabled, the
// CSR's answer is expected to follow the OS interface's newResourceImplicitFlush flag.
HWTEST2_F(CommandStreamReceiverHwTestXeHPPlus, givenXE_HP_COREDefaultSupportEnabledWhenOsSupportsNewResourceImplicitFlushThenReturnOsSupportValue, IsXeHpCore) {
    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(*osContext);

    // The family default is expected to be enabled even before it is forced below.
    EXPECT_TRUE(ImplicitFlushSettings<FamilyType>::getSettingForNewResource());

    VariableBackup<bool> familyDefaultBackup(&ImplicitFlushSettings<FamilyType>::getSettingForNewResource(), true);

    const bool osSupportsFlush = testedCsr.getOSInterface()->newResourceImplicitFlush;
    if (!osSupportsFlush) {
        EXPECT_FALSE(testedCsr.checkPlatformSupportsNewResourceImplicitFlush());
    } else {
        EXPECT_TRUE(testedCsr.checkPlatformSupportsNewResourceImplicitFlush());
    }
}
// With the per-family default for new-resource implicit flush forced to
// false, the CSR is expected to report no support.
HWTEST2_F(CommandStreamReceiverHwTestXeHPPlus, givenXE_HP_COREDefaultSupportDisabledWhenOsSupportsNewResourceImplicitFlushThenReturnOsSupportValue, IsXeHpCore) {
    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(*osContext);

    VariableBackup<bool> familyDefaultBackup(&ImplicitFlushSettings<FamilyType>::getSettingForNewResource(), false);

    EXPECT_FALSE(testedCsr.checkPlatformSupportsNewResourceImplicitFlush());
}
// Even with the family default for new-resource implicit flush enabled, a CSR
// marked multiOsContextCapable is expected to report no support.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenPlatformSupportsImplicitFlushForNewResourceWhenCsrIsMultiContextThenExpectNoSupport) {
    VariableBackup<bool> familyDefaultBackup(&ImplicitFlushSettings<FamilyType>::getSettingForNewResource(), true);

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(*osContext);
    testedCsr.multiOsContextCapable = true;

    EXPECT_TRUE(ImplicitFlushSettings<FamilyType>::getSettingForNewResource());
    EXPECT_FALSE(testedCsr.checkPlatformSupportsNewResourceImplicitFlush());
}
// With the per-family default for GPU-idle implicit flush enabled, the CSR's
// answer is expected to follow the OS interface's gpuIdleImplicitFlush flag.
HWTEST2_F(CommandStreamReceiverHwTestXeHPPlus, givenXE_HP_COREDefaultSupportEnabledWhenOsSupportsGpuIdleImplicitFlushThenReturnOsSupportValue, IsXeHpCore) {
    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(*osContext);

    // The family default is expected to be enabled even before it is forced below.
    EXPECT_TRUE(ImplicitFlushSettings<FamilyType>::getSettingForGpuIdle());

    VariableBackup<bool> defaultSettingForGpuIdleBackup(&ImplicitFlushSettings<FamilyType>::getSettingForGpuIdle(), true);

    // Branch on the GPU-idle OS flag: this test asserts on the GPU-idle query,
    // so consulting newResourceImplicitFlush here would compare against the
    // wrong OS capability whenever the two flags differ.
    if (commandStreamReceiver.getOSInterface()->gpuIdleImplicitFlush) {
        EXPECT_TRUE(commandStreamReceiver.checkPlatformSupportsGpuIdleImplicitFlush());
    } else {
        EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsGpuIdleImplicitFlush());
    }
}
// With the per-family default for GPU-idle implicit flush forced to false,
// the CSR is expected to report no support.
HWTEST2_F(CommandStreamReceiverHwTestXeHPPlus, givenXE_HP_COREDefaultSupportDisabledWhenOsSupportsGpuIdleImplicitFlushThenReturnOsSupportValue, IsXeHpCore) {
    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(*osContext);

    VariableBackup<bool> familyDefaultBackup(&ImplicitFlushSettings<FamilyType>::getSettingForGpuIdle(), false);

    EXPECT_FALSE(testedCsr.checkPlatformSupportsGpuIdleImplicitFlush());
}
// Even with the family default for GPU-idle implicit flush enabled, a CSR
// marked multiOsContextCapable is expected to report no support.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenPlatformSupportsImplicitFlushForIdleGpuWhenCsrIsMultiContextThenExpectNoSupport) {
    VariableBackup<bool> familyDefaultBackup(&ImplicitFlushSettings<FamilyType>::getSettingForGpuIdle(), true);

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(*osContext);
    testedCsr.multiOsContextCapable = true;

    EXPECT_TRUE(ImplicitFlushSettings<FamilyType>::getSettingForGpuIdle());
    EXPECT_FALSE(testedCsr.checkPlatformSupportsGpuIdleImplicitFlush());
}
// With the family default and the OS-level GPU-idle flag both forced to true
// and direct submission marked active on the OS context, a multi-context CSR
// is expected to report GPU-idle implicit flush support.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, givenPlatformSupportsImplicitFlushForIdleGpuWhenCsrIsMultiContextAndDirectSubmissionActiveThenExpectSupportTrue) {
    VariableBackup<bool> familyDefaultBackup(&ImplicitFlushSettings<FamilyType>::getSettingForGpuIdle(), true);
    VariableBackup<bool> osFlagBackup(&OSInterface::gpuIdleImplicitFlush, true);

    // Direct submission is activated before the CSR is attached to the context.
    osContext->setDirectSubmissionActive();

    MockCsrHw<FamilyType> testedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    testedCsr.setupContext(*osContext);
    testedCsr.multiOsContextCapable = true;

    EXPECT_TRUE(ImplicitFlushSettings<FamilyType>::getSettingForGpuIdle());
    EXPECT_TRUE(testedCsr.checkPlatformSupportsGpuIdleImplicitFlush());
}
// With static partitioning and local memory enabled on a device factory built
// with arguments {1, 2}, checks the storage info of the root device CSR's
// work partition allocation.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPPlus, whenCreatingWorkPartitionAllocationThenItsPropertiesAreCorrect) {
    DebugManagerStateRestore flagsGuard{};
    DebugManager.flags.EnableStaticPartitioning.set(1);
    DebugManager.flags.EnableLocalMemory.set(1);

    UltDeviceFactory factory{1, 2};
    MockDevice &rootDevice = *factory.rootDevices[0];
    CommandStreamReceiver &rootCsr = rootDevice.getGpgpuCommandStreamReceiver();

    const StorageInfo &partitionStorage = rootCsr.getWorkPartitionAllocation()->storageInfo;

    // Banks and page-table visibility are both expected to match the root device bitfield.
    EXPECT_EQ(rootDevice.getDeviceBitfield(), partitionStorage.memoryBanks);
    EXPECT_EQ(rootDevice.getDeviceBitfield(), partitionStorage.pageTablesVisibility);
    EXPECT_FALSE(partitionStorage.cloningOfPageTables);
    EXPECT_TRUE(partitionStorage.tileInstanced);
}
// On XeHP the per-DSS backed buffer is expected to need no commands: the
// reported command size is 0 and programming it leaves the stream empty even
// when the dispatch flags request the buffer.
HWTEST2_F(CommandStreamReceiverHwTestXeHPPlus, givenXeHpWhenRayTracingEnabledThenDoNotAddCommandBatchBuffer, IsXEHP) {
    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    auto cmdSize = commandStreamReceiver.getCmdSizeForPerDssBackedBuffer(pDevice->getHardwareInfo());
    EXPECT_EQ(0u, cmdSize);

    // Array form of unique_ptr: memory obtained with new[] must be released
    // with delete[], which std::unique_ptr<char> would not do.
    std::unique_ptr<char[]> buffer(new char[cmdSize]);
    LinearStream cs(buffer.get(), cmdSize);

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.usePerDssBackedBuffer = true;

    commandStreamReceiver.programPerDssBackedBuffer(cs, *pDevice, dispatchFlags);
    EXPECT_EQ(0u, cs.getUsed());
}

View File

@ -0,0 +1,276 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/aub/aub_helper.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw.h"
#include "opencl/source/helpers/hardware_context_controller.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_aub_stream.h"
#include "opencl/test/unit_test/mocks/mock_csr_simulated_common_hw.h"
#include "test.h"
// Fixture used by the simulated-CSR global MMIO tests in this file: Test<ClDeviceFixture>.
using XeHPPlusMockSimulatedCsrHwTests = Test<ClDeviceFixture>;
// With localMemoryEnabled set on the simulated CSR, initGlobalMMIO() must emit the two
// MMIO writes checked below (0x101010 and 0xcf58).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusMockSimulatedCsrHwTests, givenLocalMemoryEnabledWhenGlobalMmiosAreInitializedThenLmemIsInitializedAndLmemCfgMmioIsWritten) {
    auto simulatedCsr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    simulatedCsr->localMemoryEnabled = true;

    // The mock stream records every MMIO write so it can be queried afterwards.
    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr->stream = mmioStream.get();

    simulatedCsr->initGlobalMMIO();

    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00101010, 0x00000080u)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000cf58, 0x80000000u)));
}
// Same expectations as the local-memory variant, but here local memory is not enabled on the
// CSR directly; only the AUBDumpForceAllToLocalMemory debug flag is set before construction.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusMockSimulatedCsrHwTests, givenAUBDumpForceAllToLocalMemoryWhenGlobalMmiosAreInitializedThenLmemIsInitializedAndLmemCfgMmioIsWritten) {
    DebugManagerStateRestore flagsRestorer;
    DebugManager.flags.AUBDumpForceAllToLocalMemory.set(true);

    auto simulatedCsr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    // The mock stream records every MMIO write so it can be queried afterwards.
    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr->stream = mmioStream.get();

    simulatedCsr->initGlobalMMIO();

    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00101010, 0x00000080u)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000cf58, 0x80000000u)));
}
// initGlobalMMIO() must write the expected value to each of the 64 registers in the
// 0x4000..0x40FC range. One expectation per register is kept deliberately so that a
// failure points directly at the offending offset.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusMockSimulatedCsrHwTests, givenAubCommandStreamReceiverWhenGlobalMmiosAreInitializedThenMOCSRegistersAreConfigured) {
    MockSimulatedCsrHw<FamilyType> simulatedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr.stream = mmioStream.get();

    simulatedCsr.initGlobalMMIO();

    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004000, 0x00000008)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004004, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004008, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000400C, 0x00000008)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004010, 0x00000018)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004014, 0x00060038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004018, 0x00000000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000401C, 0x00000033)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004020, 0x00060037)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004024, 0x0000003B)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004028, 0x00000032)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000402C, 0x00000036)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004030, 0x0000003A)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004034, 0x00000033)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004038, 0x00000037)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000403C, 0x0000003B)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004040, 0x00000030)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004044, 0x00000034)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004048, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000404C, 0x00000031)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004050, 0x00000032)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004054, 0x00000036)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004058, 0x0000003A)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000405C, 0x00000033)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004060, 0x00000037)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004064, 0x0000003B)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004068, 0x00000032)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000406C, 0x00000036)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004070, 0x0000003A)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004074, 0x00000033)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004078, 0x00000037)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000407C, 0x0000003B)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004080, 0x00000030)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004084, 0x00000034)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004088, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000408C, 0x00000031)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004090, 0x00000032)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004094, 0x00000036)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004098, 0x0000003A)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000409C, 0x00000033)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040A0, 0x00000037)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040A4, 0x0000003B)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040A8, 0x00000032)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040AC, 0x00000036)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040B0, 0x0000003A)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040B4, 0x00000033)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040B8, 0x00000037)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040BC, 0x0000003B)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040C0, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040C4, 0x00000034)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040C8, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040CC, 0x00000031)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040D0, 0x00000032)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040D4, 0x00000036)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040D8, 0x0000003A)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040DC, 0x00000033)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040E0, 0x00000037)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040E4, 0x0000003B)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040E8, 0x00000032)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040EC, 0x00000036)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040F0, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040F4, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040F8, 0x00000038)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x000040FC, 0x00000038)));
}
// initGlobalMMIO() must write the expected value to each of the 32 registers in the
// 0xB020..0xB09C range. One expectation per register is kept deliberately so that a
// failure points directly at the offending offset.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusMockSimulatedCsrHwTests, givenAubCommandStreamReceiverWhenGlobalMmiosAreInitializedThenLNCFRegistersAreConfigured) {
    MockSimulatedCsrHw<FamilyType> simulatedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr.stream = mmioStream.get();

    simulatedCsr.initGlobalMMIO();

    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B020, 0x00300010)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B024, 0x00300010)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B028, 0x00300030)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B02C, 0x00000000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B030, 0x0030001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B034, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B038, 0x0000001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B03C, 0x00000000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B040, 0x00100000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B044, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B048, 0x0010001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B04C, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B050, 0x0030001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B054, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B058, 0x0000001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B05C, 0x00000000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B060, 0x00100000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B064, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B068, 0x0010001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B06C, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B070, 0x0030001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B074, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B078, 0x0000001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B07C, 0x00000000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B080, 0x00300030)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B084, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B088, 0x0010001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B08C, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B090, 0x0030001F)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B094, 0x00170013)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B098, 0x00300010)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B09C, 0x00300010)));
}
// initGlobalMMIO() must emit the three MMIO writes listed below (0xB004, 0xB404, 0x8708).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusMockSimulatedCsrHwTests, givenAubCommandStreamReceiverWhenGlobalMmiosAreInitializedThenPerfMmioRegistersAreConfigured) {
    MockSimulatedCsrHw<FamilyType> simulatedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr.stream = mmioStream.get();

    simulatedCsr.initGlobalMMIO();

    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B004, 0x2FC0100B)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000B404, 0x00000160)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00008708, 0x00000000)));
}
// initGlobalMMIO() must emit the seven MMIO writes listed below (0x4400..0x4414 and 0x4DFC).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusMockSimulatedCsrHwTests, givenAubCommandStreamReceiverWhenGlobalMmiosAreInitializedThenTRTTRegistersAreConfigured) {
    MockSimulatedCsrHw<FamilyType> simulatedCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr.stream = mmioStream.get();

    simulatedCsr.initGlobalMMIO();

    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004410, 0xffffffff)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004414, 0xfffffffe)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004404, 0x000000ff)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004408, 0x00000000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x0000440C, 0x00000000)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004400, 0x00000001)));
    EXPECT_TRUE(mmioStream->isOnMmioList(MMIOPair(0x00004DFC, 0x00000000)));
}
// Fixture for the tile-range register tests. Device creation is deferred: SetUp() is
// intentionally empty and each test calls setUpImpl<FamilyType>() itself, so a test can
// change debug flags before the device is created.
class XeHPPlusTileRangeRegisterTest : public ClDeviceFixture, public ::testing::Test {
  public:
    // Builds a hardware info from the default one with MultiTileArchInfo marked valid and
    // creates the device fixture from it.
    template <typename FamilyType>
    void setUpImpl() {
        hardwareInfo = *defaultHwInfo;
        hardwareInfoSetup[hardwareInfo.platform.eProductFamily](&hardwareInfo, true, 0);
        hardwareInfo.gtSystemInfo.MultiTileArchInfo.IsValid = true;
        ClDeviceFixture::SetUpImpl(&hardwareInfo);
    }

    void SetUp() override {
    }

    void TearDown() override {
        ClDeviceFixture::TearDown();
    }

    // Verifies that `list` contains the expected tile address register write for each of the
    // first `tilesNumber` tiles, given `localMemorySizeTotalInGB` split evenly between tiles.
    // `tilesNumber` must be between 1 and the number of tile registers (4); anything else
    // fails the test instead of dividing by zero or reading past the register table.
    void checkMMIOs(MMIOList &list, uint32_t tilesNumber, uint32_t localMemorySizeTotalInGB) {
        // Every register starts with bit 0 set (presumably an enable bit -- TODO confirm).
        MMIOPair tileAddrRegisters[] = {{0x00004900, 0x0001},
                                        {0x00004904, 0x0001},
                                        {0x00004908, 0x0001},
                                        {0x0000490c, 0x0001}};
        constexpr uint32_t tileRegistersCount = static_cast<uint32_t>(sizeof(tileAddrRegisters) / sizeof(tileAddrRegisters[0]));

        ASSERT_NE(0u, tilesNumber);
        ASSERT_LE(tilesNumber, tileRegistersCount);

        const uint32_t numberOfTiles = tilesNumber;
        const uint32_t localMemorySizePerTileGB = localMemorySizeTotalInGB / numberOfTiles;

        // Expected value: per-tile base shifted to bit 1, per-tile size shifted to bit 8.
        uint32_t localMemoryBase = 0x0;
        for (uint32_t i = 0; i < tileRegistersCount; i++) {
            tileAddrRegisters[i].second |= localMemoryBase << 1;
            tileAddrRegisters[i].second |= localMemorySizePerTileGB << 8;
            localMemoryBase += localMemorySizePerTileGB;
        }

        // Count every recorded write that matches one of the expected registers exactly.
        uint32_t mmiosFound = 0;
        for (auto &mmioPair : list) {
            for (uint32_t i = 0; i < numberOfTiles; i++) {
                if (mmioPair.first == tileAddrRegisters[i].first && mmioPair.second == tileAddrRegisters[i].second) {
                    mmiosFound++;
                }
            }
        }
        EXPECT_EQ(numberOfTiles, mmiosFound);
    }
};
// Default configuration with local memory enabled: the tile range register is expected
// for one tile and a total of 32 GB.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTileRangeRegisterTest, givenLocalMemoryEnabledWhenGlobalMmiosAreInitializedThenTileRangeRegistersAreProgrammed) {
    setUpImpl<FamilyType>();

    auto simulatedCsr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    simulatedCsr->localMemoryEnabled = true;

    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr->stream = mmioStream.get();

    simulatedCsr->initGlobalMMIO();

    checkMMIOs(mmioStream->mmioList, 1, 32);
}
// Four sub-devices requested through CreateMultipleSubDevices: the tile range registers are
// expected for four tiles sharing a total of 32 GB.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTileRangeRegisterTest, givenLocalMemoryEnabledAnd4TileConfigWhenGlobalMmiosAreInitializedThenTileRangeRegistersAreProgrammed) {
    // The flag is set before setUpImpl() so that it is already in effect when the device is created.
    DebugManagerStateRestore flagsRestorer;
    DebugManager.flags.CreateMultipleSubDevices.set(4);
    setUpImpl<FamilyType>();

    auto simulatedCsr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    simulatedCsr->localMemoryEnabled = true;

    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr->stream = mmioStream.get();

    simulatedCsr->initGlobalMMIO();

    checkMMIOs(mmioStream->mmioList, 4, 32);
}
// AUBDumpForceAllToLocalMemory variant: the tile range register is expected for one tile
// and a total of 32 GB.
// NOTE(review): localMemoryEnabled is also forced to true below, so the debug flag may not
// be what drives the result here -- confirm whether that assignment is intended.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTileRangeRegisterTest, givenAUBDumpForceAllToLocalMemoryWhenGlobalMmiosAreInitializedThenTileRangeRegistersAreProgrammed) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore flagsRestorer;
    DebugManager.flags.AUBDumpForceAllToLocalMemory.set(true);

    auto simulatedCsr = std::make_unique<MockSimulatedCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    simulatedCsr->localMemoryEnabled = true;

    auto mmioStream = std::make_unique<MockAubStreamMockMmioWrite>();
    simulatedCsr->stream = mmioStream.get();

    simulatedCsr->initGlobalMMIO();

    checkMMIOs(mmioStream->mmioList, 1, 32);
}

View File

@ -0,0 +1,426 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/ptr_math.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_device.h"
#include "opencl/test/unit_test/command_stream/compute_mode_tests.h"
#include "opencl/test/unit_test/mocks/mock_allocation_properties.h"
#include "test.h"
#include "test_traits_common.h"
using namespace NEO;
// getCmdSizeForComputeMode() must be zero for the two requests whose first argument is false
// and exactly one STATE_COMPUTE_MODE for the two requests whose first argument is true.
// (Argument meaning of overrideComputeModeRequest is defined by the fixture -- see
// compute_mode_tests.h.)
HWCMDTEST_F(IGFX_XE_HP_CORE, ComputeModeRequirements, givenCoherencyWithoutSharedHandlesWhenCommandSizeIsCalculatedThenCorrectCommandSizeIsReturned) {
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
    SetUpImpl<FamilyType>();

    // Align required and last-sent thread arbitration policy before the size queries.
    getCsrHw<FamilyType>()->requiredThreadArbitrationPolicy = getCsrHw<FamilyType>()->lastSentThreadArbitrationPolicy;

    auto scmSize = sizeof(STATE_COMPUTE_MODE);

    overrideComputeModeRequest<FamilyType>(false, false, false);
    EXPECT_EQ(0u, getCsrHw<FamilyType>()->getCmdSizeForComputeMode());

    overrideComputeModeRequest<FamilyType>(false, true, false);
    EXPECT_EQ(0u, getCsrHw<FamilyType>()->getCmdSizeForComputeMode());

    overrideComputeModeRequest<FamilyType>(true, true, false);
    EXPECT_EQ(scmSize, getCsrHw<FamilyType>()->getCmdSizeForComputeMode());

    overrideComputeModeRequest<FamilyType>(true, false, false);
    EXPECT_EQ(scmSize, getCsrHw<FamilyType>()->getCmdSizeForComputeMode());
}
// With the third request argument set to true, getCmdSizeForComputeMode() must always be
// STATE_COMPUTE_MODE + PIPE_CONTROL, for every combination of the first two arguments.
HWCMDTEST_F(IGFX_XE_HP_CORE, ComputeModeRequirements, givenCoherencyWithSharedHandlesWhenCommandSizeIsCalculatedThenCorrectCommandSizeIsReturned) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto scmWithPcSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL);

    overrideComputeModeRequest<FamilyType>(false, false, true);
    EXPECT_EQ(scmWithPcSize, getCsrHw<FamilyType>()->getCmdSizeForComputeMode());

    overrideComputeModeRequest<FamilyType>(false, true, true);
    EXPECT_EQ(scmWithPcSize, getCsrHw<FamilyType>()->getCmdSizeForComputeMode());

    overrideComputeModeRequest<FamilyType>(true, true, true);
    EXPECT_EQ(scmWithPcSize, getCsrHw<FamilyType>()->getCmdSizeForComputeMode());

    overrideComputeModeRequest<FamilyType>(true, false, true);
    EXPECT_EQ(scmWithPcSize, getCsrHw<FamilyType>()->getCmdSizeForComputeMode());
}
// HWTEST2_F matcher: selects products that support the XE_HP_CORE command set and whose
// core-family TestTraits report forceNonCoherentSupported.
struct ForceNonCoherentSupportedMatcher {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        // TestTraits is only consulted for products that pass the command-set check; the other
        // branch is discarded at compile time.
        if constexpr (!HwMapper<productFamily>::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) {
            return false;
        } else {
            return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::forceNonCoherentSupported;
        }
    }
};
// Programs compute mode twice and checks that each call appends exactly one
// STATE_COMPUTE_MODE: first with force-non-coherent set to GPU_NON_COHERENT, then DISABLED.
HWTEST2_F(ComputeModeRequirements, givenCoherencyWithoutSharedHandlesWhenComputeModeIsProgrammedThenCorrectCommandsAreAdded, ForceNonCoherentSupportedMatcher) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    const auto scmSize = sizeof(STATE_COMPUTE_MODE);
    char cmdBuffer[1024] = {0};
    LinearStream cmdStream(cmdBuffer, 1024);

    auto requiredMaskBits = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask;

    // First programming.
    auto referenceScm = FamilyType::cmdInitStateComputeMode;
    referenceScm.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);

    overrideComputeModeRequest<FamilyType>(true, false, false, false);
    getCsrHw<FamilyType>()->programComputeMode(cmdStream, flags, *defaultHwInfo);
    EXPECT_EQ(scmSize, cmdStream.getUsed());

    auto programmedScm = reinterpret_cast<STATE_COMPUTE_MODE *>(cmdStream.getCpuBase());
    EXPECT_TRUE(isValueSet(programmedScm->getMaskBits(), requiredMaskBits));
    // Copy the programmed mask into the reference so the byte comparison covers the remaining fields.
    referenceScm.setMaskBits(programmedScm->getMaskBits());
    EXPECT_TRUE(memcmp(&referenceScm, programmedScm, sizeof(STATE_COMPUTE_MODE)) == 0);

    // Second programming, appended after the first command.
    auto secondCmdOffset = cmdStream.getUsed();
    overrideComputeModeRequest<FamilyType>(true, true, false, false);
    getCsrHw<FamilyType>()->programComputeMode(cmdStream, flags, *defaultHwInfo);
    EXPECT_EQ(scmSize * 2, cmdStream.getUsed());

    referenceScm = FamilyType::cmdInitStateComputeMode;
    referenceScm.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED);

    programmedScm = reinterpret_cast<STATE_COMPUTE_MODE *>(ptrOffset(cmdStream.getCpuBase(), secondCmdOffset));
    EXPECT_TRUE(isValueSet(programmedScm->getMaskBits(), requiredMaskBits));
    referenceScm.setMaskBits(programmedScm->getMaskBits());
    EXPECT_TRUE(memcmp(&referenceScm, programmedScm, sizeof(STATE_COMPUTE_MODE)) == 0);
}
// Programs compute mode twice with the third request argument set to true and checks that
// each call appends a STATE_COMPUTE_MODE immediately followed by a default PIPE_CONTROL.
HWTEST2_F(ComputeModeRequirements, givenCoherencyWithSharedHandlesWhenComputeModeIsProgrammedThenCorrectCommandsAreAdded, ForceNonCoherentSupportedMatcher) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    const auto scmWithPcSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL);
    char cmdBuffer[1024] = {0};
    LinearStream cmdStream(cmdBuffer, 1024);

    auto requiredMaskBits = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask;
    auto referencePc = FamilyType::cmdInitPipeControl;

    // First programming.
    auto referenceScm = FamilyType::cmdInitStateComputeMode;
    referenceScm.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);

    overrideComputeModeRequest<FamilyType>(true, false, true, false);
    getCsrHw<FamilyType>()->programComputeMode(cmdStream, flags, *defaultHwInfo);
    EXPECT_EQ(scmWithPcSize, cmdStream.getUsed());

    auto programmedScm = reinterpret_cast<STATE_COMPUTE_MODE *>(cmdStream.getCpuBase());
    EXPECT_TRUE(isValueSet(programmedScm->getMaskBits(), requiredMaskBits));
    // Copy the programmed mask into the reference so the byte comparison covers the remaining fields.
    referenceScm.setMaskBits(programmedScm->getMaskBits());
    EXPECT_TRUE(memcmp(&referenceScm, programmedScm, sizeof(STATE_COMPUTE_MODE)) == 0);

    auto programmedPc = reinterpret_cast<PIPE_CONTROL *>(ptrOffset(cmdStream.getCpuBase(), sizeof(STATE_COMPUTE_MODE)));
    EXPECT_TRUE(memcmp(&referencePc, programmedPc, sizeof(PIPE_CONTROL)) == 0);

    // Second programming, appended after the first command pair.
    auto secondCmdOffset = cmdStream.getUsed();
    overrideComputeModeRequest<FamilyType>(true, true, true, false);
    getCsrHw<FamilyType>()->programComputeMode(cmdStream, flags, *defaultHwInfo);
    EXPECT_EQ(scmWithPcSize * 2, cmdStream.getUsed());

    referenceScm = FamilyType::cmdInitStateComputeMode;
    referenceScm.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED);

    programmedScm = reinterpret_cast<STATE_COMPUTE_MODE *>(ptrOffset(cmdStream.getCpuBase(), secondCmdOffset));
    EXPECT_TRUE(isValueSet(programmedScm->getMaskBits(), requiredMaskBits));
    referenceScm.setMaskBits(programmedScm->getMaskBits());
    EXPECT_TRUE(memcmp(&referenceScm, programmedScm, sizeof(STATE_COMPUTE_MODE)) == 0);

    programmedPc = reinterpret_cast<PIPE_CONTROL *>(ptrOffset(cmdStream.getCpuBase(), secondCmdOffset + sizeof(STATE_COMPUTE_MODE)));
    EXPECT_TRUE(memcmp(&referencePc, programmedPc, sizeof(PIPE_CONTROL)) == 0);
}
// Flushes a sequence of tasks with alternating coherency requirements and checks that
// STATE_COMPUTE_MODE is emitted only on the first flush and whenever the requirement
// changes, and that it is never directly followed by a PIPE_CONTROL.
HWTEST2_F(ComputeModeRequirements, givenCoherencyRequirementWithoutSharedHandlesWhenFlushTaskCalledThenProgramCmdOnlyIfChanged, ForceNonCoherentSupportedMatcher) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto startOffset = getCsrHw<FamilyType>()->commandStream.getUsed();
    auto graphicAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    IndirectHeap stream(graphicAlloc);

    // Flushes one task and records where its commands start in the CSR command stream.
    auto flushTask = [&](bool coherencyRequired) {
        // Keep the thread arbitration policy in sync so it does not trigger programming on its own.
        getCsrHw<FamilyType>()->lastSentThreadArbitrationPolicy = getCsrHw<FamilyType>()->requiredThreadArbitrationPolicy;
        flags.requiresCoherency = coherencyRequired;
        startOffset = getCsrHw<FamilyType>()->commandStream.getUsed();
        csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device);
    };

    // Parses the commands of the last flush and validates the (at most one) STATE_COMPUTE_MODE.
    auto findCmd = [&](bool expectToBeProgrammed, bool expectCoherent) {
        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(getCsrHw<FamilyType>()->commandStream, startOffset);
        bool foundOne = false;
        typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = expectCoherent ? STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED : STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT;
        uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask;
        for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); ++it) {
            auto cmd = genCmdCast<STATE_COMPUTE_MODE *>(*it);
            if (cmd) {
                EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent());
                EXPECT_TRUE(isValueSet(cmd->getMaskBits(), expectedCoherentMask));
                EXPECT_FALSE(foundOne);
                foundOne = true;

                // Peek at the following command on a copy of the iterator: the loop iterator must
                // not be advanced here, and end() must not be dereferenced when
                // STATE_COMPUTE_MODE is the last command.
                auto nextIt = it;
                ++nextIt;
                if (nextIt != hwParser.cmdList.end()) {
                    auto pc = genCmdCast<PIPE_CONTROL *>(*nextIt);
                    EXPECT_EQ(nullptr, pc);
                }
            }
        }
        EXPECT_EQ(expectToBeProgrammed, foundOne);
    };

    flushTask(false);
    findCmd(true, false); // first time
    flushTask(false);
    findCmd(false, false); // not changed
    flushTask(true);
    findCmd(true, true); // changed
    flushTask(true);
    findCmd(false, true); // not changed
    flushTask(false);
    findCmd(true, false); // changed
    flushTask(false);
    findCmd(false, false); // not changed

    csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc);
}
// With a shared allocation made resident before every flush, each flush must emit exactly
// one STATE_COMPUTE_MODE directly followed by a PIPE_CONTROL, whether or not the coherency
// requirement changed.
HWTEST2_F(ComputeModeRequirements, givenCoherencyRequirementWithSharedHandlesWhenFlushTaskCalledThenAlwaysProgramCmds, ForceNonCoherentSupportedMatcher) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto startOffset = getCsrHw<FamilyType>()->commandStream.getUsed();
    auto graphicsAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    IndirectHeap stream(graphicsAlloc);

    // Flushes one task with the shared allocation resident and records where its commands start.
    auto flushTask = [&](bool coherencyRequired) {
        // Keep the thread arbitration policy in sync so it does not trigger programming on its own.
        getCsrHw<FamilyType>()->lastSentThreadArbitrationPolicy = getCsrHw<FamilyType>()->requiredThreadArbitrationPolicy;
        flags.requiresCoherency = coherencyRequired;
        makeResidentSharedAlloc();
        startOffset = getCsrHw<FamilyType>()->commandStream.getUsed();
        csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device);
    };

    auto flushTaskAndFindCmds = [&](bool expectCoherent) {
        flushTask(expectCoherent);
        HardwareParse hwParser;
        hwParser.parseCommands<FamilyType>(getCsrHw<FamilyType>()->commandStream, startOffset);
        bool foundOne = false;
        typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = expectCoherent ? STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED : STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT;
        uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask;
        for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); ++it) {
            auto cmd = genCmdCast<STATE_COMPUTE_MODE *>(*it);
            if (cmd) {
                EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent());
                EXPECT_TRUE(isValueSet(cmd->getMaskBits(), expectedCoherentMask));
                EXPECT_FALSE(foundOne);
                foundOne = true;

                // Peek at the following command on a copy of the iterator: the loop iterator must
                // not be advanced here, and end() must not be dereferenced when
                // STATE_COMPUTE_MODE is the last command.
                auto nextIt = it;
                ++nextIt;
                const bool hasFollowingCmd = nextIt != hwParser.cmdList.end();
                EXPECT_TRUE(hasFollowingCmd);
                if (hasFollowingCmd) {
                    auto pc = genCmdCast<PIPE_CONTROL *>(*nextIt);
                    EXPECT_NE(nullptr, pc);
                }
            }
        }
        EXPECT_TRUE(foundOne);
    };

    flushTaskAndFindCmds(false); // first time
    flushTaskAndFindCmds(false); // not changed
    flushTaskAndFindCmds(true);  // changed
    flushTaskAndFindCmds(true);  // not changed
    flushTaskAndFindCmds(false); // changed
    flushTaskAndFindCmds(false); // not changed

    csr->getMemoryManager()->freeGraphicsMemory(graphicsAlloc);
}
// Flushes once with a shared allocation resident, then flushes again and checks that the
// second flush still emits exactly one STATE_COMPUTE_MODE directly followed by a
// PIPE_CONTROL.
HWTEST2_F(ComputeModeRequirements, givenFlushWithoutSharedHandlesWhenPreviouslyUsedThenProgramPcAndSCM, ForceNonCoherentSupportedMatcher) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto graphicAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    IndirectHeap stream(graphicAlloc);

    makeResidentSharedAlloc();
    csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device);
    EXPECT_TRUE(getCsrHw<FamilyType>()->getCsrRequestFlags()->hasSharedHandles);

    // Only the commands of the second flush are parsed below.
    auto startOffset = getCsrHw<FamilyType>()->commandStream.getUsed();
    csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device);
    EXPECT_TRUE(getCsrHw<FamilyType>()->getCsrRequestFlags()->hasSharedHandles);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(getCsrHw<FamilyType>()->commandStream, startOffset);
    typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT;
    uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask;
    bool foundOne = false;
    for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); ++it) {
        auto cmd = genCmdCast<STATE_COMPUTE_MODE *>(*it);
        if (cmd) {
            EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent());
            EXPECT_TRUE(isValueSet(cmd->getMaskBits(), expectedCoherentMask));
            EXPECT_FALSE(foundOne);
            foundOne = true;

            // Peek at the following command on a copy of the iterator: the loop iterator must
            // not be advanced here, and end() must not be dereferenced when
            // STATE_COMPUTE_MODE is the last command. Non-fatal checks are used so the
            // allocation below is always released.
            auto nextIt = it;
            ++nextIt;
            const bool hasFollowingCmd = nextIt != hwParser.cmdList.end();
            EXPECT_TRUE(hasFollowingCmd);
            if (hasFollowingCmd) {
                auto pc = genCmdCast<PIPE_CONTROL *>(*nextIt);
                EXPECT_NE(nullptr, pc);
            }
        }
    }
    EXPECT_TRUE(foundOne);

    csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc);
}
// getCmdSizeForComputeMode() must be zero for the first request (fourth argument false) and
// exactly one STATE_COMPUTE_MODE for the two requests whose fourth argument is true.
HWCMDTEST_F(IGFX_XE_HP_CORE, ComputeModeRequirements, givenComputeModeCmdSizeWhenLargeGrfModeChangeIsRequiredThenSCMCommandSizeIsCalculated) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    // Held as size_t so sizeof() is not narrowed before the comparison.
    const size_t noCmdSize = 0u;
    const size_t scmCmdSize = sizeof(STATE_COMPUTE_MODE);

    overrideComputeModeRequest<FamilyType>(false, false, false, false, 128u);
    auto retSize = getCsrHw<FamilyType>()->getCmdSizeForComputeMode();
    EXPECT_EQ(noCmdSize, retSize);

    overrideComputeModeRequest<FamilyType>(false, false, false, true, 256u);
    retSize = getCsrHw<FamilyType>()->getCmdSizeForComputeMode();
    EXPECT_EQ(scmCmdSize, retSize);

    overrideComputeModeRequest<FamilyType>(true, false, false, true, 256u);
    retSize = getCsrHw<FamilyType>()->getCmdSizeForComputeMode();
    EXPECT_EQ(scmCmdSize, retSize);
}
// Programs compute mode for a 256-GRF request and then a 128-GRF request and checks that each
// call appends one STATE_COMPUTE_MODE with large GRF mode set to true, then false.
HWTEST2_F(ComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfModeChangeIsRequiredThenCorrectCommandsAreAdded, ForceNonCoherentSupportedMatcher) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    auto cmdsSize = sizeof(STATE_COMPUTE_MODE);
    // Zero-initialised so the comparisons below never read indeterminate bytes, even when the
    // CSR programs less than expected.
    char buff[1024] = {0};
    LinearStream stream(buff, 1024);

    auto expectedScmCmd = FamilyType::cmdInitStateComputeMode;
    expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);
    auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask;
    expectedScmCmd.setLargeGrfMode(true);

    overrideComputeModeRequest<FamilyType>(false, false, false, true, 256u);
    getCsrHw<FamilyType>()->programComputeMode(stream, flags, *defaultHwInfo);
    EXPECT_EQ(cmdsSize, stream.getUsed());

    auto scmCmd = reinterpret_cast<STATE_COMPUTE_MODE *>(stream.getCpuBase());
    EXPECT_TRUE(isValueSet(scmCmd->getMaskBits(), expectedBitsMask));
    // Copy the programmed mask into the reference so the byte comparison covers the remaining fields.
    expectedScmCmd.setMaskBits(scmCmd->getMaskBits());
    EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd, sizeof(STATE_COMPUTE_MODE)) == 0);

    // Second programming, appended after the first command.
    auto startOffset = stream.getUsed();
    overrideComputeModeRequest<FamilyType>(false, false, false, true, 128u);
    getCsrHw<FamilyType>()->programComputeMode(stream, flags, *defaultHwInfo);
    EXPECT_EQ(cmdsSize * 2, stream.getUsed());

    expectedScmCmd = FamilyType::cmdInitStateComputeMode;
    expectedScmCmd.setLargeGrfMode(false);
    expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);

    scmCmd = reinterpret_cast<STATE_COMPUTE_MODE *>(ptrOffset(stream.getCpuBase(), startOffset));
    EXPECT_TRUE(isValueSet(scmCmd->getMaskBits(), expectedBitsMask));
    expectedScmCmd.setMaskBits(scmCmd->getMaskBits());
    EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd, sizeof(STATE_COMPUTE_MODE)) == 0);
}
// When the request's fourth argument is false, programComputeMode() must not write
// anything to the stream.
HWCMDTEST_F(IGFX_XE_HP_CORE, ComputeModeRequirements, givenComputeModeProgrammingWhenLargeGrfModeDoesntChangeThenSCMIsNotAdded) {
    SetUpImpl<FamilyType>();

    char cmdBuffer[1024];
    LinearStream cmdStream(cmdBuffer, 1024);

    overrideComputeModeRequest<FamilyType>(false, false, false, false, 256u);
    getCsrHw<FamilyType>()->programComputeMode(cmdStream, flags, *defaultHwInfo);

    EXPECT_EQ(0u, cmdStream.getUsed());
}
// A request for 127 GRFs must program one STATE_COMPUTE_MODE with large GRF mode disabled.
HWTEST2_F(ComputeModeRequirements, givenComputeModeProgrammingWhenRequiredGRFNumberIsLowerThan128ThenSmallGRFModeIsProgrammed, ForceNonCoherentSupportedMatcher) {
    SetUpImpl<FamilyType>();
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    auto cmdsSize = sizeof(STATE_COMPUTE_MODE);
    // Zero-initialised so the comparison below never reads indeterminate bytes, even when the
    // CSR programs less than expected.
    char buff[1024] = {0};
    LinearStream stream(buff, 1024);

    auto expectedScmCmd = FamilyType::cmdInitStateComputeMode;
    expectedScmCmd.setLargeGrfMode(false);
    expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);
    auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask;

    overrideComputeModeRequest<FamilyType>(false, false, false, true, 127u);
    getCsrHw<FamilyType>()->programComputeMode(stream, flags, *defaultHwInfo);
    EXPECT_EQ(cmdsSize, stream.getUsed());

    auto scmCmd = reinterpret_cast<STATE_COMPUTE_MODE *>(stream.getCpuBase());
    EXPECT_TRUE(isValueSet(scmCmd->getMaskBits(), expectedBitsMask));
    // Copy the programmed mask into the reference so the byte comparison covers the remaining fields.
    expectedScmCmd.setMaskBits(scmCmd->getMaskBits());
    EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd, sizeof(STATE_COMPUTE_MODE)) == 0);
}
// Requests 256 GRFs and expects a single STATE_COMPUTE_MODE command with
// large-GRF mode enabled and GPU non-coherency forced.
HWTEST2_F(ComputeModeRequirements, givenComputeModeProgrammingWhenRequiredGRFNumberIsGreaterThan128ThenLargeGRFModeIsProgrammed, ForceNonCoherentSupportedMatcher) {
    SetUpImpl<FamilyType>();

    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    auto cmdsSize = sizeof(STATE_COMPUTE_MODE);
    char buff[1024];
    LinearStream stream(buff, 1024);

    auto expectedScmCmd = FamilyType::cmdInitStateComputeMode;
    expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT);
    expectedScmCmd.setLargeGrfMode(true);
    auto expectedBitsMask = FamilyType::stateComputeModeForceNonCoherentMask | FamilyType::stateComputeModeLargeGrfModeMask;

    getCsrHw<FamilyType>()->requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
    overrideComputeModeRequest<FamilyType>(false, false, false, true, 256u);
    getCsrHw<FamilyType>()->programComputeMode(stream, flags, *defaultHwInfo);

    // Exactly one command must have been emitted.
    EXPECT_EQ(cmdsSize, stream.getUsed());

    auto scmCmd = reinterpret_cast<STATE_COMPUTE_MODE *>(stream.getCpuBase());
    EXPECT_TRUE(isValueSet(scmCmd->getMaskBits(), expectedBitsMask));

    // Mask bits are copied from the programmed command so the byte-wise
    // comparison below covers only the remaining fields.
    expectedScmCmd.setMaskBits(scmCmd->getMaskBits());
    EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd, sizeof(STATE_COMPUTE_MODE)) == 0);
}

View File

@ -192,4 +192,20 @@ HWTEST_F(PrepareDeviceEnvironmentsTest, givenPrepareDeviceEnvironmentsAndUnknown
}
}
}
// With CreateMultipleRootDevices=2 and LimitAmountOfReturnedDevices=1,
// clGetDeviceIDs is expected to report a single device.
TEST(MultiDeviceTests, givenCreateMultipleRootDevicesAndLimitAmountOfReturnedDevicesFlagWhenClGetDeviceIdsIsCalledThenLowerValueIsReturned) {
    platformsImpl->clear();

    // ultHwConfig is restored when hwConfigBackup goes out of scope.
    VariableBackup<UltHwConfig> hwConfigBackup(&ultHwConfig);
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
    ultHwConfig.forceOsAgnosticMemoryManager = false;
    ultHwConfig.useHwCsr = true;

    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.CreateMultipleRootDevices.set(2);
    DebugManager.flags.LimitAmountOfReturnedDevices.set(1);

    cl_uint deviceCount = 0;
    const auto status = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, 0, nullptr, &deviceCount);

    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(1u, deviceCount);
}
} // namespace NEO

View File

@ -0,0 +1,16 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/implicit_scaling.h"
#include "test.h"
using namespace NEO;
// Checks that ImplicitScaling::apiSupport evaluates to true in this test binary.
TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenSupportEnabled) {
EXPECT_TRUE(ImplicitScaling::apiSupport);
}

View File

@ -0,0 +1,167 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "shared/source/memory_manager/memory_banks.h"
#include "shared/source/memory_manager/memory_pool.h"
#include "shared/source/memory_manager/physical_address_allocator.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/helpers/hw_helper_tests.h"
#include "test.h"
using namespace NEO;
struct XeHPPlusTbxCommandStreamReceiverTests : ClDeviceFixture, ::testing::Test {
template <typename FamilyType>
void setUpImpl() {
hardwareInfo = *defaultHwInfo;
hardwareInfoSetup[hardwareInfo.platform.eProductFamily](&hardwareInfo, true, 0);
hardwareInfo.gtSystemInfo.MultiTileArchInfo.IsValid = true;
ClDeviceFixture::SetUpImpl(&hardwareInfo);
}
void SetUp() override {
}
void TearDown() override {
ClDeviceFixture::TearDown();
}
};
// TBX CSR mock whose reported device index is controlled by the test through
// the public deviceIndex member.
template <typename FamilyType>
struct MockTbxCommandStreamReceiverHw : TbxCommandStreamReceiverHw<FamilyType> {
    using TbxCommandStreamReceiverHw<FamilyType>::TbxCommandStreamReceiverHw;

    // Value returned by getDeviceIndex(); tests overwrite it directly.
    uint32_t deviceIndex = 0u;

    uint32_t getDeviceIndex() const override { return this->deviceIndex; }
};
// A null allocation is expected to yield the value 3 (bits 0 and 1 set).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, givenNullPtrGraphicsAlloctionWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) {
    setUpImpl<FamilyType>();
    auto csr = std::make_unique<MockTbxCommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    // Kept as a typed pointer so the call receives a GraphicsAllocation *.
    GraphicsAllocation *noAllocation = nullptr;

    EXPECT_EQ(3u, csr->getPPGTTAdditionalBits(noAllocation));
}
// An allocation whose memory pool is not overridden is expected to yield the
// value 3 (bits 0 and 1 set).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, givenGraphicsAlloctionWithNonLocalMemoryPoolWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) {
    setUpImpl<FamilyType>();
    auto csr = std::make_unique<MockTbxCommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    MockGraphicsAllocation defaultPoolAllocation(nullptr, 0);

    EXPECT_EQ(3u, csr->getPPGTTAdditionalBits(&defaultPoolAllocation));
}
// An allocation placed in MemoryPool::LocalMemory is expected to additionally
// have bit 11 set in the returned value.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, givenGraphicsAlloctionWithLocalMemoryPoolWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) {
    setUpImpl<FamilyType>();
    auto csr = std::make_unique<MockTbxCommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    MockGraphicsAllocation localAllocation(nullptr, 0);
    localAllocation.overrideMemoryPool(MemoryPool::LocalMemory);

    EXPECT_EQ(3u | (1 << 11), csr->getPPGTTAdditionalBits(&localAllocation));
}
// With AUBDumpForceAllToLocalMemory enabled, bit 11 is expected to be set even
// though the allocation's memory pool is not overridden.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, givenAubDumpForceAllToLocalMemoryPoolWhenGetPPGTTAdditionalBitsIsCalledThenLocalBitIsReturned) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.AUBDumpForceAllToLocalMemory.set(true);

    // The CSR is created only after the flag has been set.
    auto csr = std::make_unique<MockTbxCommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    MockGraphicsAllocation defaultPoolAllocation(nullptr, 0);

    EXPECT_EQ(3u | (1 << 11), csr->getPPGTTAdditionalBits(&defaultPoolAllocation));
}
// With local memory enabled (debug flag and feature table), getGTTData() is
// expected to set AubGTTData::localMemory.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, givenLocalMemoryFeatureWhenGetGTTDataIsCalledThenLocalMemoryIsSet) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.EnableLocalMemory.set(1);
    hardwareInfo.featureTable.ftrLocalMemory = true;

    // A separate device is created from the modified hardware info.
    std::unique_ptr<MockDevice> localMemDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hardwareInfo));
    auto csr = std::make_unique<MockTbxCommandStreamReceiverHw<FamilyType>>(*localMemDevice->executionEnvironment, localMemDevice->getRootDeviceIndex(), localMemDevice->getDeviceBitfield());
    csr->setupContext(*localMemDevice->getDefaultEngine().osContext);

    AubGTTData gttData = {false, false};
    csr->getGTTData(nullptr, gttData);

    EXPECT_TRUE(gttData.localMemory);
}
// For device indices 0..3, getMemoryBankForGtt() is expected to match
// MemoryBanks::getBankForLocalMemory() of the same index.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, givenLocalMemoryEnabledWhenGetMemoryBankForGttIsCalledThenCorrectBankForDeviceIsReturned) {
    setUpImpl<FamilyType>();

    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.EnableLocalMemory.set(1);
    hardwareInfo.featureTable.ftrLocalMemory = true;

    std::unique_ptr<MockDevice> localMemDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hardwareInfo));
    auto csr = std::make_unique<MockTbxCommandStreamReceiverHw<FamilyType>>(*localMemDevice->executionEnvironment, localMemDevice->getRootDeviceIndex(), localMemDevice->getDeviceBitfield());

    // The mock reports whatever index is stored in deviceIndex (initially 0).
    for (uint32_t tileIndex = 0u; tileIndex < 4u; ++tileIndex) {
        csr->deviceIndex = tileIndex;
        EXPECT_EQ(MemoryBanks::getBankForLocalMemory(tileIndex), csr->getMemoryBankForGtt());
    }
}
// Without sub-device overrides the physical address allocator is expected to
// expose a single 32 GB bank.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, whenPhysicalAllocatorIsCreatedThenItHasCorrectBankSzieAndNumberOfBanks) {
    setUpImpl<FamilyType>();
    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hardwareInfo));
    auto tbxCsr = std::make_unique<MockTbxCommandStreamReceiverHw<FamilyType>>(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield());

    auto physicalAddressAllocator = tbxCsr->physicalAddressAllocator.get();
    // Fail cleanly instead of dereferencing a null allocator below.
    ASSERT_NE(nullptr, physicalAddressAllocator);

    // Base-to-derived conversion within one hierarchy: static_cast is the
    // appropriate named cast here, not reinterpret_cast.
    auto allocator = static_cast<PhysicalAddressAllocatorHw<FamilyType> *>(physicalAddressAllocator);

    EXPECT_EQ(32 * MemoryConstants::gigaByte, allocator->getBankSize());
    EXPECT_EQ(1u, allocator->getNumberOfBanks());
}
// With CreateMultipleSubDevices=4 the physical address allocator is expected
// to expose four banks of 8 GB each.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, whenPhysicalAllocatorIsCreatedFor4TilesThenItHasCorrectBankSzieAndNumberOfBanks) {
    // The flag is set before the fixture creates its device.
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(4);
    setUpImpl<FamilyType>();

    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hardwareInfo));
    auto tbxCsr = std::make_unique<MockTbxCommandStreamReceiverHw<FamilyType>>(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield());

    auto physicalAddressAllocator = tbxCsr->physicalAddressAllocator.get();
    // Fail cleanly instead of dereferencing a null allocator below.
    ASSERT_NE(nullptr, physicalAddressAllocator);

    // Base-to-derived conversion within one hierarchy: static_cast is the
    // appropriate named cast here, not reinterpret_cast.
    auto allocator = static_cast<PhysicalAddressAllocatorHw<FamilyType> *>(physicalAddressAllocator);

    EXPECT_EQ(8 * MemoryConstants::gigaByte, allocator->getBankSize());
    EXPECT_EQ(4u, allocator->getNumberOfBanks());
}
// Checks the poll-for-completion parameters reported by the TBX CSR:
// mask/value 0x80 and a "poll not equal" flag that is true.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPPlusTbxCommandStreamReceiverTests, whenAskedForPollForCompletionParametersThenReturnCorrectValues) {
    setUpImpl<FamilyType>();

    // Local subclass that makes the two queried getters publicly callable.
    struct PollParamsTbxCsr : public TbxCommandStreamReceiverHw<FamilyType> {
        PollParamsTbxCsr(ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
            : TbxCommandStreamReceiverHw<FamilyType>(executionEnvironment, 0, deviceBitfield) {}

        using TbxCommandStreamReceiverHw<FamilyType>::getMaskAndValueForPollForCompletion;
        using TbxCommandStreamReceiverHw<FamilyType>::getpollNotEqualValueForPollForCompletion;
    };

    PollParamsTbxCsr csr(*pDevice->executionEnvironment, pDevice->getDeviceBitfield());

    EXPECT_EQ(0x80u, csr.getMaskAndValueForPollForCompletion());
    EXPECT_TRUE(csr.getpollNotEqualValueForPollForCompletion());
}

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/device/device.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_deferred_deleter.h"
@ -14,6 +15,7 @@
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.inl"
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/sharings/sharing.h"
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
@ -22,8 +24,7 @@
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "gtest/gtest.h"
#include "test.h"
using namespace NEO;
@ -513,3 +514,56 @@ TEST(Context, givenContextAndDevicesWhenIsTileOnlyThenProperValueReturned) {
EXPECT_FALSE(subDevicesContext.isSingleDeviceContext());
EXPECT_FALSE(multipleDevicesContext.isSingleDeviceContext());
}
// Context creation with an unrecognized property key/value pair is expected
// to fail with CL_INVALID_PROPERTY and return no context.
TEST(InvalidExtraPropertiesTests, givenInvalidExtraPropertiesWhenCreatingContextThenContextIsNotCreated) {
    constexpr cl_context_properties invalidPropertyType = (1 << 31);
    constexpr cl_context_properties invalidContextFlag = (1 << 31);

    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    cl_device_id deviceHandle = clDevice.get();

    // Zero-terminated property list holding a single bogus key/value pair.
    cl_context_properties properties[] = {invalidPropertyType, invalidContextFlag, 0};

    cl_int retVal = 0;
    std::unique_ptr<Context> context(Context::create<Context>(properties, ClDeviceVector(&deviceHandle, 1), nullptr, nullptr, retVal));

    EXPECT_EQ(CL_INVALID_PROPERTY, retVal);
    EXPECT_EQ(nullptr, context.get());
}
using ContextCreateTests = ::testing::Test;
// For a root device and both of its sub-devices: creates a 1-byte buffer and
// checks the result of BlitHelper::blitMemoryToAllocation() with blitter
// operations first reported as unsupported and then as supported.
HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenLocalMemoryAllocationWhenBlitMemoryToAllocationIsCalledThenSuccessIsReturned) {
    if (is32bit) {
        GTEST_SKIP();
    }

    DebugManagerStateRestore restore;
    DebugManager.flags.EnableLocalMemory.set(true);
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));

    UltClDeviceFactory deviceFactory{1, 2};
    ClDevice *devicesToTest[] = {deviceFactory.rootDevices[0], deviceFactory.subDevices[0], deviceFactory.subDevices[1]};

    for (const auto &testedDevice : devicesToTest) {
        MockContext context(testedDevice);

        cl_int retVal = CL_SUCCESS;
        auto buffer = std::unique_ptr<Buffer>(Buffer::create(&context, {}, 1, nullptr, retVal));
        // Stop here on a failed creation instead of dereferencing null below.
        ASSERT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, buffer.get());

        auto memory = buffer->getGraphicsAllocation(testedDevice->getRootDeviceIndex());
        ASSERT_NE(nullptr, memory);

        // Zero-initialized so the blit source is never read uninitialized.
        uint8_t hostMemory[1] = {};

        auto executionEnv = testedDevice->getExecutionEnvironment();
        executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;

        const auto &hwInfo = testedDevice->getHardwareInfo();
        auto isBlitterRequired = HwHelper::get(hwInfo.platform.eRenderCoreFamily).isBlitCopyRequiredForLocalMemory(hwInfo, *memory);

        // With blitter operations reported as unsupported, the expected result
        // depends on whether the helper says a blit copy is required.
        auto expectedStatus = isBlitterRequired ? BlitOperationResult::Success : BlitOperationResult::Unsupported;
        EXPECT_EQ(expectedStatus, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1}));

        // With blitter operations reported as supported, the blit must succeed.
        executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
        EXPECT_EQ(BlitOperationResult::Success, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1}));
    }
}

View File

@ -37,6 +37,7 @@ set(IGDRCL_SRCS_tests_fixtures
${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_fixture.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/multi_root_device_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/multi_tile_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/one_mip_level_image_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.cpp
${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.h

View File

@ -0,0 +1,38 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "opencl/source/platform/platform.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "test.h"
// Test fixture that rebuilds the platform with requiredDeviceCount sub-devices
// (2 by default). ultHwConfig and the debug flags are captured by the
// backup/restore members when the fixture object is constructed.
// NOTE(review): no include guard is visible in this header; confirm whether
// `#pragma once` should be added at the top of the file.
struct MultiTileFixture : public ::testing::Test {
    void SetUp() override {
        ultHwConfig.useHwCsr = true;
        ultHwConfig.forceOsAgnosticMemoryManager = false;
        ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;

        DebugManager.flags.DeferOsContextInitialization.set(0);
        DebugManager.flags.CreateMultipleSubDevices.set(requiredDeviceCount);

        // Drop any existing platform and create a new one under the settings above.
        platformsImpl->clear();
        constructPlatform();
        initPlatform();
    }

  protected:
    VariableBackup<UltHwConfig> backup{&ultHwConfig};
    DebugManagerStateRestore stateRestore;
    // Value passed to the CreateMultipleSubDevices flag; derived fixtures may change it.
    cl_uint requiredDeviceCount = 2u;
};
// MultiTileFixture variant whose constructor sets requiredDeviceCount to 4
// before SetUp() reads it.
struct FourTileFixture : public MultiTileFixture {
FourTileFixture() : MultiTileFixture() { requiredDeviceCount = 4; }
};

View File

@ -62,6 +62,12 @@ set(IGDRCL_SRCS_tests_mocks
${NEO_CORE_tests_compiler_mocks}
)
# Append the XeHP+ scratch space controller mock header to the mocks source
# list only when TESTS_XEHP_PLUS is set.
if(TESTS_XEHP_PLUS)
list(APPEND IGDRCL_SRCS_tests_mocks
${CMAKE_CURRENT_SOURCE_DIR}/mock_scratch_space_controller_xehp_plus.h
)
endif()
if(WIN32)
file(GLOB IGDRCL_SRC_tests_mock_wddm "${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm2[0-9]\.*")
list(APPEND IGDRCL_SRCS_tests_mocks

View File

@ -0,0 +1,27 @@
/*
* Copyright (C) 2016-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/scratch_space_controller_xehp_plus.h"
namespace NEO {
// Test double that inherits the base class constructors and re-declares the
// listed base members with public access so tests can reach them directly.
struct MockScratchSpaceControllerXeHPPlus : public ScratchSpaceControllerXeHPPlus {
    // Inherited constructors.
    using ScratchSpaceControllerXeHPPlus::ScratchSpaceControllerXeHPPlus;

    // Base members made publicly accessible.
    using ScratchSpaceControllerXeHPPlus::computeUnitsUsedForScratch;
    using ScratchSpaceControllerXeHPPlus::getOffsetToSurfaceState;
    using ScratchSpaceControllerXeHPPlus::perThreadScratchSize;
    using ScratchSpaceControllerXeHPPlus::privateScratchAllocation;
    using ScratchSpaceControllerXeHPPlus::privateScratchSizeBytes;
    using ScratchSpaceControllerXeHPPlus::scratchAllocation;
    using ScratchSpaceControllerXeHPPlus::scratchSizeBytes;
    using ScratchSpaceControllerXeHPPlus::singleSurfaceStateSize;
    using ScratchSpaceControllerXeHPPlus::slotId;
    using ScratchSpaceControllerXeHPPlus::stateSlotsCount;
    using ScratchSpaceControllerXeHPPlus::surfaceStateHeap;
    using ScratchSpaceControllerXeHPPlus::updateSlots;
};
} // namespace NEO

View File

@ -32,6 +32,7 @@ bool isCcs(aub_stream::EngineType engineType);
bool isBcs(aub_stream::EngineType engineType);
aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo, SelectorCopyEngine &selectorCopyEngine, bool internalUsage = false);
void releaseBcsEngineType(aub_stream::EngineType engineType, SelectorCopyEngine &selectorCopyEngine);
aub_stream::EngineType remapEngineTypeToHwSpecific(aub_stream::EngineType inputType, const HardwareInfo &hwInfo);
std::string engineTypeToString(aub_stream::EngineType engineType);
std::string engineTypeToStringAdditional(aub_stream::EngineType engineType);

View File

@ -23,5 +23,9 @@ std::string engineTypeToStringAdditional(aub_stream::EngineType engineType) {
return "Unknown";
}
// Returns inputType unchanged; hwInfo is not consulted by this implementation.
aub_stream::EngineType remapEngineTypeToHwSpecific(aub_stream::EngineType inputType, const HardwareInfo &hwInfo) {
return inputType;
}
} // namespace EngineHelpers
} // namespace NEO