AuxTranslationMode fallback path

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2021-03-01 15:05:04 +00:00
committed by Compute-Runtime-Automation
parent 17ce62a4e8
commit 9c164a01a6
13 changed files with 98 additions and 75 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -488,5 +488,7 @@ class CommandQueueHw : public CommandQueue {
bool isGpgpuSubmissionForBcsRequired(bool queueBlocked) const;
void setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType);
bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo);
};
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -112,10 +112,6 @@ cl_int CommandQueueHw<Family>::enqueueMarkerForReadWriteOperation(MemObj *memObj
template <typename Family>
void CommandQueueHw<Family>::dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo,
AuxTranslationDirection auxTranslationDirection) {
if (HwHelperHw<Family>::getAuxTranslationMode() != AuxTranslationMode::Builtin) {
return;
}
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
auto &auxTranslationBuilder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(builder);
BuiltinOpParams dispatchParams;

View File

@ -61,20 +61,30 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
MultiDispatchInfo multiDispatchInfo(kernel);
auto auxTranslationMode = AuxTranslationMode::None;
if (DebugManager.flags.ForceDispatchScheduler.get()) {
forceDispatchScheduler(multiDispatchInfo);
} else {
auto rootDeviceIndex = device->getRootDeviceIndex();
kernel->updateAuxTranslationRequired();
if (kernel->isAuxTranslationRequired()) {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
builtInLock.takeOwnership(builder);
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation, rootDeviceIndex);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
if (!kernelObjsForAuxTranslation.empty()) {
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
}
}
if (AuxTranslationMode::Builtin == auxTranslationMode) {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
builtInLock.takeOwnership(builder);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
}
if (kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder == nullptr) {
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
@ -88,15 +98,14 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
return;
}
}
if (kernel->isAuxTranslationRequired()) {
if (!kernelObjsForAuxTranslation.empty()) {
UNRECOVERABLE_IF(kernel->isParentKernel);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
}
if (AuxTranslationMode::Builtin == auxTranslationMode) {
UNRECOVERABLE_IF(kernel->isParentKernel);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
}
}
if (ClHwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo)) {
if (AuxTranslationMode::Blit == auxTranslationMode) {
setupBlitAuxTranslation(multiDispatchInfo);
}
@ -195,7 +204,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
CsrDependencies csrDeps;
BlitPropertiesContainer blitPropertiesContainer;
bool enqueueWithBlitAuxTranslation = ClHwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo);
bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo);
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventsRequest.fillCsrDependencies(csrDeps, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
@ -223,7 +232,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
auto commandStreamStart = commandStream.getUsed();
if (ClHwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo)) {
if (enqueueWithBlitAuxTranslation) {
processDispatchForBlitAuxTranslation(multiDispatchInfo, blitPropertiesContainer, timestampPacketDependencies,
eventsRequest, blockQueue);
}
@ -1176,4 +1185,12 @@ void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dis
event);
}
}
template <typename GfxFamily>
bool CommandQueueHw<GfxFamily>::isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo) {
return multiDispatchInfo.getKernelObjsForAuxTranslation() &&
(multiDispatchInfo.getKernelObjsForAuxTranslation()->size() > 0) &&
(HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo()) == AuxTranslationMode::Blit);
}
} // namespace NEO

View File

@ -46,7 +46,6 @@ class ClHwHelperHw : public ClHwHelper {
return clHwHelper;
}
static bool isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo);
bool requiresAuxResolves(const KernelInfo &kernelInfo) const override;
bool allowRenderCompressionForContext(const HardwareInfo &hwInfo, const Context &context) const override;
cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const override;

View File

@ -13,14 +13,6 @@
namespace NEO {
template <typename Family>
bool ClHwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo) {
return (HwHelperHw<Family>::getAuxTranslationMode() == AuxTranslationMode::Blit) &&
hwInfo.capabilityTable.blitterOperationsSupported &&
multiDispatchInfo.getKernelObjsForAuxTranslation() &&
(multiDispatchInfo.getKernelObjsForAuxTranslation()->size() > 0);
}
template <typename GfxFamily>
inline bool ClHwHelperHw<GfxFamily>::requiresAuxResolves(const KernelInfo &kernelInfo) const {
return hasStatelessAccessToBuffer(kernelInfo);

View File

@ -514,6 +514,7 @@ HWTEST2_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalle
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
} else {
auto gfxAllocation = mockKernelObjForAuxTranslation.mockGraphicsAllocation.get();
gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
auto ptr = reinterpret_cast<void *>(gfxAllocation->getGpuAddressToPatch());
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, gfxAllocation);
}
@ -525,6 +526,7 @@ HWTEST2_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalle
mockKernel.mockKernel->auxTranslationRequired = true;
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, mockBuiltinKernel->takeOwnershipCalls);
EXPECT_EQ(1u, mockBuiltinKernel->releaseOwnershipCalls);
}

View File

@ -96,6 +96,35 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingTwoCommandQueuesThenOnlyOneDebugSur
EXPECT_EQ(dbgSurface, device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation());
}
HWTEST_F(CommandQueueHwTest, givenMultiDispatchInfoWhenAskingForAuxTranslationThenCheckMemObjectsCountAndDebugFlag) {
DebugManagerStateRestore restore;
MockBuffer buffer;
KernelObjsForAuxTranslation kernelObjects;
MultiDispatchInfo multiDispatchInfo;
HardwareInfo *hwInfo = pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo();
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
MockCommandQueueHw<FamilyType> mockCmdQueueHw(context, pClDevice, nullptr);
hwInfo->capabilityTable.blitterOperationsSupported = true;
EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo));
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjects);
EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo));
kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, &buffer});
EXPECT_TRUE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo));
hwInfo->capabilityTable.blitterOperationsSupported = false;
EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo));
hwInfo->capabilityTable.blitterOperationsSupported = true;
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Builtin));
EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo));
}
HWTEST_F(CommandQueueHwTest, WhenEnqueuingBlockedMapUnmapOperationThenVirtualEventIsCreated) {
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);

View File

@ -930,10 +930,14 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto hwInfo = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo();
MockKernelWithInternals mockKernel(*pClDevice, context);
MyCmdQ<FamilyType> cmdQ(context, pClDevice);
cmdQ.bcsEngine = cmdQ.gpgpuEngine;
hwInfo->capabilityTable.blitterOperationsSupported = true;
size_t gws[3] = {1, 0, 0};
MockBuffer buffer;
cl_mem clMem = &buffer;
@ -947,15 +951,7 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
// aux builtin not dispatched before NDR
EXPECT_EQ(0u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(0)));
// only NDR is dispatched
EXPECT_EQ(1u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(1)));
auto kernel = std::get<Kernel *>(cmdQ.dispatchAuxTranslationInputs.at(1));
EXPECT_FALSE(kernel->isBuiltIn);
EXPECT_EQ(0u, cmdQ.dispatchAuxTranslationInputs.size());
}
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker) {

View File

@ -734,52 +734,33 @@ HWTEST_F(HwHelperTest, whenQueryingMaxNumSamplersThenReturnSixteen) {
EXPECT_EQ(16u, helper.getMaxNumSamplers());
}
HWTEST_F(HwHelperTest, givenMultiDispatchInfoWhenAskingForAuxTranslationThenCheckMemObjectsCountAndDebugFlag) {
DebugManagerStateRestore restore;
MockBuffer buffer;
KernelObjsForAuxTranslation kernelObjects;
MultiDispatchInfo multiDispatchInfo;
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.capabilityTable.blitterOperationsSupported = true;
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
EXPECT_FALSE(ClHwHelperHw<FamilyType>::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo));
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjects);
EXPECT_FALSE(ClHwHelperHw<FamilyType>::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo));
kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, &buffer});
EXPECT_TRUE(ClHwHelperHw<FamilyType>::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo));
hwInfo.capabilityTable.blitterOperationsSupported = false;
EXPECT_FALSE(ClHwHelperHw<FamilyType>::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo));
hwInfo.capabilityTable.blitterOperationsSupported = true;
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Builtin));
EXPECT_FALSE(ClHwHelperHw<FamilyType>::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo));
}
HWTEST_F(HwHelperTest, givenDebugVariableSetWhenAskingForAuxTranslationModeThenReturnCorrectValue) {
DebugManagerStateRestore restore;
EXPECT_EQ(UnitTestHelper<FamilyType>::requiredAuxTranslationMode, HwHelperHw<FamilyType>::getAuxTranslationMode());
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.capabilityTable.blitterOperationsSupported = true;
if (HwHelperHw<FamilyType>::getAuxTranslationMode() == AuxTranslationMode::Blit) {
auto hwInfoConfig = HwInfoConfig::get(productFamily);
HardwareInfo hwInfo = *defaultHwInfo;
hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr);
EXPECT_TRUE(hwInfo.capabilityTable.blitterOperationsSupported);
EXPECT_EQ(UnitTestHelper<FamilyType>::requiredAuxTranslationMode, HwHelperHw<FamilyType>::getAuxTranslationMode(hwInfo));
if (HwHelperHw<FamilyType>::getAuxTranslationMode(hwInfo) == AuxTranslationMode::Blit) {
hwInfo.capabilityTable.blitterOperationsSupported = false;
EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw<FamilyType>::getAuxTranslationMode(hwInfo));
}
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::None));
EXPECT_EQ(AuxTranslationMode::None, HwHelperHw<FamilyType>::getAuxTranslationMode());
EXPECT_EQ(AuxTranslationMode::None, HwHelperHw<FamilyType>::getAuxTranslationMode(hwInfo));
hwInfo.capabilityTable.blitterOperationsSupported = false;
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
EXPECT_EQ(AuxTranslationMode::Blit, HwHelperHw<FamilyType>::getAuxTranslationMode());
EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw<FamilyType>::getAuxTranslationMode(hwInfo));
hwInfo.capabilityTable.blitterOperationsSupported = true;
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Blit));
EXPECT_EQ(AuxTranslationMode::Blit, HwHelperHw<FamilyType>::getAuxTranslationMode(hwInfo));
DebugManager.flags.ForceAuxTranslationMode.set(static_cast<int32_t>(AuxTranslationMode::Builtin));
EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw<FamilyType>::getAuxTranslationMode());
EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw<FamilyType>::getAuxTranslationMode(hwInfo));
}
HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForTilingSupportThenReturnValidValue) {

View File

@ -203,6 +203,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
using BaseClass::commandQueueProperties;
using BaseClass::commandStream;
using BaseClass::gpgpuEngine;
using BaseClass::isBlitAuxTranslationRequired;
using BaseClass::latestSentEnqueueType;
using BaseClass::obtainCommandStream;
using BaseClass::obtainNewTimestampPacketNodes;

View File

@ -35,6 +35,7 @@ struct MockKernelObjForAuxTranslation : public KernelObjForAuxTranslation {
MockKernelObjForAuxTranslation(Type type) : KernelObjForAuxTranslation(type, nullptr) {
if (type == KernelObjForAuxTranslation::Type::MEM_OBJ) {
mockBuffer.reset(new MockBuffer);
mockBuffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
this->object = mockBuffer.get();
} else {
DEBUG_BREAK_IF(type != KernelObjForAuxTranslation::Type::GFX_ALLOC);

View File

@ -149,7 +149,7 @@ class HwHelper {
template <typename GfxFamily>
class HwHelperHw : public HwHelper {
public:
static HwHelper &get() {
static HwHelperHw<GfxFamily> &get() {
static HwHelperHw<GfxFamily> hwHelper;
return hwHelper;
}
@ -268,7 +268,7 @@ class HwHelperHw : public HwHelper {
uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) override;
static AuxTranslationMode getAuxTranslationMode();
static AuxTranslationMode getAuxTranslationMode(const HardwareInfo &hwInfo);
uint32_t getHwRevIdFromStepping(uint32_t stepping, const HardwareInfo &hwInfo) const override;

View File

@ -173,12 +173,19 @@ bool HwHelperHw<Family>::getEnableLocalMemory(const HardwareInfo &hwInfo) const
}
template <typename Family>
AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode() {
AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode(const HardwareInfo &hwInfo) {
auto mode = HwHelperHw<Family>::defaultAuxTranslationMode;
if (DebugManager.flags.ForceAuxTranslationMode.get() != -1) {
return static_cast<AuxTranslationMode>(DebugManager.flags.ForceAuxTranslationMode.get());
mode = static_cast<AuxTranslationMode>(DebugManager.flags.ForceAuxTranslationMode.get());
}
return HwHelperHw<Family>::defaultAuxTranslationMode;
if (mode == AuxTranslationMode::Blit && !hwInfo.capabilityTable.blitterOperationsSupported) {
DEBUG_BREAK_IF(true);
mode = AuxTranslationMode::Builtin;
}
return mode;
}
template <typename GfxFamily>