mirror of https://github.com/intel/compute-runtime.git
synced 2026-01-05 17:41:26 +08:00
Refactor implicit scaling parameters for surface state
Related-To: NEO-6589
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
committed by Compute-Runtime-Automation
parent 79c8605ed2
commit c36c083812
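In brief, this change adds an implicitScaling field to NEO::EncodeSurfaceStateArgs and threads it through every surface-state encoding path: the command list sets it from partitionCount, the kernel paths derive it from Device::isImplicitScalingCapable(), and patchWithImplicitSurface() now receives it as a parameter. A new multi-tile test verifies the resulting multi-GPU surface-state bits.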
@@ -266,6 +266,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
         args.gmmHelper = neoDevice->getGmmHelper();
         args.useGlobalAtomics = kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics;
         args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
+        args.implicitScaling = this->partitionCount > 1;
 
         NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
         *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -63,12 +63,14 @@ struct KernelHw : public KernelImp {
         bufferSizeForSsh = alignUp(bufferSizeForSsh, alignment);
 
         bool l3Enabled = true;
 
         // Allocation MUST be cacheline (64 byte) aligned in order to enable L3 caching otherwise Heap corruption will occur coming from the KMD.
         // Most commonly this issue will occur with Host Point Allocations from customers.
         l3Enabled = isL3Capable(*alloc);
 
-        auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
+        Device *device = module->getDevice();
+        NEO::Device *neoDevice = device->getNEODevice();
+
+        auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
         if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
             l3Enabled = false;
         }
@@ -77,18 +79,17 @@ struct KernelHw : public KernelImp {
             this->kernelRequiresQueueUncachedMocsCount++;
         }
 
-        NEO::Device *neoDevice = module->getDevice()->getNEODevice();
-
         NEO::EncodeSurfaceStateArgs args;
         args.outMemory = &surfaceState;
         args.graphicsAddress = bufferAddressForSsh;
         args.size = bufferSizeForSsh;
-        args.mocs = this->module->getDevice()->getMOCS(l3Enabled, false);
+        args.mocs = device->getMOCS(l3Enabled, false);
         args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
         args.allocation = alloc;
         args.gmmHelper = neoDevice->getGmmHelper();
         args.useGlobalAtomics = kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics;
         args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
+        args.implicitScaling = device->isImplicitScalingCapable();
 
         NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
         *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress) = surfaceState;
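For context: args.implicitScaling feeds the multi-GPU fields of RENDER_SURFACE_STATE once encodeBuffer() runs. A minimal sketch of the gating logic, assuming the encoder clears the multi-GPU "disable" bits whenever implicit scaling is active (stand-in types and names; the real logic lives in NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer()):

// Sketch only: illustrative stand-ins for RENDER_SURFACE_STATE and
// NEO::EncodeSurfaceStateArgs; expected bit values are taken from the
// multi-tile test at the end of this diff.
struct SurfaceStateBitsSketch {
    bool disableSupportForMultiGpuAtomics = true;       // disabled by default
    bool disableSupportForMultiGpuPartialWrites = true; // disabled by default
};

struct EncodeArgsSketch {
    bool implicitScaling = false;
    bool useGlobalAtomics = false;
    bool areMultipleSubDevicesInContext = false;
};

void encodeMultiGpuBits(SurfaceStateBitsSketch &state, const EncodeArgsSketch &args) {
    // When a kernel is implicitly scaled across tiles, the surface must allow
    // cross-tile partial writes and atomics, so both disable bits are cleared.
    bool enablePartialWrites = args.implicitScaling;
    bool enableMultiGpuAtomics = args.implicitScaling ||
                                 (args.useGlobalAtomics && args.areMultipleSubDevicesInContext);
    state.disableSupportForMultiGpuAtomics = !enableMultiGpuAtomics;
    state.disableSupportForMultiGpuPartialWrites = !enablePartialWrites;
}

Under this assumption, implicitScaling = true yields both disable bits false, which is exactly what the new test asserts.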
@@ -84,7 +84,8 @@ KernelImmutableData::~KernelImmutableData() {
 
 inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef<uint8_t> surfaceStateHeap,
                                      uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation,
-                                     const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics) {
+                                     const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics,
+                                     bool implicitScaling) {
     if (false == crossThreadData.empty()) {
         NEO::patchPointer(crossThreadData, ptr, ptrToPatchInCrossThreadData);
     }
@@ -107,6 +108,7 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
     args.numAvailableDevices = device.getNumGenericSubDevices();
     args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
     args.mocs = hwHelper.getMocsIndex(*args.gmmHelper, true, false) << 1;
+    args.implicitScaling = implicitScaling;
 
     hwHelper.encodeBufferSurfaceState(args);
 }
@@ -179,7 +181,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
         patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
                                  static_cast<uintptr_t>(globalConstBuffer->getGpuAddressToPatch()),
                                  *globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress,
-                                 *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
+                                 *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics, deviceImp->isImplicitScalingCapable());
         this->residencyContainer.push_back(globalConstBuffer);
     } else if (nullptr != globalConstBuffer) {
         this->residencyContainer.push_back(globalConstBuffer);
@@ -191,7 +193,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
         patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
                                  static_cast<uintptr_t>(globalVarBuffer->getGpuAddressToPatch()),
                                  *globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress,
-                                 *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
+                                 *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics, deviceImp->isImplicitScalingCapable());
         this->residencyContainer.push_back(globalVarBuffer);
     } else if (nullptr != globalVarBuffer) {
         this->residencyContainer.push_back(globalVarBuffer);
@@ -758,7 +760,7 @@ void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocatio
     patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
                              static_cast<uintptr_t>(privateAllocation->getGpuAddressToPatch()),
                              *privateAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress,
-                             *device->getNEODevice(), kernelAttributes.flags.useGlobalAtomics);
+                             *device->getNEODevice(), kernelAttributes.flags.useGlobalAtomics, device->isImplicitScalingCapable());
 }
 
 ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
@@ -941,7 +943,7 @@ void KernelImp::setDebugSurface() {
         patchWithImplicitSurface(ArrayRef<uint8_t>(), surfaceStateHeapRef,
                                  0,
                                  *device->getDebugSurface(), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.systemThreadSurfaceAddress,
-                                 *device->getNEODevice(), getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics);
+                                 *device->getNEODevice(), getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, device->isImplicitScalingCapable());
     }
 }
 void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) {
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -7,6 +7,7 @@
 
 #pragma once
 
+#include "shared/source/command_container/implicit_scaling.h"
 #include "shared/source/helpers/file_io.h"
 #include "shared/source/memory_manager/allocation_properties.h"
 #include "shared/source/program/kernel_info.h"
@@ -392,5 +393,26 @@ struct ImportHostPointerModuleFixture : public ModuleFixture {
     void *hostPointer = nullptr;
 };
 
+struct MultiTileModuleFixture : public MultiDeviceModuleFixture {
+    void SetUp() {
+        DebugManager.flags.EnableImplicitScaling.set(1);
+        MultiDeviceFixture::numRootDevices = 1u;
+        MultiDeviceFixture::numSubDevices = 2u;
+
+        MultiDeviceModuleFixture::SetUp();
+        createModuleFromBinary(0);
+
+        device = driverHandle->devices[0];
+    }
+
+    void TearDown() {
+        MultiDeviceModuleFixture::TearDown();
+    }
+
+    DebugManagerStateRestore debugRestore;
+    VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true};
+    L0::Device *device = nullptr;
+};
+
 } // namespace ult
 } // namespace L0
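The fixture pins everything the implicit-scaling path depends on: the EnableImplicitScaling debug flag, the NEO::ImplicitScaling::apiSupport global (restored via VariableBackup), and a one-root-device, two-sub-device topology. A hedged sketch of how a capability check in the spirit of Device::isImplicitScalingCapable() could combine these inputs (an illustrative assumption, not the driver's actual code):

// Sketch with assumed inputs: apiSupport mirrors NEO::ImplicitScaling::apiSupport,
// debugFlag mirrors DebugManager.flags.EnableImplicitScaling (-1 = unset),
// and subDeviceCount comes from the device topology (2 in this fixture).
bool isImplicitScalingCapableSketch(bool apiSupport, int debugFlag, unsigned subDeviceCount) {
    bool enabled = apiSupport;
    if (debugFlag != -1) {
        enabled = (debugFlag == 1); // an explicit debug flag overrides the API default
    }
    // Implicit scaling only makes sense with more than one tile.
    return enabled && subDeviceCount > 1;
}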
@@ -2328,5 +2328,34 @@ TEST_F(KernelImplicitArgTests, givenKernelWithoutImplicitArgsWhenPatchingImplici
     EXPECT_EQ(0, memcmp(data, initData, 64));
 }
 
+using MultiTileModuleTest = Test<MultiTileModuleFixture>;
+
+HWTEST2_F(MultiTileModuleTest, GivenMultiTileDeviceWhenSettingKernelArgAndSurfaceStateThenMultiTileFlagsAreSetCorrectly, IsXEHP) {
+    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
+    ze_kernel_desc_t desc = {};
+    desc.pKernelName = kernelName.c_str();
+
+    WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
+    mockKernel.module = modules[0].get();
+    mockKernel.initialize(&desc);
+
+    auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
+    arg.bindless = undefined<CrossThreadDataOffset>;
+    arg.bindful = 0x40;
+
+    constexpr size_t size = 128;
+    uint64_t gpuAddress = 0x2000;
+    char bufferArray[size] = {};
+    void *buffer = reinterpret_cast<void *>(bufferArray);
+    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
+
+    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
+
+    void *surfaceStateAddress = ptrOffset(mockKernel.surfaceStateHeapData.get(), arg.bindful);
+    RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateAddress);
+    EXPECT_FALSE(surfaceState->getDisableSupportForMultiGpuAtomics());
+    EXPECT_FALSE(surfaceState->getDisableSupportForMultiGpuPartialWrites());
+}
+
 } // namespace ult
 } // namespace L0
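Taken together, the test builds a kernel on a two-sub-device module, patches a bindful buffer argument at surface-state-heap offset 0x40, and asserts that with implicit scaling enabled the encoder leaves multi-GPU atomics and partial writes usable, that is, both disable bits read back false.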