mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Move BTI programming to shared code
Change-Id: Ie9d67c1d883f24cfec13ea1618d834d746c0d5be Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
493434c8e9
commit
bf32740f97
@ -10,7 +10,6 @@
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
|
||||
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "test.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
@ -113,7 +112,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind
|
||||
auto dsh = commandList->commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
|
||||
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress()));
|
||||
|
||||
if (NEO::HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
|
||||
if (NEO::EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
||||
uint32_t numArgs = kernel->kernelImmData->getDescriptor().payloadMappings.bindingTable.numEntries;
|
||||
EXPECT_EQ(numArgs, idd->getBindingTableEntryCount());
|
||||
} else {
|
||||
|
@ -186,11 +186,11 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
|
||||
totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
|
||||
surfaceStateHeap.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
auto btOffset = HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount,
|
||||
pBlockInfo->heapInfo.pSsh,
|
||||
pBlockInfo->heapInfo.SurfaceStateHeapSize,
|
||||
bindingTableCount,
|
||||
pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||
auto btOffset = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount,
|
||||
pBlockInfo->heapInfo.pSsh,
|
||||
pBlockInfo->heapInfo.SurfaceStateHeapSize,
|
||||
bindingTableCount,
|
||||
pBlockInfo->patchInfo.bindingTableState->Offset);
|
||||
|
||||
parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast<uint32_t>(btOffset));
|
||||
|
||||
|
@ -14,10 +14,5 @@
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
bool HardwareCommandsHelper<ICLFamily>::doBindingTablePrefetch() {
|
||||
return false;
|
||||
}
|
||||
|
||||
template struct HardwareCommandsHelper<ICLFamily>;
|
||||
} // namespace NEO
|
||||
|
@ -23,10 +23,5 @@ size_t HardwareCommandsHelper<TGLLPFamily>::getSizeRequiredCS(const Kernel *kern
|
||||
return size;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HardwareCommandsHelper<TGLLPFamily>::doBindingTablePrefetch() {
|
||||
return false;
|
||||
}
|
||||
|
||||
template struct HardwareCommandsHelper<TGLLPFamily>;
|
||||
} // namespace NEO
|
||||
|
@ -7,7 +7,6 @@
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
|
||||
#include "opencl/source/helpers/per_thread_data.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
@ -78,10 +77,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
uint32_t &sizeCrossThreadData);
|
||||
|
||||
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount,
|
||||
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||
size_t numberOfBindingTableStates, size_t offsetOfBindingTable);
|
||||
|
||||
static size_t sendIndirectState(
|
||||
LinearStream &commandStream,
|
||||
IndirectHeap &dsh,
|
||||
@ -143,8 +138,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
|
||||
static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);
|
||||
|
||||
static bool doBindingTablePrefetch();
|
||||
|
||||
static bool inlineDataProgrammingRequired(const Kernel &kernel);
|
||||
static bool kernelUsesLocalIds(const Kernel &kernel);
|
||||
};
|
||||
|
@ -198,57 +198,6 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
return (size_t)offsetInterfaceDescriptor;
|
||||
}
|
||||
|
||||
// Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base addess)
|
||||
// as required by the INTERFACE_DESCRIPTOR_DATA.
|
||||
template <typename GfxFamily>
|
||||
size_t HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount,
|
||||
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||
size_t numberOfBindingTableStates, size_t offsetOfBindingTable) {
|
||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
|
||||
if (bindingTableCount == 0) {
|
||||
// according to compiler, kernel does not reference BTIs to stateful surfaces, so there's nothing to patch
|
||||
return 0;
|
||||
}
|
||||
size_t sshSize = srcKernelSshSize;
|
||||
DEBUG_BREAK_IF(srcKernelSsh == nullptr);
|
||||
|
||||
auto srcSurfaceState = srcKernelSsh;
|
||||
// Allocate space for new ssh data
|
||||
auto dstSurfaceState = dstHeap.getSpace(sshSize);
|
||||
|
||||
// Compiler sends BTI table that is already populated with surface state pointers relative to local SSH.
|
||||
// We may need to patch these pointers so that they are relative to surface state base address
|
||||
if (dstSurfaceState == dstHeap.getCpuBase()) {
|
||||
// nothing to patch, we're at the start of heap (which is assumed to be the surface state base address)
|
||||
// we need to simply copy the ssh (including BTIs from compiler)
|
||||
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize);
|
||||
return offsetOfBindingTable;
|
||||
}
|
||||
|
||||
// We can copy-over the surface states, but BTIs will need to be patched
|
||||
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, offsetOfBindingTable);
|
||||
|
||||
uint32_t surfaceStatesOffset = static_cast<uint32_t>(ptrDiff(dstSurfaceState, dstHeap.getCpuBase()));
|
||||
|
||||
// march over BTIs and offset the pointers based on surface state base address
|
||||
auto *dstBtiTableBase = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(dstSurfaceState, offsetOfBindingTable));
|
||||
DEBUG_BREAK_IF(reinterpret_cast<uintptr_t>(dstBtiTableBase) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0);
|
||||
auto *srcBtiTableBase = reinterpret_cast<const BINDING_TABLE_STATE *>(ptrOffset(srcSurfaceState, offsetOfBindingTable));
|
||||
BINDING_TABLE_STATE bti = GfxFamily::cmdInitBindingTableState;
|
||||
for (uint32_t i = 0, e = (uint32_t)numberOfBindingTableStates; i != e; ++i) {
|
||||
uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer();
|
||||
uint32_t offsetedSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset;
|
||||
bti.setSurfaceStatePointer(offsetedSurfaceStateOffset); // patch just the SurfaceStatePointer bits
|
||||
dstBtiTableBase[i] = bti;
|
||||
DEBUG_BREAK_IF(bti.getRawData(0) % sizeof(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE) != 0);
|
||||
}
|
||||
|
||||
return ptrDiff(dstBtiTableBase, dstHeap.getCpuBase());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
LinearStream &commandStream,
|
||||
@ -278,9 +227,9 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
kernel.patchBindlessSurfaceStateOffsets(ssh.getUsed());
|
||||
|
||||
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0,
|
||||
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
|
||||
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
|
||||
auto dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0,
|
||||
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
|
||||
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
|
||||
|
||||
// Copy our sampler state if it exists
|
||||
uint32_t samplerStateOffset = 0;
|
||||
@ -378,11 +327,6 @@ void HardwareCommandsHelper<GfxFamily>::updatePerThreadDataTotal(
|
||||
DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HardwareCommandsHelper<GfxFamily>::doBindingTablePrefetch() {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(const Kernel &kernel) {
|
||||
auto checkKernelForInlineData = true;
|
||||
|
@ -139,7 +139,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HardwareCommandsHelper<GfxFamily>::resetBindingTablePrefetch(Kernel &kernel) {
|
||||
return kernel.isSchedulerKernel || !doBindingTablePrefetch();
|
||||
return kernel.isSchedulerKernel || !EncodeSurfaceState<GfxFamily>::doBindingTablePrefetch();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -18,8 +18,3 @@ GEN11TEST_F(Gen11KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturn
|
||||
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
|
||||
EXPECT_TRUE(retVal);
|
||||
}
|
||||
|
||||
using Gen11HardwareCommandsTest = testing::Test;
|
||||
GEN11TEST_F(Gen11HardwareCommandsTest, givenGen11PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsFalse) {
|
||||
EXPECT_FALSE(HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch());
|
||||
}
|
||||
|
@ -18,7 +18,3 @@ GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenRe
|
||||
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
|
||||
EXPECT_FALSE(retVal);
|
||||
}
|
||||
using Gen12LpHardwareCommandsTest = testing::Test;
|
||||
GEN12LPTEST_F(Gen12LpHardwareCommandsTest, givenGen12LpPlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
|
||||
EXPECT_FALSE(HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch());
|
||||
}
|
||||
|
@ -18,7 +18,3 @@ GEN8TEST_F(Gen8KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsF
|
||||
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
|
||||
EXPECT_FALSE(retVal);
|
||||
}
|
||||
using Gen8HardwareCommandsTest = testing::Test;
|
||||
GEN8TEST_F(Gen8HardwareCommandsTest, givenGen8PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
|
||||
EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch());
|
||||
}
|
||||
|
@ -19,7 +19,3 @@ GEN9TEST_F(Gen9KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsT
|
||||
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
|
||||
EXPECT_TRUE(retVal);
|
||||
}
|
||||
using Gen9HardwareCommandsTest = testing::Test;
|
||||
GEN9TEST_F(Gen9HardwareCommandsTest, givenGen9PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
|
||||
EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch());
|
||||
}
|
||||
|
@ -405,7 +405,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
|
||||
true);
|
||||
|
||||
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
|
||||
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
|
||||
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
||||
EXPECT_EQ(expectedBindingTableCount, interfaceDescriptor->getBindingTableEntryCount());
|
||||
} else {
|
||||
EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount());
|
||||
@ -493,7 +493,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
|
||||
true);
|
||||
|
||||
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
|
||||
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
|
||||
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
||||
EXPECT_EQ(31u, interfaceDescriptor->getBindingTableEntryCount());
|
||||
} else {
|
||||
EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount());
|
||||
@ -833,7 +833,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
|
||||
}
|
||||
|
||||
HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTableStatesForKernelThenSshIsNotUsed) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
@ -889,7 +888,6 @@ HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTabl
|
||||
}
|
||||
|
||||
HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStatesThenPointerIsZero) {
|
||||
|
||||
// define kernel info
|
||||
auto pKernelInfo = std::make_unique<KernelInfo>();
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
@ -44,8 +45,8 @@ struct HardwareCommandsTest : ClDeviceFixture,
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
|
||||
return HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0,
|
||||
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(),
|
||||
srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset());
|
||||
return EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0,
|
||||
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(),
|
||||
srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset());
|
||||
}
|
||||
};
|
||||
|
@ -19,6 +19,7 @@
|
||||
namespace NEO {
|
||||
|
||||
class GmmHelper;
|
||||
class IndirectHeap;
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct EncodeDispatchKernel {
|
||||
@ -207,6 +208,11 @@ struct EncodeSurfaceState {
|
||||
static constexpr uintptr_t getSurfaceBaseAddressAlignment() { return 4; }
|
||||
|
||||
static void getSshAlignedPointer(uintptr_t &ptr, size_t &offset);
|
||||
static bool doBindingTablePrefetch();
|
||||
|
||||
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount,
|
||||
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||
size_t numberOfBindingTableStates, size_t offsetOfBindingTable);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -10,15 +10,16 @@
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/local_id_gen.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/source/helpers/simd_helper.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
|
||||
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@ -56,6 +57,12 @@ uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
|
||||
|
||||
return samplerStateOffsetInDsh;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress) {
|
||||
int logLws = 0;
|
||||
@ -208,26 +215,6 @@ void EncodeMath<Family>::addition(CommandContainer &container,
|
||||
finalResultRegister);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), GPUGPU_DISPATCHDIM[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[i], lws[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
inline void EncodeSetMMIO<Family>::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap) {
|
||||
LriHelper<Family>::program(container.getCommandStream(),
|
||||
@ -308,6 +295,75 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_
|
||||
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeSurfaceState<Family>::getSshAlignedPointer(uintptr_t &ptr, size_t &offset) {
|
||||
auto sshAlignmentMask =
|
||||
getSurfaceBaseAddressAlignmentMask();
|
||||
uintptr_t alignedPtr = ptr & sshAlignmentMask;
|
||||
|
||||
offset = 0;
|
||||
if (ptr != alignedPtr) {
|
||||
offset = ptrDiff(ptr, alignedPtr);
|
||||
ptr = alignedPtr;
|
||||
}
|
||||
}
|
||||
|
||||
// Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base addess)
|
||||
// as required by the INTERFACE_DESCRIPTOR_DATA.
|
||||
template <typename Family>
|
||||
size_t EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount,
|
||||
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||
size_t numberOfBindingTableStates, size_t offsetOfBindingTable) {
|
||||
using BINDING_TABLE_STATE = typename Family::BINDING_TABLE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA;
|
||||
using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE;
|
||||
|
||||
if (bindingTableCount == 0) {
|
||||
// according to compiler, kernel does not reference BTIs to stateful surfaces, so there's nothing to patch
|
||||
return 0;
|
||||
}
|
||||
size_t sshSize = srcKernelSshSize;
|
||||
DEBUG_BREAK_IF(srcKernelSsh == nullptr);
|
||||
|
||||
auto srcSurfaceState = srcKernelSsh;
|
||||
// Allocate space for new ssh data
|
||||
auto dstSurfaceState = dstHeap.getSpace(sshSize);
|
||||
|
||||
// Compiler sends BTI table that is already populated with surface state pointers relative to local SSH.
|
||||
// We may need to patch these pointers so that they are relative to surface state base address
|
||||
if (dstSurfaceState == dstHeap.getCpuBase()) {
|
||||
// nothing to patch, we're at the start of heap (which is assumed to be the surface state base address)
|
||||
// we need to simply copy the ssh (including BTIs from compiler)
|
||||
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize);
|
||||
return offsetOfBindingTable;
|
||||
}
|
||||
|
||||
// We can copy-over the surface states, but BTIs will need to be patched
|
||||
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, offsetOfBindingTable);
|
||||
|
||||
uint32_t surfaceStatesOffset = static_cast<uint32_t>(ptrDiff(dstSurfaceState, dstHeap.getCpuBase()));
|
||||
|
||||
// march over BTIs and offset the pointers based on surface state base address
|
||||
auto *dstBtiTableBase = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(dstSurfaceState, offsetOfBindingTable));
|
||||
DEBUG_BREAK_IF(reinterpret_cast<uintptr_t>(dstBtiTableBase) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0);
|
||||
auto *srcBtiTableBase = reinterpret_cast<const BINDING_TABLE_STATE *>(ptrOffset(srcSurfaceState, offsetOfBindingTable));
|
||||
BINDING_TABLE_STATE bti = Family::cmdInitBindingTableState;
|
||||
for (uint32_t i = 0, e = static_cast<uint32_t>(numberOfBindingTableStates); i != e; ++i) {
|
||||
uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer();
|
||||
uint32_t offsetedSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset;
|
||||
bti.setSurfaceStatePointer(offsetedSurfaceStateOffset); // patch just the SurfaceStatePointer bits
|
||||
dstBtiTableBase[i] = bti;
|
||||
DEBUG_BREAK_IF(bti.getRawData(0) % sizeof(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE) != 0);
|
||||
}
|
||||
|
||||
return ptrDiff(dstBtiTableBase, dstHeap.getCpuBase());
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
|
||||
|
||||
@ -372,8 +428,23 @@ bool EncodeDispatchKernel<Family>::inlineDataProgrammingRequired(const KernelDes
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
|
||||
return 0;
|
||||
void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), GPUGPU_DISPATCHDIM[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[i], lws[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
@ -440,15 +511,15 @@ void EncodeAtomic<Family>::programMiAtomic(MI_ATOMIC *atomic,
|
||||
*atomic = cmd;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void EncodeAtomic<GfxFamily>::programMiAtomic(LinearStream &commandStream,
|
||||
uint64_t writeAddress,
|
||||
ATOMIC_OPCODES opcode,
|
||||
DATA_SIZE dataSize,
|
||||
uint32_t returnDataControl,
|
||||
uint32_t csStall) {
|
||||
template <typename Family>
|
||||
void EncodeAtomic<Family>::programMiAtomic(LinearStream &commandStream,
|
||||
uint64_t writeAddress,
|
||||
ATOMIC_OPCODES opcode,
|
||||
DATA_SIZE dataSize,
|
||||
uint32_t returnDataControl,
|
||||
uint32_t csStall) {
|
||||
auto miAtomic = commandStream.getSpaceForCmd<MI_ATOMIC>();
|
||||
EncodeAtomic<GfxFamily>::programMiAtomic(miAtomic, writeAddress, opcode, dataSize, returnDataControl, csStall);
|
||||
EncodeAtomic<Family>::programMiAtomic(miAtomic, writeAddress, opcode, dataSize, returnDataControl, csStall);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
@ -472,19 +543,6 @@ void EncodeBatchBufferStartOrEnd<Family>::programBatchBufferEnd(CommandContainer
|
||||
*buffer = cmd;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeSurfaceState<Family>::getSshAlignedPointer(uintptr_t &ptr, size_t &offset) {
|
||||
auto sshAlignmentMask =
|
||||
getSurfaceBaseAddressAlignmentMask();
|
||||
uintptr_t alignedPtr = ptr & sshAlignmentMask;
|
||||
|
||||
offset = 0;
|
||||
if (ptr != alignedPtr) {
|
||||
offset = ptrDiff(ptr, alignedPtr);
|
||||
ptr = alignedPtr;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void EncodeMiFlushDW<GfxFamily>::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, bool timeStampOperation, bool commandWithPostSync) {
|
||||
programMiFlushDwWA(commandStream);
|
||||
|
@ -16,8 +16,6 @@
|
||||
#include "shared/source/helpers/state_base_address.h"
|
||||
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
|
||||
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
|
||||
#include "pipe_control_args.h"
|
||||
|
||||
#include <algorithm>
|
||||
@ -86,7 +84,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
if (bindingTableStateCount > 0u) {
|
||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
sshOffset = ssh->getUsed();
|
||||
bindingTablePointer = static_cast<uint32_t>(HardwareCommandsHelper<Family>::pushBindingTableAndSurfaceStates(
|
||||
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
|
||||
*ssh, bindingTableStateCount,
|
||||
dispatchInterface->getSurfaceStateHeapData(),
|
||||
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
|
||||
@ -96,7 +94,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
idd.setBindingTablePointer(bindingTablePointer);
|
||||
|
||||
uint32_t bindingTableStatePrefetchCount = 0;
|
||||
if (HardwareCommandsHelper<Family>::doBindingTablePrefetch()) {
|
||||
if (EncodeSurfaceState<Family>::doBindingTablePrefetch()) {
|
||||
bindingTableStatePrefetchCount = std::min(31u, bindingTableStateCount);
|
||||
}
|
||||
idd.setBindingTableEntryCount(bindingTableStatePrefetchCount);
|
||||
|
@ -16,6 +16,12 @@ using Family = NEO::ICLFamily;
|
||||
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
|
||||
return false;
|
||||
}
|
||||
|
||||
template struct EncodeDispatchKernel<Family>;
|
||||
template struct EncodeStates<Family>;
|
||||
template struct EncodeMath<Family>;
|
||||
|
@ -15,6 +15,7 @@ using Family = NEO::TGLLPFamily;
|
||||
#include "shared/source/command_container/command_encoder.inl"
|
||||
#include "shared/source/command_container/command_encoder_base.inl"
|
||||
#include "shared/source/command_container/encode_compute_mode_tgllp_plus.inl"
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
namespace NEO {
|
||||
template <>
|
||||
@ -65,6 +66,11 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfac
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
|
||||
return false;
|
||||
}
|
||||
|
||||
template struct EncodeDispatchKernel<Family>;
|
||||
template struct EncodeStates<Family>;
|
||||
template struct EncodeMath<Family>;
|
||||
|
@ -17,6 +17,7 @@ if(TESTS_GEN11)
|
||||
|
||||
target_sources(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen11.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen11.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen11.cpp
|
||||
${COMPUTE_RUNTIME_ULT_GEN11}
|
||||
|
17
shared/test/unit_test/gen11/command_encoder_tests_gen11.cpp
Normal file
17
shared/test/unit_test/gen11/command_encoder_tests_gen11.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using Gen11CommandEncodeTest = testing::Test;
|
||||
GEN11TEST_F(Gen11CommandEncodeTest, givenGen11PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsFalse) {
|
||||
EXPECT_FALSE(EncodeSurfaceState<FamilyType>::doBindingTablePrefetch());
|
||||
}
|
@ -17,6 +17,7 @@ if(TESTS_GEN12LP)
|
||||
|
||||
target_sources(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen12lp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen12lp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen12lp.cpp
|
||||
${COMPUTE_RUNTIME_ULT_GEN12LP}
|
||||
|
@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using Gen12LpCommandEncodeTest = testing::Test;
|
||||
GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
|
||||
EXPECT_FALSE(EncodeSurfaceState<FamilyType>::doBindingTablePrefetch());
|
||||
}
|
@ -16,6 +16,7 @@ if(TESTS_GEN8)
|
||||
|
||||
target_sources(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen8.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen8.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_tests_gen8.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen8.cpp
|
||||
|
17
shared/test/unit_test/gen8/command_encoder_tests_gen8.cpp
Normal file
17
shared/test/unit_test/gen8/command_encoder_tests_gen8.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using Gen8CommandEncodeTest = testing::Test;
|
||||
GEN8TEST_F(Gen8CommandEncodeTest, givenGen8PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
|
||||
EXPECT_TRUE(EncodeSurfaceState<FamilyType>::doBindingTablePrefetch());
|
||||
}
|
@ -17,6 +17,7 @@ if(TESTS_GEN9)
|
||||
|
||||
target_sources(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen9.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen9.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen9.cpp
|
||||
${COMPUTE_RUNTIME_ULT_GEN9}
|
||||
|
17
shared/test/unit_test/gen9/command_encoder_tests_gen9.cpp
Normal file
17
shared/test/unit_test/gen9/command_encoder_tests_gen9.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using Gen9CommandEncodeTest = testing::Test;
|
||||
GEN9TEST_F(Gen9CommandEncodeTest, givenGen9PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
|
||||
EXPECT_TRUE(EncodeSurfaceState<FamilyType>::doBindingTablePrefetch());
|
||||
}
|
Reference in New Issue
Block a user