Move BTI programming to shared code

Change-Id: Ie9d67c1d883f24cfec13ea1618d834d746c0d5be
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-10-07 14:22:26 +02:00
committed by sys_ocldev
parent 493434c8e9
commit bf32740f97
26 changed files with 211 additions and 157 deletions

View File

@ -10,7 +10,6 @@
#include "shared/source/helpers/register_offsets.h"
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
@ -113,7 +112,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind
auto dsh = commandList->commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress()));
if (NEO::HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
if (NEO::EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
uint32_t numArgs = kernel->kernelImmData->getDescriptor().payloadMappings.bindingTable.numEntries;
EXPECT_EQ(numArgs, idd->getBindingTableEntryCount());
} else {

View File

@ -186,11 +186,11 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
surfaceStateHeap.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
auto btOffset = HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount,
pBlockInfo->heapInfo.pSsh,
pBlockInfo->heapInfo.SurfaceStateHeapSize,
bindingTableCount,
pBlockInfo->patchInfo.bindingTableState->Offset);
auto btOffset = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount,
pBlockInfo->heapInfo.pSsh,
pBlockInfo->heapInfo.SurfaceStateHeapSize,
bindingTableCount,
pBlockInfo->patchInfo.bindingTableState->Offset);
parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast<uint32_t>(btOffset));

View File

@ -14,10 +14,5 @@
namespace NEO {
template <>
bool HardwareCommandsHelper<ICLFamily>::doBindingTablePrefetch() {
return false;
}
template struct HardwareCommandsHelper<ICLFamily>;
} // namespace NEO

View File

@ -23,10 +23,5 @@ size_t HardwareCommandsHelper<TGLLPFamily>::getSizeRequiredCS(const Kernel *kern
return size;
}
template <>
bool HardwareCommandsHelper<TGLLPFamily>::doBindingTablePrefetch() {
return false;
}
template struct HardwareCommandsHelper<TGLLPFamily>;
} // namespace NEO

View File

@ -7,7 +7,6 @@
#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "opencl/source/helpers/per_thread_data.h"
#include "opencl/source/kernel/kernel.h"
@ -78,10 +77,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
WALKER_TYPE<GfxFamily> *walkerCmd,
uint32_t &sizeCrossThreadData);
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount,
const void *srcKernelSsh, size_t srcKernelSshSize,
size_t numberOfBindingTableStates, size_t offsetOfBindingTable);
static size_t sendIndirectState(
LinearStream &commandStream,
IndirectHeap &dsh,
@ -143,8 +138,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);
static bool doBindingTablePrefetch();
static bool inlineDataProgrammingRequired(const Kernel &kernel);
static bool kernelUsesLocalIds(const Kernel &kernel);
};

View File

@ -198,57 +198,6 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
return (size_t)offsetInterfaceDescriptor;
}
// Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base address)
// as required by the INTERFACE_DESCRIPTOR_DATA.
template <typename GfxFamily>
size_t HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount,
const void *srcKernelSsh, size_t srcKernelSshSize,
size_t numberOfBindingTableStates, size_t offsetOfBindingTable) {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
if (bindingTableCount == 0) {
// according to compiler, kernel does not reference BTIs to stateful surfaces, so there's nothing to patch
return 0;
}
size_t sshSize = srcKernelSshSize;
DEBUG_BREAK_IF(srcKernelSsh == nullptr);
auto srcSurfaceState = srcKernelSsh;
// Allocate space for new ssh data
auto dstSurfaceState = dstHeap.getSpace(sshSize);
// Compiler sends BTI table that is already populated with surface state pointers relative to local SSH.
// We may need to patch these pointers so that they are relative to surface state base address
if (dstSurfaceState == dstHeap.getCpuBase()) {
// nothing to patch, we're at the start of heap (which is assumed to be the surface state base address)
// we need to simply copy the ssh (including BTIs from compiler)
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize);
return offsetOfBindingTable;
}
// We can copy-over the surface states, but BTIs will need to be patched
memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, offsetOfBindingTable);
uint32_t surfaceStatesOffset = static_cast<uint32_t>(ptrDiff(dstSurfaceState, dstHeap.getCpuBase()));
// march over BTIs and offset the pointers based on surface state base address
auto *dstBtiTableBase = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(dstSurfaceState, offsetOfBindingTable));
DEBUG_BREAK_IF(reinterpret_cast<uintptr_t>(dstBtiTableBase) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0);
auto *srcBtiTableBase = reinterpret_cast<const BINDING_TABLE_STATE *>(ptrOffset(srcSurfaceState, offsetOfBindingTable));
BINDING_TABLE_STATE bti = GfxFamily::cmdInitBindingTableState;
for (uint32_t i = 0, e = (uint32_t)numberOfBindingTableStates; i != e; ++i) {
uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer();
uint32_t offsetedSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset;
bti.setSurfaceStatePointer(offsetedSurfaceStateOffset); // patch just the SurfaceStatePointer bits
dstBtiTableBase[i] = bti;
DEBUG_BREAK_IF(bti.getRawData(0) % sizeof(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE) != 0);
}
return ptrDiff(dstBtiTableBase, dstHeap.getCpuBase());
}
template <typename GfxFamily>
size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
LinearStream &commandStream,
@ -278,9 +227,9 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
kernel.patchBindlessSurfaceStateOffsets(ssh.getUsed());
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0,
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
auto dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0,
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
// Copy our sampler state if it exists
uint32_t samplerStateOffset = 0;
@ -378,11 +327,6 @@ void HardwareCommandsHelper<GfxFamily>::updatePerThreadDataTotal(
DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group
}
template <typename GfxFamily>
bool HardwareCommandsHelper<GfxFamily>::doBindingTablePrefetch() {
return true;
}
template <typename GfxFamily>
bool HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(const Kernel &kernel) {
auto checkKernelForInlineData = true;

View File

@ -139,7 +139,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
template <typename GfxFamily>
bool HardwareCommandsHelper<GfxFamily>::resetBindingTablePrefetch(Kernel &kernel) {
return kernel.isSchedulerKernel || !doBindingTablePrefetch();
return kernel.isSchedulerKernel || !EncodeSurfaceState<GfxFamily>::doBindingTablePrefetch();
}
template <typename GfxFamily>

View File

@ -18,8 +18,3 @@ GEN11TEST_F(Gen11KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturn
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
EXPECT_TRUE(retVal);
}
using Gen11HardwareCommandsTest = testing::Test;
GEN11TEST_F(Gen11HardwareCommandsTest, givenGen11PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsFalse) {
EXPECT_FALSE(HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch());
}

View File

@ -18,7 +18,3 @@ GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenRe
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
EXPECT_FALSE(retVal);
}
using Gen12LpHardwareCommandsTest = testing::Test;
GEN12LPTEST_F(Gen12LpHardwareCommandsTest, givenGen12LpPlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
EXPECT_FALSE(HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch());
}

View File

@ -18,7 +18,3 @@ GEN8TEST_F(Gen8KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsF
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
EXPECT_FALSE(retVal);
}
using Gen8HardwareCommandsTest = testing::Test;
GEN8TEST_F(Gen8HardwareCommandsTest, givenGen8PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch());
}

View File

@ -19,7 +19,3 @@ GEN9TEST_F(Gen9KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsT
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages();
EXPECT_TRUE(retVal);
}
using Gen9HardwareCommandsTest = testing::Test;
GEN9TEST_F(Gen9HardwareCommandsTest, givenGen9PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch());
}

View File

@ -405,7 +405,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
true);
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
EXPECT_EQ(expectedBindingTableCount, interfaceDescriptor->getBindingTableEntryCount());
} else {
EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount());
@ -493,7 +493,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
true);
auto interfaceDescriptor = reinterpret_cast<INTERFACE_DESCRIPTOR_DATA *>(dsh.getCpuBase());
if (HardwareCommandsHelper<FamilyType>::doBindingTablePrefetch()) {
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
EXPECT_EQ(31u, interfaceDescriptor->getBindingTableEntryCount());
} else {
EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount());
@ -833,7 +833,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
}
HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTableStatesForKernelThenSshIsNotUsed) {
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();
@ -889,7 +888,6 @@ HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTabl
}
HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStatesThenPointerIsZero) {
// define kernel info
auto pKernelInfo = std::make_unique<KernelInfo>();

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/command_container/command_encoder.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/kernel/kernel.h"
@ -44,8 +45,8 @@ struct HardwareCommandsTest : ClDeviceFixture,
template <typename GfxFamily>
size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
return HardwareCommandsHelper<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0,
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(),
srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset());
return EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0,
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(),
srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset());
}
};

View File

@ -19,6 +19,7 @@
namespace NEO {
class GmmHelper;
class IndirectHeap;
template <typename GfxFamily>
struct EncodeDispatchKernel {
@ -207,6 +208,11 @@ struct EncodeSurfaceState {
static constexpr uintptr_t getSurfaceBaseAddressAlignment() { return 4; }
static void getSshAlignedPointer(uintptr_t &ptr, size_t &offset);
static bool doBindingTablePrefetch();
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount,
const void *srcKernelSsh, size_t srcKernelSshSize,
size_t numberOfBindingTableStates, size_t offsetOfBindingTable);
};
template <typename GfxFamily>

View File

@ -10,15 +10,16 @@
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/local_id_gen.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/helpers/string.h"
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include <algorithm>
@ -56,6 +57,12 @@ uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
return samplerStateOffsetInDsh;
}
// Primary-template implementation: always reports a size of zero.
template <typename Family>
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
    constexpr size_t requiredSize = 0;
    return requiredSize;
}
template <typename Family>
void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress) {
int logLws = 0;
@ -208,26 +215,6 @@ void EncodeMath<Family>::addition(CommandContainer &container,
finalResultRegister);
}
template <typename Family>
void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress) {
for (int i = 0; i < 3; ++i) {
if (NEO::isUndefinedOffset(offsets[i])) {
continue;
}
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), GPUGPU_DISPATCHDIM[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
}
}
template <typename Family>
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
for (int i = 0; i < 3; ++i) {
if (NEO::isUndefinedOffset(offsets[i])) {
continue;
}
EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[i], lws[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
}
}
template <typename Family>
inline void EncodeSetMMIO<Family>::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap) {
LriHelper<Family>::program(container.getCommandStream(),
@ -308,6 +295,75 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices);
}
// Rounds `ptr` down using the mask returned by getSurfaceBaseAddressAlignmentMask().
// On return `ptr` holds the masked value and `offset` holds the distance between the
// original and the masked pointer (0 when `ptr` already satisfied the mask).
template <typename Family>
void EncodeSurfaceState<Family>::getSshAlignedPointer(uintptr_t &ptr, size_t &offset) {
    uintptr_t alignedBase = ptr & getSurfaceBaseAddressAlignmentMask();

    offset = 0;
    if (ptr == alignedBase) {
        // already aligned, nothing to adjust
        return;
    }

    offset = ptrDiff(ptr, alignedBase);
    ptr = alignedBase;
}
// Copies a kernel's surface state heap image into dstHeap and, when the copy does not land at the
// very start of dstHeap, rewrites the binding table entries so that their surface state pointers
// are relative to the start of dstHeap instead of the start of the kernel-local image.
//
// Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base address)
// as required by the INTERFACE_DESCRIPTOR_DATA.
//
// dstHeap                    - destination heap; srcKernelSshSize bytes are reserved from it with getSpace()
// bindingTableCount          - when 0, nothing is reserved or copied and 0 is returned
// srcKernelSsh               - source heap image (expected to be non-null, checked in debug builds only)
// srcKernelSshSize           - size of the source image; also the amount reserved in dstHeap
// numberOfBindingTableStates - number of binding table entries that are rewritten
// offsetOfBindingTable       - offset of the binding table inside the source image
template <typename Family>
size_t EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount,
                                                                    const void *srcKernelSsh, size_t srcKernelSshSize,
                                                                    size_t numberOfBindingTableStates, size_t offsetOfBindingTable) {
    using BINDING_TABLE_STATE = typename Family::BINDING_TABLE_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA;

    if (bindingTableCount == 0) {
        // according to compiler, kernel does not reference BTIs to stateful surfaces, so there's nothing to patch
        return 0;
    }
    size_t sshSize = srcKernelSshSize;
    DEBUG_BREAK_IF(srcKernelSsh == nullptr);
    auto srcSurfaceState = srcKernelSsh;

    // Allocate space for new ssh data
    auto dstSurfaceState = dstHeap.getSpace(sshSize);

    // Compiler sends BTI table that is already populated with surface state pointers relative to local SSH.
    // We may need to patch these pointers so that they are relative to surface state base address
    if (dstSurfaceState == dstHeap.getCpuBase()) {
        // nothing to patch, we're at the start of heap (which is assumed to be the surface state base address)
        // we need to simply copy the ssh (including BTIs from compiler)
        memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize);
        return offsetOfBindingTable;
    }

    // We can copy-over the surface states, but BTIs will need to be patched
    memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, offsetOfBindingTable);
    uint32_t surfaceStatesOffset = static_cast<uint32_t>(ptrDiff(dstSurfaceState, dstHeap.getCpuBase()));

    // march over BTIs and offset the pointers based on surface state base address
    auto *dstBtiTableBase = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(dstSurfaceState, offsetOfBindingTable));
    DEBUG_BREAK_IF(reinterpret_cast<uintptr_t>(dstBtiTableBase) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0);
    auto *srcBtiTableBase = reinterpret_cast<const BINDING_TABLE_STATE *>(ptrOffset(srcSurfaceState, offsetOfBindingTable));

    BINDING_TABLE_STATE bti = Family::cmdInitBindingTableState;
    for (uint32_t i = 0, e = static_cast<uint32_t>(numberOfBindingTableStates); i != e; ++i) {
        uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer();
        uint32_t offsetedSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset;
        bti.setSurfaceStatePointer(offsetedSurfaceStateOffset); // patch just the SurfaceStatePointer bits
        dstBtiTableBase[i] = bti;
        // Compare against the alignment value itself; sizeof() of the enumerator would only
        // give the size of its underlying type and make this check weaker than intended.
        DEBUG_BREAK_IF(bti.getRawData(0) % BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE != 0);
    }

    return ptrDiff(dstBtiTableBase, dstHeap.getCpuBase());
}
// Primary-template answer: binding table prefetch is reported as enabled.
template <typename Family>
bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
    constexpr bool prefetchEnabled = true;
    return prefetchEnabled;
}
template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
@ -372,8 +428,23 @@ bool EncodeDispatchKernel<Family>::inlineDataProgrammingRequired(const KernelDes
}
template <typename Family>
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
return 0;
void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress) {
for (int i = 0; i < 3; ++i) {
if (NEO::isUndefinedOffset(offsets[i])) {
continue;
}
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), GPUGPU_DISPATCHDIM[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
}
}
// For each of the three dimensions whose offset is defined, emits (through
// EncodeMathMMIO::encodeMulRegVal) the commands that take GPUGPU_DISPATCHDIM[dim] together with
// lws[dim] and target the address crossThreadAddress + offsets[dim].
// Dimensions with an undefined offset are skipped.
template <typename Family>
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
    auto crossThreadBase = reinterpret_cast<uint64_t>(crossThreadAddress);
    for (int dim = 0; dim < 3; ++dim) {
        if (!NEO::isUndefinedOffset(offsets[dim])) {
            EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[dim], lws[dim], ptrOffset(crossThreadBase, offsets[dim]));
        }
    }
}
template <typename Family>
@ -440,15 +511,15 @@ void EncodeAtomic<Family>::programMiAtomic(MI_ATOMIC *atomic,
*atomic = cmd;
}
template <typename GfxFamily>
void EncodeAtomic<GfxFamily>::programMiAtomic(LinearStream &commandStream,
uint64_t writeAddress,
ATOMIC_OPCODES opcode,
DATA_SIZE dataSize,
uint32_t returnDataControl,
uint32_t csStall) {
template <typename Family>
void EncodeAtomic<Family>::programMiAtomic(LinearStream &commandStream,
uint64_t writeAddress,
ATOMIC_OPCODES opcode,
DATA_SIZE dataSize,
uint32_t returnDataControl,
uint32_t csStall) {
auto miAtomic = commandStream.getSpaceForCmd<MI_ATOMIC>();
EncodeAtomic<GfxFamily>::programMiAtomic(miAtomic, writeAddress, opcode, dataSize, returnDataControl, csStall);
EncodeAtomic<Family>::programMiAtomic(miAtomic, writeAddress, opcode, dataSize, returnDataControl, csStall);
}
template <typename Family>
@ -472,19 +543,6 @@ void EncodeBatchBufferStartOrEnd<Family>::programBatchBufferEnd(CommandContainer
*buffer = cmd;
}
template <typename Family>
void EncodeSurfaceState<Family>::getSshAlignedPointer(uintptr_t &ptr, size_t &offset) {
auto sshAlignmentMask =
getSurfaceBaseAddressAlignmentMask();
uintptr_t alignedPtr = ptr & sshAlignmentMask;
offset = 0;
if (ptr != alignedPtr) {
offset = ptrDiff(ptr, alignedPtr);
ptr = alignedPtr;
}
}
template <typename GfxFamily>
void EncodeMiFlushDW<GfxFamily>::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, bool timeStampOperation, bool commandWithPostSync) {
programMiFlushDwWA(commandStream);

View File

@ -16,8 +16,6 @@
#include "shared/source/helpers/state_base_address.h"
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "pipe_control_args.h"
#include <algorithm>
@ -86,7 +84,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
if (bindingTableStateCount > 0u) {
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
sshOffset = ssh->getUsed();
bindingTablePointer = static_cast<uint32_t>(HardwareCommandsHelper<Family>::pushBindingTableAndSurfaceStates(
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh, bindingTableStateCount,
dispatchInterface->getSurfaceStateHeapData(),
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
@ -96,7 +94,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
idd.setBindingTablePointer(bindingTablePointer);
uint32_t bindingTableStatePrefetchCount = 0;
if (HardwareCommandsHelper<Family>::doBindingTablePrefetch()) {
if (EncodeSurfaceState<Family>::doBindingTablePrefetch()) {
bindingTableStatePrefetchCount = std::min(31u, bindingTableStateCount);
}
idd.setBindingTableEntryCount(bindingTableStatePrefetchCount);

View File

@ -16,6 +16,12 @@ using Family = NEO::ICLFamily;
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
namespace NEO {
// Specialization for this file's Family: binding table prefetch is reported as disabled.
template <>
bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
    constexpr bool prefetchEnabled = false;
    return prefetchEnabled;
}
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@ -15,6 +15,7 @@ using Family = NEO::TGLLPFamily;
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_tgllp_plus.inl"
#include "shared/source/command_stream/command_stream_receiver.h"
namespace NEO {
template <>
@ -65,6 +66,11 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfac
}
}
// Specialization for this file's Family: binding table prefetch is reported as disabled.
template <>
bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
    constexpr bool prefetchEnabled = false;
    return prefetchEnabled;
}
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;

View File

@ -17,6 +17,7 @@ if(TESTS_GEN11)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen11.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen11.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen11.cpp
${COMPUTE_RUNTIME_ULT_GEN11}

View File

@ -0,0 +1,17 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "test.h"
using namespace NEO;
using Gen11CommandEncodeTest = testing::Test;
GEN11TEST_F(Gen11CommandEncodeTest, givenGen11PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsFalse) {
EXPECT_FALSE(EncodeSurfaceState<FamilyType>::doBindingTablePrefetch());
}

View File

@ -17,6 +17,7 @@ if(TESTS_GEN12LP)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen12lp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen12lp.cpp
${COMPUTE_RUNTIME_ULT_GEN12LP}

View File

@ -0,0 +1,17 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "test.h"
using namespace NEO;
using Gen12LpCommandEncodeTest = testing::Test;

// Gen12LP is expected to report binding table prefetch as disabled.
// The test name now states "ReturnsFalse" so that it agrees with the assertion below.
GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDoBindingTablePrefetchIsCalledThenReturnsFalse) {
    EXPECT_FALSE(EncodeSurfaceState<FamilyType>::doBindingTablePrefetch());
}

View File

@ -16,6 +16,7 @@ if(TESTS_GEN8)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_tests_gen8.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen8.cpp

View File

@ -0,0 +1,17 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "test.h"
using namespace NEO;
using Gen8CommandEncodeTest = testing::Test;
GEN8TEST_F(Gen8CommandEncodeTest, givenGen8PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
EXPECT_TRUE(EncodeSurfaceState<FamilyType>::doBindingTablePrefetch());
}

View File

@ -17,6 +17,7 @@ if(TESTS_GEN9)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen9.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen9.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen9.cpp
${COMPUTE_RUNTIME_ULT_GEN9}

View File

@ -0,0 +1,17 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "test.h"
using namespace NEO;
using Gen9CommandEncodeTest = testing::Test;
GEN9TEST_F(Gen9CommandEncodeTest, givenGen9PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) {
EXPECT_TRUE(EncodeSurfaceState<FamilyType>::doBindingTablePrefetch());
}