mirror of https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00

DispatchKernelEncoder refactor

Replacing parts of DispatchKernelEncoder with KernelDescriptor

Change-Id: I1c780b04a2d3d1de0fb75d5413a0dde8b41bbe07
committed by sys_ocldev
parent ea56bde3fb
commit 2c25777f3c
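In outline, this commit trims NEO::DispatchKernelEncoderI from roughly thirty forwarding getters down to a single KernelDescriptor accessor plus a few const getters for runtime state; encoders then read kernel metadata straight from the descriptor. A self-contained sketch of the shape of the change (toy stand-in types, not the actual NEO declarations):

#include <cstdint>

// Toy stand-ins for the NEO types touched below; names follow the diff.
struct KernelDescriptor {
    struct KernelAttributes {
        struct Flags {
            bool usesBarriers = false;
        } flags;
        uint32_t simdSize = 32u;
    } kernelAttributes;
};

// Before: one virtual forwarder per descriptor field.
struct EncoderInterfaceBefore {
    virtual ~EncoderInterfaceBefore() = default;
    virtual bool hasBarriers() = 0;
    virtual uint32_t getSimdSize() = 0;
    // ...roughly thirty more getters of the same kind
};

// After: a single accessor; callers read descriptor fields directly, e.g.
//   auto &kernelDescriptor = dispatchInterface->getKernelDescriptor();
//   idd.setBarrierEnable(kernelDescriptor.kernelAttributes.flags.usesBarriers);
struct EncoderInterfaceAfter {
    virtual ~EncoderInterfaceAfter() = default;
    virtual const KernelDescriptor &getKernelDescriptor() const = 0;
};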
@@ -155,7 +155,7 @@ struct CommandListCoreFamily : CommandListImp {
     void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
                                   const void **pRanges);

-    ze_result_t setGroupSizeIndirect(uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]);
+    ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, uint32_t lws[3]);
     void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker);
     void appendSignalEventPostWalker(ze_event_handle_t hEvent);
@@ -1176,13 +1176,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
 }

 template <GFXCORE_FAMILY gfxCoreFamily>
-ze_result_t CommandListCoreFamily<gfxCoreFamily>::setGroupSizeIndirect(uint32_t offsets[3],
-                                                                       void *crossThreadAddress,
-                                                                       uint32_t lws[3]) {
+ze_result_t CommandListCoreFamily<gfxCoreFamily>::setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, uint32_t lws[3]) {
     using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

-    NEO::EncodeIndirectParams<GfxFamily>::setGroupSizeIndirect(commandContainer, offsets, crossThreadAddress, lws);
+    NEO::EncodeIndirectParams<GfxFamily>::setGlobalWorkSizeIndirect(commandContainer, offsets, crossThreadAddress, lws);

     return ZE_RESULT_SUCCESS;
 }
@@ -29,10 +29,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
     const auto kernel = Kernel::fromHandle(hKernel);
     UNRECOVERABLE_IF(kernel == nullptr);
     const auto functionImmutableData = kernel->getImmutableData();
-    commandListPerThreadScratchSize = std::max(commandListPerThreadScratchSize, kernel->getPerThreadScratchSize());
+    commandListPerThreadScratchSize = std::max<std::uint32_t>(commandListPerThreadScratchSize, kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);

-    auto functionPreemptionMode = obtainFunctionPreemptionMode(kernel);
-    commandListPreemptionMode = std::min(commandListPreemptionMode, functionPreemptionMode);
+    auto kernelPreemptionMode = obtainFunctionPreemptionMode(kernel);
+    commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);

     if (!isIndirect) {
         kernel->setGroupCount(pThreadGroupDimensions->groupCountX,
@@ -98,8 +98,6 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {

     virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, const void *argVal, NEO::GraphicsAllocation *allocation) = 0;
     virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) = 0;
-    virtual bool getGroupCountOffsets(uint32_t *locations) = 0;
-    virtual bool getGroupSizeOffsets(uint32_t *locations) = 0;
     virtual ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
                                      uint32_t groupSizeZ) = 0;
     virtual ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY,
@@ -113,22 +111,6 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {

     virtual const std::vector<NEO::GraphicsAllocation *> &getResidencyContainer() const = 0;

-    virtual void getGroupSize(uint32_t &outGroupSizeX, uint32_t &outGroupSizeY, uint32_t &outGroupSizeZ) const = 0;
-    virtual uint32_t getThreadsPerThreadGroup() const = 0;
-    virtual uint32_t getThreadExecutionMask() const = 0;
-
-    virtual const uint8_t *getCrossThreadData() const = 0;
-    virtual uint32_t getCrossThreadDataSize() const = 0;
-
-    virtual const uint8_t *getPerThreadData() const = 0;
-    virtual uint32_t getPerThreadDataSizeForWholeThreadGroup() const = 0;
-    virtual uint32_t getPerThreadDataSize() const = 0;
-    virtual const uint8_t *getSurfaceStateHeapData() const = 0;
-    virtual uint32_t getSurfaceStateHeapDataSize() const = 0;
-
-    virtual const uint8_t *getDynamicStateHeapData() const = 0;
-    virtual size_t getDynamicStateHeapDataSize() const = 0;
-
     virtual UnifiedMemoryControls getUnifiedMemoryControls() const = 0;
     virtual bool hasIndirectAllocationsAllowed() const = 0;
@@ -77,7 +77,7 @@ struct KernelHw : public KernelImp {
     }

     std::copy(this->groupSize, this->groupSize + 3, cloned->groupSize);
-    cloned->threadsPerThreadGroup = this->threadsPerThreadGroup;
+    cloned->numThreadsPerThreadGroup = this->numThreadsPerThreadGroup;
     cloned->threadExecutionMask = this->threadExecutionMask;

     if (this->surfaceStateHeapDataSize > 0) {
@@ -185,14 +185,9 @@ ze_result_t KernelImp::setArgumentValue(uint32_t argIndex, size_t argSize,
 }

 void KernelImp::setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) {
-    uint32_t groupSizeX;
-    uint32_t groupSizeY;
-    uint32_t groupSizeZ;
-    getGroupSize(groupSizeX, groupSizeY, groupSizeZ);
-
     const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor();
-    uint32_t globalWorkSize[3] = {groupCountX * groupSizeX, groupCountY * groupSizeY,
-                                  groupCountZ * groupSizeZ};
+    uint32_t globalWorkSize[3] = {groupCountX * groupSize[0], groupCountY * groupSize[1],
+                                  groupCountZ * groupSize[2]};
     auto dst = ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize);
     NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.globalWorkSize, globalWorkSize);
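For orientation, patchVecNonPointer writes each component of a small vector into the cross-thread data buffer at the per-component offsets recorded in the descriptor's dispatch traits, skipping components the kernel's payload does not declare. A conceptual sketch of that behavior (assumed semantics and a simplified signature, not the NEO implementation):

#include <cstddef>
#include <cstdint>
#include <cstring>

using CrossThreadDataOffset = uint16_t;                    // assumed offset type width
constexpr CrossThreadDataOffset undefinedOffset = 0xffffu; // assumed sentinel value

// Conceptual model of NEO::patchVecNonPointer for one 3-component vector:
// copy values[i] into the cross-thread buffer at offsets[i] when that
// component exists in the kernel's payload, otherwise skip it.
inline void patchVec3NonPointer(uint8_t *crossThreadData, size_t dataSize,
                                const CrossThreadDataOffset offsets[3],
                                const uint32_t values[3]) {
    for (int i = 0; i < 3; ++i) {
        if (offsets[i] == undefinedOffset || offsets[i] + sizeof(uint32_t) > dataSize) {
            continue; // component not mapped for this kernel
        }
        std::memcpy(crossThreadData + offsets[i], &values[i], sizeof(uint32_t));
    }
}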
@@ -200,30 +195,6 @@ void KernelImp::setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32
     NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.numWorkGroups, groupCount);
 }

-bool KernelImp::getGroupCountOffsets(uint32_t *locations) {
-    const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor();
-    for (int i = 0; i < 3; i++) {
-        if (NEO::isValidOffset(desc.payloadMappings.dispatchTraits.numWorkGroups[i])) {
-            locations[i] = desc.payloadMappings.dispatchTraits.numWorkGroups[i];
-        } else {
-            return false;
-        }
-    }
-    return true;
-}
-
-bool KernelImp::getGroupSizeOffsets(uint32_t *locations) {
-    const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor();
-    for (int i = 0; i < 3; i++) {
-        if (NEO::isValidOffset(desc.payloadMappings.dispatchTraits.globalWorkSize[i])) {
-            locations[i] = desc.payloadMappings.dispatchTraits.globalWorkSize[i];
-        } else {
-            return false;
-        }
-    }
-    return true;
-}
-
 ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
                                     uint32_t groupSizeZ) {
     if ((0 == groupSizeX) || (0 == groupSizeY) || (0 == groupSizeZ)) {
@@ -267,8 +238,8 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
     this->groupSize[2] = groupSizeZ;

     auto simdSize = kernelImmData->getDescriptor().kernelAttributes.simdSize;
-    this->threadsPerThreadGroup = static_cast<uint32_t>((itemsInGroup + simdSize - 1u) / simdSize);
-    this->perThreadDataSize = perThreadDataSizeForWholeThreadGroup / threadsPerThreadGroup;
+    this->numThreadsPerThreadGroup = static_cast<uint32_t>((itemsInGroup + simdSize - 1u) / simdSize);
+    this->perThreadDataSize = perThreadDataSizeForWholeThreadGroup / numThreadsPerThreadGroup;
     patchWorkgroupSizeInCrossThreadData(groupSizeX, groupSizeY, groupSizeZ);

     auto remainderSimdLanes = itemsInGroup & (simdSize - 1u);
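The renamed field keeps the same arithmetic: a ceiling division of work items by SIMD width, with the remainder determining how many lanes of the last hardware thread stay active. A standalone numeric check of the two formulas above (sample numbers are illustrative):

#include <cassert>
#include <cstdint>

int main() {
    uint32_t itemsInGroup = 10u * 1u * 1u; // e.g. group size 10 x 1 x 1
    uint32_t simdSize = 8u;                // e.g. a SIMD8 kernel

    // Ceiling division, as in setGroupSize above: 10 items at SIMD8 need 2 threads.
    uint32_t numThreadsPerThreadGroup = (itemsInGroup + simdSize - 1u) / simdSize;
    assert(numThreadsPerThreadGroup == 2u);

    // simdSize is a power of two, so masking yields the leftover lanes:
    // the last thread runs only 2 of its 8 lanes.
    uint32_t remainderSimdLanes = itemsInGroup & (simdSize - 1u);
    assert(remainderSimdLanes == 2u);
    return 0;
}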
@@ -297,7 +268,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
     uint32_t numThreadsPerSubSlice = (uint32_t)deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU;
     uint32_t localMemSize = (uint32_t)deviceInfo.localMemSize;

-    NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, this->hasBarriers(), simd, this->getSlmTotalSize(),
+    NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.flags.usesBarriers, simd, this->getSlmTotalSize(),
                              coreFamily, numThreadsPerSubSlice, localMemSize,
                              usesImages, false);
     NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim);
@@ -672,92 +643,12 @@ bool KernelImp::hasIndirectAllocationsAllowed() const {
             unifiedMemoryControls.indirectSharedAllocationsAllowed);
 }

-bool KernelImp::hasBarriers() {
-    return getImmutableData()->getDescriptor().kernelAttributes.flags.usesBarriers;
-}
-uint32_t KernelImp::getSlmTotalSize() {
+uint32_t KernelImp::getSlmTotalSize() const {
     return slmArgsTotalSize + getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize;
 }
-uint32_t KernelImp::getBindingTableOffset() {
-    return getImmutableData()->getDescriptor().payloadMappings.bindingTable.tableOffset;
-}
-uint32_t KernelImp::getBorderColor() {
-    return getImmutableData()->getDescriptor().payloadMappings.samplerTable.borderColor;
-}
-uint32_t KernelImp::getSamplerTableOffset() {
-    return getImmutableData()->getDescriptor().payloadMappings.samplerTable.tableOffset;
-}
-uint32_t KernelImp::getNumSurfaceStates() {
-    return getImmutableData()->getDescriptor().payloadMappings.bindingTable.numEntries;
-}
-uint32_t KernelImp::getNumSamplers() {
-    return getImmutableData()->getDescriptor().payloadMappings.samplerTable.numSamplers;
-}
-uint32_t KernelImp::getSimdSize() {
-    return getImmutableData()->getDescriptor().kernelAttributes.simdSize;
-}
-uint32_t KernelImp::getSizeCrossThreadData() {
-    return getCrossThreadDataSize();
-}
-uint32_t KernelImp::getPerThreadScratchSize() {
-    return getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0];
-}
-uint32_t KernelImp::getThreadsPerThreadGroupCount() {
-    return getThreadsPerThreadGroup();
-}
-uint32_t KernelImp::getSizePerThreadData() {
-    return getPerThreadDataSize();
-}
-uint32_t KernelImp::getSizePerThreadDataForWholeGroup() {
-    return getPerThreadDataSizeForWholeThreadGroup();
-}
-uint32_t KernelImp::getSizeSurfaceStateHeapData() {
-    return getSurfaceStateHeapDataSize();
-}
-uint32_t KernelImp::getPerThreadExecutionMask() {
-    return getThreadExecutionMask();
-}
-uint32_t *KernelImp::getCountOffsets() {
-    return groupCountOffsets;
-}
-uint32_t *KernelImp::getSizeOffsets() {
-    return groupSizeOffsets;
-}
-uint32_t *KernelImp::getLocalWorkSize() {
-    if (hasGroupSize()) {
-        getGroupSize(localWorkSize[0], localWorkSize[1], localWorkSize[2]);
-    }
-    return localWorkSize;
-}
-uint32_t KernelImp::getNumGrfRequired() {
-    return getImmutableData()->getDescriptor().kernelAttributes.numGrfRequired;
-}
-NEO::GraphicsAllocation *KernelImp::getIsaAllocation() {
+NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
     return getImmutableData()->getIsaGraphicsAllocation();
 }
-bool KernelImp::hasGroupCounts() {
-    return getGroupCountOffsets(groupCountOffsets);
-}
-bool KernelImp::hasGroupSize() {
-    return getGroupSizeOffsets(groupSizeOffsets);
-}
-const void *KernelImp::getSurfaceStateHeap() {
-    return getSurfaceStateHeapData();
-}
-const void *KernelImp::getDynamicStateHeap() {
-    return getDynamicStateHeapData();
-}
-const void *KernelImp::getCrossThread() {
-    return getCrossThreadData();
-}
-const void *KernelImp::getPerThread() {
-    return getPerThreadData();
-}
-bool KernelImp::isInlineDataRequired() {
-    return getImmutableData()->getDescriptor().kernelAttributes.flags.passInlineData;
-}
-
-uint8_t KernelImp::getNumLocalIdChannels() {
-    return getImmutableData()->getDescriptor().kernelAttributes.numLocalIdChannels;
-}
 } // namespace L0
@@ -42,10 +42,6 @@ struct KernelImp : Kernel {

     void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override;

-    bool getGroupCountOffsets(uint32_t *locations) override;
-
-    bool getGroupSizeOffsets(uint32_t *locations) override;
-
     ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
                              uint32_t groupSizeZ) override;

@@ -62,13 +58,6 @@ struct KernelImp : Kernel {
         return residencyContainer;
     }

-    void getGroupSize(uint32_t &outGroupSizeX, uint32_t &outGroupSizeY,
-                      uint32_t &outGroupSizeZ) const override {
-        outGroupSizeX = this->groupSize[0];
-        outGroupSizeY = this->groupSize[1];
-        outGroupSizeZ = this->groupSize[2];
-    }
-
     ze_result_t setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal);

     ze_result_t setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal);
@@ -89,7 +78,7 @@ struct KernelImp : Kernel {
     uint32_t getPerThreadDataSizeForWholeThreadGroup() const override { return perThreadDataSizeForWholeThreadGroup; }

     uint32_t getPerThreadDataSize() const override { return perThreadDataSize; }
-    uint32_t getThreadsPerThreadGroup() const override { return threadsPerThreadGroup; }
+    uint32_t getNumThreadsPerThreadGroup() const override { return numThreadsPerThreadGroup; }
     uint32_t getThreadExecutionMask() const override { return threadExecutionMask; }

     NEO::GraphicsAllocation *getPrintfBufferAllocation() override { return this->printfBuffer; }

@@ -99,41 +88,20 @@ struct KernelImp : Kernel {
     uint32_t getSurfaceStateHeapDataSize() const override { return surfaceStateHeapDataSize; }

     const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); }
     size_t getDynamicStateHeapDataSize() const override { return dynamicStateHeapDataSize; }

     const KernelImmutableData *getImmutableData() const override { return kernelImmData; }

     UnifiedMemoryControls getUnifiedMemoryControls() const override { return unifiedMemoryControls; }
     bool hasIndirectAllocationsAllowed() const override;

-    bool hasBarriers() override;
-    uint32_t getSlmTotalSize() override;
-    uint32_t getBindingTableOffset() override;
-    uint32_t getBorderColor() override;
-    uint32_t getSamplerTableOffset() override;
-    uint32_t getNumSurfaceStates() override;
-    uint32_t getNumSamplers() override;
-    uint32_t getSimdSize() override;
-    uint32_t getSizeCrossThreadData() override;
-    uint32_t getPerThreadScratchSize() override;
-    uint32_t getThreadsPerThreadGroupCount() override;
-    uint32_t getSizePerThreadData() override;
-    uint32_t getSizePerThreadDataForWholeGroup() override;
-    uint32_t getSizeSurfaceStateHeapData() override;
-    uint32_t getPerThreadExecutionMask() override;
-    uint32_t *getCountOffsets() override;
-    uint32_t *getSizeOffsets() override;
-    uint32_t *getLocalWorkSize() override;
-    uint32_t getNumGrfRequired() override;
-    NEO::GraphicsAllocation *getIsaAllocation() override;
-    bool hasGroupCounts() override;
-    bool hasGroupSize() override;
-    const void *getSurfaceStateHeap() override;
-    const void *getDynamicStateHeap() override;
-    const void *getCrossThread() override;
-    const void *getPerThread() override;
-    bool isInlineDataRequired() override;
-    uint8_t getNumLocalIdChannels() override;
+    const NEO::KernelDescriptor &getKernelDescriptor() const override {
+        return kernelImmData->getDescriptor();
+    }
+    const uint32_t *getGroupSize() const override {
+        return groupSize;
+    }
+    uint32_t getSlmTotalSize() const override;
+    NEO::GraphicsAllocation *getIsaAllocation() const override;

   protected:
     KernelImp() = default;
@@ -153,7 +121,7 @@ struct KernelImp : Kernel {
     NEO::GraphicsAllocation *printfBuffer = nullptr;

     uint32_t groupSize[3] = {0u, 0u, 0u};
-    uint32_t threadsPerThreadGroup = 0u;
+    uint32_t numThreadsPerThreadGroup = 0u;
     uint32_t threadExecutionMask = 0u;

     std::unique_ptr<uint8_t[]> crossThreadData = 0;
@@ -12,26 +12,5 @@
 namespace L0 {
 namespace ult {

-TEST(Kernel, givenPassInlineDataTrueWhenCallingIsInlineDataRequiredThenTrueIsReturned) {
-    Mock<Kernel> kernel;
-
-    kernel.descriptor.kernelAttributes.flags.passInlineData = true;
-    EXPECT_TRUE(kernel.isInlineDataRequired());
-}
-
-TEST(Kernel, givenPassInlineDataFalseWhenCallingIsInlineDataRequiredThenFalseIsReturned) {
-    Mock<Kernel> kernel;
-
-    kernel.descriptor.kernelAttributes.flags.passInlineData = false;
-    EXPECT_FALSE(kernel.isInlineDataRequired());
-}
-
-TEST(Kernel, whenGettingLocalIdsChannelNumberThenCorrectValueIsReturned) {
-    Mock<Kernel> kernel;
-
-    kernel.descriptor.kernelAttributes.numLocalIdChannels = 3;
-    EXPECT_EQ(3u, kernel.getNumLocalIdChannels());
-}
-
 } // namespace ult
 } // namespace L0
@@ -51,7 +51,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
     static void setAdditionalInfo(
         INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor,
         const Kernel &kernel,
-        const uint32_t threadsPerThreadGroup);
+        const uint32_t numThreadsPerThreadGroup);

     inline static uint32_t additionalSizeRequiredDsh();

@@ -64,7 +64,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
         size_t bindingTablePointer,
         size_t offsetSamplerState,
         uint32_t numSamplers,
-        uint32_t threadsPerThreadGroup,
+        uint32_t numThreadsPerThreadGroup,
         const Kernel &kernel,
         uint32_t bindingTablePrefetchSize,
         PreemptionMode preemptionMode,
@@ -144,8 +144,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToIma
     auto localWorkSize = std::min(
         maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height);
     auto simd = 32u;
-    auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
-    EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
+    auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
+    EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
     EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength());
     EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength());

@@ -146,8 +146,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenInterfaceD
     auto localWorkSize = std::min(maxLocalSize,
                                   Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height);
     auto simd = 32u;
-    auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
-    EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
+    auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
+    EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
     EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength());
     EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength());

@@ -145,8 +145,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBuff
     auto localWorkSize = std::min(
         maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height);
     auto simd = 32u;
-    auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
-    EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
+    auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
+    EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
     EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength());
     EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength());

@@ -153,8 +153,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenFillingImageThenInterfaceD
     auto localWorkSize = std::min(maxLocalSize,
                                   Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height);
     auto simd = 32u;
-    auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
-    EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
+    auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
+    EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
     EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength());
     EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength());

@@ -154,8 +154,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenInterfaceD

     auto localWorkSize = 4u;
     auto simd = 32u;
-    auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
-    EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
+    auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
+    EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
     EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength());
     EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength());

@@ -155,8 +155,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenInterface
     // EnqueueWriteImage uses a byte copy. Need to convert to bytes.
     auto localWorkSize = 2 * 2 * sizeof(float);
     auto simd = 32;
-    auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
-    EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
+    auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd);
+    EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup());
     EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength());
     EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength());
@@ -12,6 +12,7 @@
 #include "shared/source/helpers/register_offsets.h"
 #include "shared/source/helpers/simd_helper.h"
 #include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
+#include "shared/source/kernel/kernel_arg_descriptor.h"

 #include <algorithm>

@@ -96,9 +97,8 @@ struct EncodeIndirectParams {
     using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
     using MI_MATH = typename GfxFamily::MI_MATH;
     using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
-    static void setGroupCountIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress);
-
-    static void setGroupSizeIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]);
+    static void setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress);
+    static void setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws);

     static size_t getCmdsSizeForIndirectParams();
     static size_t getCmdsSizeForSetGroupSizeIndirect();
@@ -199,17 +199,23 @@ void EncodeMathMMIO<Family>::encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam,
 }

 template <typename Family>
-void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress) {
-    EncodeStoreMMIO<Family>::encode(container, GPUGPU_DISPATCHDIMX, ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[0]));
-    EncodeStoreMMIO<Family>::encode(container, GPUGPU_DISPATCHDIMY, ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[1]));
-    EncodeStoreMMIO<Family>::encode(container, GPUGPU_DISPATCHDIMZ, ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[2]));
+void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress) {
+    for (int i = 0; i < 3; ++i) {
+        if (NEO::isUndefinedOffset(offsets[i])) {
+            continue;
+        }
+        EncodeStoreMMIO<Family>::encode(container, GPUGPU_DISPATCHDIM[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
+    }
 }

 template <typename Family>
-void EncodeIndirectParams<Family>::setGroupSizeIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]) {
-    EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIMX, lws[0], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[0]));
-    EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIMY, lws[1], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[1]));
-    EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIMZ, lws[2], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[2]));
+void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
+    for (int i = 0; i < 3; ++i) {
+        if (NEO::isUndefinedOffset(offsets[i])) {
+            continue;
+        }
+        EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[i], lws[i], ptrOffset(reinterpret_cast<uint64_t>(crossThreadAddress), offsets[i]));
+    }
 }

 template <typename Family>
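The loops above only emit register stores for payload slots the kernel actually declares; undefined offsets are skipped instead of blindly written. A plausible model of the offset-validity convention these helpers rely on (the type width and sentinel value are assumptions, not quoted from the headers):

#include <cstdint>
#include <limits>

using CrossThreadDataOffset = uint16_t; // assumed width of payload offsets

// Assumed convention: an offset left at the type's maximum value marks a
// payload field the kernel does not use; only defined offsets get patched.
constexpr CrossThreadDataOffset undefinedOffset =
    std::numeric_limits<CrossThreadDataOffset>::max();

constexpr bool isUndefinedOffset(CrossThreadDataOffset offset) {
    return offset == undefinedOffset;
}
constexpr bool isValidOffset(CrossThreadDataOffset offset) {
    return !isUndefinedOffset(offset);
}

static_assert(isUndefinedOffset(0xffff), "sentinel marks an absent field");
static_assert(isValidOffset(0x10), "real payload offsets pass the check");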
@@ -29,9 +29,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
     using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
     using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END;

-    auto sizeCrossThreadData = dispatchInterface->getSizeCrossThreadData();
-    auto sizePerThreadData = dispatchInterface->getSizePerThreadData();
-    auto sizePerThreadDataForWholeGroup = dispatchInterface->getSizePerThreadDataForWholeGroup();
+    auto &kernelDescriptor = dispatchInterface->getKernelDescriptor();
+    auto sizeCrossThreadData = dispatchInterface->getCrossThreadDataSize();
+    auto sizePerThreadData = dispatchInterface->getPerThreadDataSize();
+    auto sizePerThreadDataForWholeGroup = dispatchInterface->getPerThreadDataSizeForWholeThreadGroup();

     LinearStream *listCmdBufferStream = container.getCommandStream();
@@ -58,26 +59,26 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
     EncodeStates<Family>::adjustStateComputeMode(*container.getCommandStream(), container.lastSentNumGrfRequired, nullptr, false, false);
     EncodeWA<Family>::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), false);

-    auto threadsPerThreadGroup = dispatchInterface->getThreadsPerThreadGroupCount();
-    idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
+    auto numThreadsPerThreadGroup = dispatchInterface->getNumThreadsPerThreadGroup();
+    idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);

-    idd.setBarrierEnable(dispatchInterface->hasBarriers());
+    idd.setBarrierEnable(kernelDescriptor.kernelAttributes.flags.usesBarriers);
     idd.setSharedLocalMemorySize(
         dispatchInterface->getSlmTotalSize() > 0
             ? static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(HardwareCommandsHelper<Family>::computeSlmValues(dispatchInterface->getSlmTotalSize()))
             : INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K);

     {
-        auto bindingTableStateCount = dispatchInterface->getNumSurfaceStates();
+        uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
         uint32_t bindingTablePointer = 0u;

         if (bindingTableStateCount > 0u) {
-            auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSizeSurfaceStateHeapData(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
+            auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
             bindingTablePointer = static_cast<uint32_t>(HardwareCommandsHelper<Family>::pushBindingTableAndSurfaceStates(
                 *ssh, bindingTableStateCount,
-                dispatchInterface->getSurfaceStateHeap(),
-                dispatchInterface->getSizeSurfaceStateHeapData(), bindingTableStateCount,
-                dispatchInterface->getBindingTableOffset()));
+                dispatchInterface->getSurfaceStateHeapData(),
+                dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
+                kernelDescriptor.payloadMappings.bindingTable.tableOffset));
         }

         idd.setBindingTablePointer(bindingTablePointer);
@@ -96,12 +97,12 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
     uint32_t samplerStateOffset = 0;
     uint32_t samplerCount = 0;

-    if (dispatchInterface->getNumSamplers() > 0) {
-        samplerCount = dispatchInterface->getNumSamplers();
-        samplerStateOffset = EncodeStates<Family>::copySamplerState(heap, dispatchInterface->getSamplerTableOffset(),
-                                                                    dispatchInterface->getNumSamplers(),
-                                                                    dispatchInterface->getBorderColor(),
-                                                                    dispatchInterface->getDynamicStateHeap());
+    if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
+        samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
+        samplerStateOffset = EncodeStates<Family>::copySamplerState(heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
+                                                                    kernelDescriptor.payloadMappings.samplerTable.numSamplers,
+                                                                    kernelDescriptor.payloadMappings.samplerTable.borderColor,
+                                                                    dispatchInterface->getDynamicStateHeapData());
     }

     idd.setSamplerStatePointer(samplerStateOffset);
@@ -129,21 +130,17 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
         offsetThreadData = heapIndirect->getHeapGpuStartOffset() + static_cast<uint64_t>(heapIndirect->getUsed() - sizeThreadData);

         memcpy_s(ptr, sizeCrossThreadData,
-                 dispatchInterface->getCrossThread(), sizeCrossThreadData);
+                 dispatchInterface->getCrossThreadData(), sizeCrossThreadData);

         if (isIndirect) {
             void *gpuPtr = reinterpret_cast<void *>(heapIndirect->getHeapGpuBase() + heapIndirect->getUsed() - sizeThreadData);
-            if (dispatchInterface->hasGroupCounts()) {
-                EncodeIndirectParams<Family>::setGroupCountIndirect(container, dispatchInterface->getCountOffsets(), gpuPtr);
-            }
-            if (dispatchInterface->hasGroupSize()) {
-                EncodeIndirectParams<Family>::setGroupSizeIndirect(container, dispatchInterface->getSizeOffsets(), gpuPtr, dispatchInterface->getLocalWorkSize());
-            }
+            EncodeIndirectParams<Family>::setGroupCountIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups, gpuPtr);
+            EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize, gpuPtr, dispatchInterface->getGroupSize());
         }

         ptr = ptrOffset(ptr, sizeCrossThreadData);
         memcpy_s(ptr, sizePerThreadDataForWholeGroup,
-                 dispatchInterface->getPerThread(), sizePerThreadDataForWholeGroup);
+                 dispatchInterface->getPerThreadData(), sizePerThreadDataForWholeGroup);
     }

     auto slmSizeNew = dispatchInterface->getSlmTotalSize();
@@ -185,14 +182,14 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
         cmd.setThreadGroupIdZDimension(threadDims[2]);
     }

-    auto simdSize = dispatchInterface->getSimdSize();
+    auto simdSize = kernelDescriptor.kernelAttributes.simdSize;
     auto simdSizeOp = getSimdConfig<WALKER_TYPE>(simdSize);

     cmd.setSimdSize(simdSizeOp);

-    cmd.setRightExecutionMask(dispatchInterface->getPerThreadExecutionMask());
+    cmd.setRightExecutionMask(dispatchInterface->getThreadExecutionMask());
     cmd.setBottomExecutionMask(0xffffffff);
-    cmd.setThreadWidthCounterMaximum(threadsPerThreadGroup);
+    cmd.setThreadWidthCounterMaximum(numThreadsPerThreadGroup);

     cmd.setPredicateEnable(isPredicate);
@@ -25,8 +25,8 @@ uint32_t KernelHelper::getMaxWorkGroupCount(uint32_t simd, uint32_t availableThr
         workGroupSize *= localWorkSize[i];
     }

-    auto threadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
-    auto maxWorkGroupsCount = availableThreadCount / threadsPerThreadGroup;
+    auto numThreadsPerThreadGroup = static_cast<uint32_t>(Math::divideAndRoundUp(workGroupSize, simd));
+    auto maxWorkGroupsCount = availableThreadCount / numThreadsPerThreadGroup;

     if (numberOfBarriers > 0) {
         auto maxWorkGroupsCountDueToBarrierUsage = dssCount * (maxBarrierCount / numberOfBarriers);
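To make the visible arithmetic concrete: with an 8x8x1 local size on a SIMD16 kernel, each work group occupies ceil(64/16) = 4 hardware threads, so a budget of 448 available threads admits at most 112 groups before the barrier cap above is applied. A standalone check of just the lines shown (the sample numbers are illustrative):

#include <cassert>
#include <cstdint>

int main() {
    uint32_t simd = 16u;
    uint32_t availableThreadCount = 448u; // illustrative device thread budget
    uint32_t localWorkSize[3] = {8u, 8u, 1u};

    uint32_t workGroupSize = 1u;
    for (int i = 0; i < 3; i++) {
        workGroupSize *= localWorkSize[i]; // 64 work items per group
    }

    // Math::divideAndRoundUp equivalent: ceil(64 / 16) = 4 threads per group.
    uint32_t numThreadsPerThreadGroup = (workGroupSize + simd - 1u) / simd;
    uint32_t maxWorkGroupsCount = availableThreadCount / numThreadsPerThreadGroup;

    assert(numThreadsPerThreadGroup == 4u);
    assert(maxWorkGroupsCount == 112u);
    return 0;
}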
@@ -19,6 +19,8 @@ constexpr uint32_t GPUGPU_DISPATCHDIMX = 0x2500;
 constexpr uint32_t GPUGPU_DISPATCHDIMY = 0x2504;
 constexpr uint32_t GPUGPU_DISPATCHDIMZ = 0x2508;

+constexpr uint32_t GPUGPU_DISPATCHDIM[3] = {GPUGPU_DISPATCHDIMX, GPUGPU_DISPATCHDIMY, GPUGPU_DISPATCHDIMZ};
+
 constexpr uint32_t CS_GPR_R0 = 0x2600;
 constexpr uint32_t CS_GPR_R1 = 0x2608;
 constexpr uint32_t CS_GPR_R2 = 0x2610;
@@ -10,42 +10,28 @@

 namespace NEO {
 class GraphicsAllocation;
+struct KernelDescriptor;

 struct DispatchKernelEncoderI {
   public:
-    virtual bool hasBarriers() = 0;
-    virtual uint32_t getSlmTotalSize() = 0;
-    virtual uint32_t getBindingTableOffset() = 0;
-    virtual uint32_t getBorderColor() = 0;
-    virtual uint32_t getSamplerTableOffset() = 0;
-    virtual uint32_t getNumSurfaceStates() = 0;
-    virtual uint32_t getNumSamplers() = 0;
-    virtual uint32_t getSimdSize() = 0;
-    virtual uint32_t getSizeCrossThreadData() = 0;
-    virtual uint32_t getPerThreadScratchSize() = 0;
-    virtual uint32_t getPerThreadExecutionMask() = 0;
-    virtual uint32_t getSizePerThreadData() = 0;
-    virtual uint32_t getSizePerThreadDataForWholeGroup() = 0;
-    virtual uint32_t getSizeSurfaceStateHeapData() = 0;
-    virtual uint32_t *getCountOffsets() = 0;
-    virtual uint32_t *getSizeOffsets() = 0;
-    virtual uint32_t *getLocalWorkSize() = 0;
-    virtual uint32_t getNumGrfRequired() = 0;
-    virtual uint32_t getThreadsPerThreadGroupCount() = 0;
-    virtual GraphicsAllocation *getIsaAllocation() = 0;
-    virtual bool hasGroupCounts() = 0;
-    virtual bool hasGroupSize() = 0;
-    virtual const void *getSurfaceStateHeap() = 0;
-    virtual const void *getDynamicStateHeap() = 0;
-    virtual const void *getCrossThread() = 0;
-    virtual const void *getPerThread() = 0;
-    virtual bool isInlineDataRequired() = 0;
-    virtual uint8_t getNumLocalIdChannels() = 0;
     virtual ~DispatchKernelEncoderI() = default;

-  protected:
-    uint32_t groupCountOffsets[3] = {};
-    uint32_t groupSizeOffsets[3] = {};
-    uint32_t localWorkSize[3] = {};
+    virtual const KernelDescriptor &getKernelDescriptor() const = 0;
+    virtual const uint32_t *getGroupSize() const = 0;
+    virtual uint32_t getSlmTotalSize() const = 0;
+
+    virtual const uint8_t *getCrossThreadData() const = 0;
+    virtual uint32_t getCrossThreadDataSize() const = 0;
+
+    virtual uint32_t getThreadExecutionMask() const = 0;
+    virtual uint32_t getNumThreadsPerThreadGroup() const = 0;
+    virtual const uint8_t *getPerThreadData() const = 0;
+    virtual uint32_t getPerThreadDataSize() const = 0;
+    virtual uint32_t getPerThreadDataSizeForWholeThreadGroup() const = 0;
+
+    virtual const uint8_t *getSurfaceStateHeapData() const = 0;
+    virtual uint32_t getSurfaceStateHeapDataSize() const = 0;
+
+    virtual GraphicsAllocation *getIsaAllocation() const = 0;
+    virtual const uint8_t *getDynamicStateHeapData() const = 0;
 };
 } // namespace NEO
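Note the ownership shift in this hunk: the old interface carried protected scratch arrays that implementations filled lazily and exposed through mutable pointers, while the new one is all-const and stateless, so the state lives in the implementing kernel. A toy sketch of that difference (hypothetical minimal types, not the NEO declarations):

#include <cstdint>

// Before: the interface itself owned scratch storage and handed out
// mutable views of it.
struct InterfaceBefore {
    virtual ~InterfaceBefore() = default;
    uint32_t *getLocalWorkSize() { return localWorkSize; } // mutable view

  protected:
    uint32_t localWorkSize[3] = {};
};

// After: state lives in the implementing kernel; the interface hands out
// const views only, so encoders cannot mutate kernel state as a side effect.
struct InterfaceAfter {
    virtual ~InterfaceAfter() = default;
    virtual const uint32_t *getGroupSize() const = 0;
};

struct KernelLike : InterfaceAfter {
    const uint32_t *getGroupSize() const override { return groupSize; }

  private:
    uint32_t groupSize[3] = {0u, 0u, 0u};
};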
@@ -55,7 +55,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
     dst.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress = (0 != execEnv.SubgroupIndependentForwardProgressRequired);
     dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired;
     dst.kernelAttributes.flags.useGlobalAtomics = execEnv.HasGlobalAtomics;
-    dst.kernelAttributes.flags.usesStatelessWrites = 0U;
+    dst.kernelAttributes.flags.usesStatelessWrites = (execEnv.StatelessWritesCount > 0U);
 }

 void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) {

@@ -85,7 +85,6 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchInterfaceDescri
 void populateKernelDescriptor(KernelDescriptor &dst, const SPatchThreadPayload &token) {
     dst.kernelAttributes.flags.perThreadDataHeaderIsPresent = (0U != token.HeaderPresent);
     dst.kernelAttributes.numLocalIdChannels = token.LocalIDXPresent + token.LocalIDYPresent + token.LocalIDZPresent;
-    ;
     dst.kernelAttributes.flags.usesFlattenedLocalIds = (0U != token.LocalIDFlattenedPresent);
     dst.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = (0U != token.UnusedPerThreadConstantPresent);
     dst.kernelAttributes.flags.passInlineData = (0 != token.PassInlineData);
@@ -92,7 +92,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroW
     EXPECT_EQ(expectedValue, interfaceDescriptorData->getSharedLocalMemorySize());
 }

-HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhenDispatchingKernelThenBindingTableOffsetIsCorrect) {
+HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWhenDispatchingKernelThenBindingTableOffsetIsCorrect) {
     using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
     using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
     uint32_t numBindingTable = 1;
@@ -107,10 +107,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhen
     uint32_t dims[] = {2, 1, 1};
     std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

-    EXPECT_CALL(*dispatchInterface.get(), getNumSurfaceStates()).WillRepeatedly(::testing::Return(numBindingTable));
-    EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeap()).WillRepeatedly(::testing::Return(&bindingTableState));
-    EXPECT_CALL(*dispatchInterface.get(), getSizeSurfaceStateHeapData()).WillRepeatedly(::testing::Return(static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE))));
-    EXPECT_CALL(*dispatchInterface.get(), getBindingTableOffset()).WillRepeatedly(::testing::Return(0));
+    dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.numEntries = numBindingTable;
+    dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.tableOffset = 0U;
+    const uint8_t *sshData = reinterpret_cast<uint8_t *>(&bindingTableState);
+    EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData));
+    EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE))));

     EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
     auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());

@@ -132,10 +133,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen
     uint32_t dims[] = {2, 1, 1};
     std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

-    EXPECT_CALL(*dispatchInterface.get(), getNumSurfaceStates()).WillRepeatedly(::testing::Return(numBindingTable));
-    EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeap()).WillRepeatedly(::testing::Return(&bindingTableState));
-    EXPECT_CALL(*dispatchInterface.get(), getSizeSurfaceStateHeapData()).WillRepeatedly(::testing::Return(static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE))));
-    EXPECT_CALL(*dispatchInterface.get(), getBindingTableOffset()).WillRepeatedly(::testing::Return(0));
+    dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.numEntries = numBindingTable;
+    dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.tableOffset = 0U;
+    const uint8_t *sshData = reinterpret_cast<uint8_t *>(&bindingTableState);
+    EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(sshData));
+    EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapDataSize()).WillRepeatedly(::testing::Return(static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE))));

     EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
     auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());

@@ -156,10 +158,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa
     uint32_t dims[] = {2, 1, 1};
     std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

-    EXPECT_CALL(*dispatchInterface.get(), getNumSamplers()).WillRepeatedly(::testing::Return(numSamplers));
-    EXPECT_CALL(*dispatchInterface.get(), getSamplerTableOffset()).WillRepeatedly(::testing::Return(0));
-    EXPECT_CALL(*dispatchInterface.get(), getBorderColor()).WillRepeatedly(::testing::Return(0));
-    EXPECT_CALL(*dispatchInterface.get(), getDynamicStateHeap()).WillRepeatedly(::testing::Return(&samplerState));
+    dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.numSamplers = numSamplers;
+    dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.tableOffset = 0U;
+    dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.borderColor = 0U;
+    const uint8_t *dshData = reinterpret_cast<uint8_t *>(&samplerState);
+    EXPECT_CALL(*dispatchInterface.get(), getDynamicStateHeapData()).WillRepeatedly(::testing::Return(dshData));

     EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
     auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());

@@ -186,10 +189,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp
     uint32_t dims[] = {2, 1, 1};
     std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

-    EXPECT_CALL(*dispatchInterface.get(), getNumSamplers()).WillRepeatedly(::testing::Return(numSamplers));
-    EXPECT_CALL(*dispatchInterface.get(), getSamplerTableOffset()).WillRepeatedly(::testing::Return(0));
-    EXPECT_CALL(*dispatchInterface.get(), getBorderColor()).WillRepeatedly(::testing::Return(0));
-    EXPECT_CALL(*dispatchInterface.get(), getDynamicStateHeap()).WillRepeatedly(::testing::Return(&samplerState));
+    dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.numSamplers = numSamplers;
+    dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.tableOffset = 0U;
+    dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.borderColor = 0U;
+    const uint8_t *dshData = reinterpret_cast<uint8_t *>(&samplerState);
+    EXPECT_CALL(*dispatchInterface.get(), getDynamicStateHeapData()).WillRepeatedly(::testing::Return(dshData));

     EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
     auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
@@ -203,16 +207,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersZeroWhenDisp
     EXPECT_NE(memcmp(pSmplr, &samplerState, sizeof(SAMPLER_STATE)), 0);
 }

-HWTEST_F(CommandEncodeStatesTest, givenIndarectOffsetsCountsWhenDispatchingKernelThenCorrestMIStoreOffsetsSet) {
+HWTEST_F(CommandEncodeStatesTest, givenIndirectOffsetsCountsWhenDispatchingKernelThenCorrestMIStoreOffsetsSet) {
     using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
     uint32_t dims[] = {2, 1, 1};
     uint32_t offsets[] = {0x10, 0x20, 0x30};
     std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

-    EXPECT_CALL(*dispatchInterface.get(), hasGroupCounts()).WillRepeatedly(::testing::Return(true));
-    EXPECT_CALL(*dispatchInterface.get(), getCountOffsets()).WillRepeatedly(::testing::Return(offsets));
-    EXPECT_CALL(*dispatchInterface.get(), hasGroupSize()).WillRepeatedly(::testing::Return(false));
-
+    dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = offsets[0];
+    dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = offsets[1];
+    dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = offsets[2];
     EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, true, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);

     GenCmdList commands;

@@ -233,11 +235,10 @@ HWTEST_F(CommandEncodeStatesTest, givenIndarectOffsetsSizeWhenDispatchingKernelT
     uint32_t lws[] = {1, 1, 1};
     std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

-    EXPECT_CALL(*dispatchInterface.get(), hasGroupCounts()).WillRepeatedly(::testing::Return(false));
-    EXPECT_CALL(*dispatchInterface.get(), getSizeOffsets()).WillRepeatedly(::testing::Return(offsets));
-    EXPECT_CALL(*dispatchInterface.get(), hasGroupSize()).WillRepeatedly(::testing::Return(true));
-    EXPECT_CALL(*dispatchInterface.get(), getLocalWorkSize()).WillRepeatedly(::testing::Return(lws));
-
+    EXPECT_CALL(*dispatchInterface.get(), getGroupSize()).WillRepeatedly(::testing::Return(lws));
+    dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = offsets[0];
+    dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[1] = offsets[1];
+    dispatchInterface->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = offsets[2];
     EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, true, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);

     GenCmdList commands;
@@ -185,11 +185,11 @@ HWTEST_F(CommandEncoderMathTest, setGroupSizeIndirect) {
     CommandContainer cmdContainer;
     cmdContainer.initialize(pDevice);

-    uint32_t offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)};
+    CrossThreadDataOffset offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)};
     uint32_t crossThreadAdress[3] = {};
     uint32_t lws[3] = {2, 1, 1};

-    EncodeIndirectParams<FamilyType>::setGroupSizeIndirect(cmdContainer, offsets, crossThreadAdress, lws);
+    EncodeIndirectParams<FamilyType>::setGlobalWorkSizeIndirect(cmdContainer, offsets, crossThreadAdress, lws);

     GenCmdList commands;
     CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());

@@ -211,7 +211,7 @@ HWTEST_F(CommandEncoderMathTest, setGroupCountIndirect) {
     CommandContainer cmdContainer;
     cmdContainer.initialize(pDevice);

-    uint32_t offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)};
+    CrossThreadDataOffset offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)};
     uint32_t crossThreadAdress[3] = {};

     EncodeIndirectParams<FamilyType>::setGroupCountIndirect(cmdContainer, offsets, crossThreadAdress);
@@ -129,6 +129,9 @@ TEST(KernelDescriptorFromPatchtokens, GivenExecutionEnvironmentThenSetsProperPar
     EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.useGlobalAtomics);

     EXPECT_FALSE(kernelDescriptor.kernelAttributes.flags.usesStatelessWrites);
+    execEnv.StatelessWritesCount = 1U;
+    NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4);
+    EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.usesStatelessWrites);
 }

 TEST(KernelDescriptorFromPatchtokens, GivenThreadPayloadThenSetsProperPartsOfDescriptor) {
@@ -12,34 +12,28 @@ using namespace NEO;
 using ::testing::Return;

 MockDispatchKernelEncoder::MockDispatchKernelEncoder() {
-    EXPECT_CALL(*this, getIsaAllocation).WillRepeatedly(Return(&mockAllocation));
-    EXPECT_CALL(*this, getSizeCrossThreadData).WillRepeatedly(Return(crossThreadSize));
-    EXPECT_CALL(*this, getSizePerThreadData).WillRepeatedly(Return(perThreadSize));
+    EXPECT_CALL(*this, getKernelDescriptor).WillRepeatedly(::testing::ReturnRef(kernelDescriptor));

-    EXPECT_CALL(*this, getCrossThread).WillRepeatedly(Return(&dataCrossThread));
-    EXPECT_CALL(*this, getPerThread).WillRepeatedly(Return(&dataPerThread));
+    EXPECT_CALL(*this, getIsaAllocation).WillRepeatedly(Return(&mockAllocation));
+    EXPECT_CALL(*this, getCrossThreadDataSize).WillRepeatedly(Return(crossThreadSize));
+    EXPECT_CALL(*this, getPerThreadDataSize).WillRepeatedly(Return(perThreadSize));
+
+    EXPECT_CALL(*this, getCrossThreadData).WillRepeatedly(Return(dataCrossThread));
+    EXPECT_CALL(*this, getPerThreadData).WillRepeatedly(Return(dataPerThread));
     expectAnyMockFunctionCall();
 }

 void MockDispatchKernelEncoder::expectAnyMockFunctionCall() {
-    EXPECT_CALL(*this, hasBarriers()).Times(::testing::AnyNumber());
+    EXPECT_CALL(*this, getGroupSize()).Times(::testing::AnyNumber());
     EXPECT_CALL(*this, getSlmTotalSize()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getBindingTableOffset()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getBorderColor()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getSamplerTableOffset()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getNumSurfaceStates()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getNumSamplers()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getSimdSize()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getPerThreadScratchSize()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getPerThreadExecutionMask()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getSizePerThreadDataForWholeGroup()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getSizeSurfaceStateHeapData()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getCountOffsets()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getSizeOffsets()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getLocalWorkSize()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getNumGrfRequired()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getThreadsPerThreadGroupCount()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, hasGroupCounts()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getSurfaceStateHeap()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, getDynamicStateHeap()).Times(::testing::AnyNumber());
-    EXPECT_CALL(*this, isInlineDataRequired()).Times(::testing::AnyNumber());
+
+    EXPECT_CALL(*this, getThreadExecutionMask()).Times(::testing::AnyNumber());
+    EXPECT_CALL(*this, getNumThreadsPerThreadGroup()).Times(::testing::AnyNumber());
+    EXPECT_CALL(*this, getPerThreadDataSizeForWholeThreadGroup()).Times(::testing::AnyNumber());
+
+    EXPECT_CALL(*this, getSurfaceStateHeapData()).Times(::testing::AnyNumber());
+    EXPECT_CALL(*this, getSurfaceStateHeapDataSize()).Times(::testing::AnyNumber());
+
+    EXPECT_CALL(*this, getDynamicStateHeapData()).Times(::testing::AnyNumber());
 }
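A side note on the mock plumbing: the blanket .Times(::testing::AnyNumber()) expectations exist only to keep gmock from warning about "uninteresting" calls during encode. Wrapping a mock in ::testing::NiceMock silences those warnings wholesale without enumerating each getter; a minimal self-contained sketch of the alternative (not how these tests are written):

#include "gmock/gmock.h"

struct Widget {
    virtual ~Widget() = default;
    virtual int size() const = 0;
};

struct MockWidget : Widget {
    MOCK_CONST_METHOD0(size, int());
};

void example() {
    MockWidget strict;                     // gmock warns on calls with no EXPECT_CALL
    ::testing::NiceMock<MockWidget> quiet; // uninteresting calls pass silently
    ON_CALL(quiet, size()).WillByDefault(::testing::Return(4));
}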
@@ -7,6 +7,7 @@

 #pragma once
 #include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
+#include "shared/source/kernel/kernel_descriptor.h"

 #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"

@@ -20,34 +21,24 @@ class GraphicsAllocation;
 struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
   public:
     MockDispatchKernelEncoder();
-    MOCK_METHOD0(hasBarriers, bool());
-    MOCK_METHOD0(getSlmTotalSize, uint32_t());
-    MOCK_METHOD0(getBindingTableOffset, uint32_t());
-    MOCK_METHOD0(getBorderColor, uint32_t());
-    MOCK_METHOD0(getSamplerTableOffset, uint32_t());
-    MOCK_METHOD0(getNumSurfaceStates, uint32_t());
-    MOCK_METHOD0(getNumSamplers, uint32_t());
-    MOCK_METHOD0(getSimdSize, uint32_t());
-    MOCK_METHOD0(getSizeCrossThreadData, uint32_t());
-    MOCK_METHOD0(getPerThreadScratchSize, uint32_t());
-    MOCK_METHOD0(getPerThreadExecutionMask, uint32_t());
-    MOCK_METHOD0(getSizePerThreadData, uint32_t());
-    MOCK_METHOD0(getSizePerThreadDataForWholeGroup, uint32_t());
-    MOCK_METHOD0(getSizeSurfaceStateHeapData, uint32_t());
-    MOCK_METHOD0(getCountOffsets, uint32_t *());
-    MOCK_METHOD0(getSizeOffsets, uint32_t *());
-    MOCK_METHOD0(getLocalWorkSize, uint32_t *());
-    MOCK_METHOD0(getNumGrfRequired, uint32_t());
-    MOCK_METHOD0(getThreadsPerThreadGroupCount, uint32_t());
-    MOCK_METHOD0(getIsaAllocation, GraphicsAllocation *());
-    MOCK_METHOD0(hasGroupCounts, bool());
-    MOCK_METHOD0(hasGroupSize, bool());
-    MOCK_METHOD0(getSurfaceStateHeap, const void *());
-    MOCK_METHOD0(getDynamicStateHeap, const void *());
-    MOCK_METHOD0(getCrossThread, const void *());
-    MOCK_METHOD0(getPerThread, const void *());
-    MOCK_METHOD0(isInlineDataRequired, bool());
-    MOCK_METHOD0(getNumLocalIdChannels, uint8_t());
+    MOCK_CONST_METHOD0(getKernelDescriptor, const KernelDescriptor &());
+    MOCK_CONST_METHOD0(getGroupSize, const uint32_t *());
+    MOCK_CONST_METHOD0(getSlmTotalSize, uint32_t());
+
+    MOCK_CONST_METHOD0(getCrossThreadData, const uint8_t *());
+    MOCK_CONST_METHOD0(getCrossThreadDataSize, uint32_t());
+
+    MOCK_CONST_METHOD0(getThreadExecutionMask, uint32_t());
+    MOCK_CONST_METHOD0(getNumThreadsPerThreadGroup, uint32_t());
+    MOCK_CONST_METHOD0(getPerThreadData, const uint8_t *());
+    MOCK_CONST_METHOD0(getPerThreadDataSize, uint32_t());
+    MOCK_CONST_METHOD0(getPerThreadDataSizeForWholeThreadGroup, uint32_t());
+
+    MOCK_CONST_METHOD0(getSurfaceStateHeapData, const uint8_t *());
+    MOCK_CONST_METHOD0(getSurfaceStateHeapDataSize, uint32_t());
+
+    MOCK_CONST_METHOD0(getIsaAllocation, GraphicsAllocation *());
+    MOCK_CONST_METHOD0(getDynamicStateHeapData, const uint8_t *());

     void expectAnyMockFunctionCall();

@@ -56,5 +47,6 @@ struct MockDispatchKernelEncoder : public DispatchKernelEncoderI {
     static constexpr uint32_t perThreadSize = 0x20;
     uint8_t dataCrossThread[crossThreadSize];
     uint8_t dataPerThread[perThreadSize];
+    KernelDescriptor kernelDescriptor;
 };
 } // namespace NEO