Change interface to program cross-thread data

Change-Id: I96bf4bddf1557f588fd745efca7b19ec2f38a78e
This commit is contained in:
Zdanowicz, Zbigniew
2018-10-17 21:38:18 -07:00
committed by sys_ocldev
parent 31bf5b9b43
commit f3a732081e
10 changed files with 115 additions and 114 deletions

View File

@@ -135,8 +135,8 @@ class GpgpuWalkerHelper {
uint32_t simd, uint32_t simd,
uint32_t workDim, uint32_t workDim,
bool localIdsGenerationByRuntime, bool localIdsGenerationByRuntime,
bool kernelUsesLocalIds, bool inlineDataProgrammingRequired,
bool inlineDataProgrammingRequired); const iOpenCL::SPatchThreadPayload &threadPayload);
static void dispatchProfilingCommandsStart( static void dispatchProfilingCommandsStart(
HwTimeStamps &hwTimeStamps, HwTimeStamps &hwTimeStamps,

View File

@@ -20,8 +20,8 @@ inline size_t GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(
uint32_t simd, uint32_t simd,
uint32_t workDim, uint32_t workDim,
bool localIdsGenerationByRuntime, bool localIdsGenerationByRuntime,
bool kernelUsesLocalIds, bool inlineDataProgrammingRequired,
bool inlineDataProgrammingRequired) { const iOpenCL::SPatchThreadPayload &threadPayload) {
auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2]; auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2];
auto threadsPerWorkGroup = getThreadsPerWG(simd, localWorkSize); auto threadsPerWorkGroup = getThreadsPerWG(simd, localWorkSize);
@@ -155,7 +155,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
size_t globalOffsets[3] = {0, 0, 0}; size_t globalOffsets[3] = {0, 0, 0};
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1}; size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(pGpGpuWalkerCmd, globalOffsets, globalOffsets, workGroups, localWorkSizes, GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(pGpGpuWalkerCmd, globalOffsets, globalOffsets, workGroups, localWorkSizes,
simd, 1, localIdsGenerationByRuntime, kernelUsesLocalIds, inlineDataProgrammingRequired); simd, 1, localIdsGenerationByRuntime, inlineDataProgrammingRequired,
*scheduler.getKernelInfo().patchInfo.threadPayload);
// Implement disabling special WA DisableLSQCROPERFforOCL if needed // Implement disabling special WA DisableLSQCROPERFforOCL if needed
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(commandStream, scheduler, false); GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(commandStream, scheduler, false);

View File

@@ -223,7 +223,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z}; size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z};
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups, GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
numWorkGroups, localWorkSizes, simd, dim, numWorkGroups, localWorkSizes, simd, dim,
localIdsGenerationByRuntime, kernelUsesLocalIds, inlineDataProgrammingRequired); localIdsGenerationByRuntime, inlineDataProgrammingRequired,
*kernel.getKernelInfo().patchInfo.threadPayload);
dispatchWorkarounds(commandStream, commandQueue, kernel, false); dispatchWorkarounds(commandStream, commandQueue, kernel, false);
currentDispatchIndex++; currentDispatchIndex++;

View File

@@ -75,7 +75,10 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
static size_t sendCrossThreadData( static size_t sendCrossThreadData(
IndirectHeap &indirectHeap, IndirectHeap &indirectHeap,
Kernel &kernel); Kernel &kernel,
bool inlineDataProgrammingRequired,
WALKER_TYPE<GfxFamily> *walkerCmd,
uint32_t &sizeCrossThreadData);
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo, static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
const void *srcKernelSsh, size_t srcKernelSshSize, const void *srcKernelSsh, size_t srcKernelSshSize,
@@ -197,18 +200,6 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
WALKER_TYPE<GfxFamily> *walkerCmd, WALKER_TYPE<GfxFamily> *walkerCmd,
uint32_t &interfaceDescriptorIndex); uint32_t &interfaceDescriptorIndex);
static void getCrossThreadData(
uint32_t &sizeCrossThreadData,
size_t &offsetCrossThreadData,
Kernel &kernel,
const bool &inlineDataProgrammingRequired,
IndirectHeap &ioh,
WALKER_TYPE<GfxFamily> *walkerCmd);
inline static size_t getCrossThreadDataSize(
uint32_t &sizeCrossThreadData,
Kernel &kernel);
static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData); static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData);
static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
static void programPipeControlDataWriteWithCsStall(LinearStream &commandStream, uint64_t writeAddress, uint64_t data); static void programPipeControlDataWriteWithCsStall(LinearStream &commandStream, uint64_t writeAddress, uint64_t data);

View File

@@ -163,24 +163,6 @@ size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
return (size_t)offsetInterfaceDescriptor; return (size_t)offsetInterfaceDescriptor;
} }
// Copies the kernel's cross-thread data into the indirect heap and reports
// where it was placed.
//
// indirectHeap - heap that receives the data; it is aligned and grown here.
// kernel       - supplies the cross-thread data buffer and its size.
//
// Returns the heap offset at which the data was written (taken after the
// alignment step) plus the heap's GPU start offset.
template <typename GfxFamily>
size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
IndirectHeap &indirectHeap,
Kernel &kernel) {
// Align first so that the offset captured below is the aligned start of the data.
indirectHeap.align(GfxFamily::WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
auto offsetCrossThreadData = indirectHeap.getUsed();
// In this version the size is always queried from the kernel itself.
auto sizeCrossThreadData = kernel.getCrossThreadDataSize();
char *pDest = static_cast<char *>(indirectHeap.getSpace(sizeCrossThreadData));
memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);
// Only when the AddPatchInfoCommentsForAUBDump debug flag is set: hand the kernel's
// patch info list, the data offset and the heap allocation's GPU address to
// FlatBatchBufferHelper::fixCrossThreadDataInfo.
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress());
}
return offsetCrossThreadData + static_cast<size_t>(indirectHeap.getHeapGpuStartOffset());
}
// Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base address) // Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base address)
// as required by the INTERFACE_DESCRIPTOR_DATA. // as required by the INTERFACE_DESCRIPTOR_DATA.
template <typename GfxFamily> template <typename GfxFamily>
@@ -308,21 +290,16 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
auto threadsPerThreadGroup = static_cast<uint32_t>(getThreadsPerWG(simd, localWorkItems)); auto threadsPerThreadGroup = static_cast<uint32_t>(getThreadsPerWG(simd, localWorkItems));
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
uint32_t sizeCrossThreadData = 0; uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize();
size_t offsetCrossThreadData = 0;
getCrossThreadData( size_t offsetCrossThreadData = KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
sizeCrossThreadData, ioh, kernel, inlineDataProgrammingRequired,
offsetCrossThreadData, walkerCmd, sizeCrossThreadData);
kernel,
inlineDataProgrammingRequired,
ioh,
walkerCmd);
size_t sizePerThreadDataTotal = 0; size_t sizePerThreadDataTotal = 0;
size_t sizePerThreadData = 0; size_t sizePerThreadData = 0;
programPerThreadData( KernelCommandsHelper<GfxFamily>::programPerThreadData(
sizePerThreadData, sizePerThreadData,
localIdsGenerationByRuntime, localIdsGenerationByRuntime,
ioh, ioh,
@@ -345,7 +322,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
dsh, dsh,
offsetInterfaceDescriptor, offsetInterfaceDescriptor,
kernelStartOffset, kernelStartOffset,
getCrossThreadDataSize(sizeCrossThreadData, kernel), sizeCrossThreadData,
sizePerThreadData, sizePerThreadData,
dstBindingTablePointer, dstBindingTablePointer,
samplerStateOffset, samplerStateOffset,
@@ -441,8 +418,7 @@ bool KernelCommandsHelper<GfxFamily>::doBindingTablePrefetch() {
template <typename GfxFamily> template <typename GfxFamily>
bool KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(const Kernel &kernel) { bool KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(const Kernel &kernel) {
if (DebugManager.flags.EnablePassInlineData.get()) { if (DebugManager.flags.EnablePassInlineData.get()) {
return kernel.getKernelInfo().patchInfo.threadPayload->PassInlineData && return kernel.getKernelInfo().patchInfo.threadPayload->PassInlineData;
kernel.getCrossThreadDataSize() <= sizeof(GRF);
} }
return false; return false;
} }

View File

@@ -117,6 +117,26 @@ void KernelCommandsHelper<GfxFamily>::programPerThreadData(
updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems); updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems);
} }
// Writes the kernel's cross-thread data into the indirect heap.
//
// indirectHeap                  - destination heap; aligned and grown by this call.
// kernel                        - source of the cross-thread data buffer and patch info list.
// inlineDataProgrammingRequired - not used by this implementation.
// walkerCmd                     - not used by this implementation.
// sizeCrossThreadData           - number of bytes to reserve and copy; only read here.
//
// Returns the aligned heap offset of the copied data plus the heap's GPU start offset.
template <typename GfxFamily>
size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
    IndirectHeap &indirectHeap,
    Kernel &kernel,
    bool inlineDataProgrammingRequired,
    WALKER_TYPE<GfxFamily> *walkerCmd,
    uint32_t &sizeCrossThreadData) {
    // The offset must be sampled after alignment so it points at the start of the payload.
    indirectHeap.align(WALKER_TYPE<GfxFamily>::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
    auto payloadOffset = indirectHeap.getUsed();

    // Reserve the space and copy the payload straight from the kernel.
    char *destination = static_cast<char *>(indirectHeap.getSpace(sizeCrossThreadData));
    memcpy_s(destination, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);

    // Patch info fix-up is performed only when the AUB-dump debug flag is enabled.
    const bool patchInfoCommentsEnabled = DebugManager.flags.AddPatchInfoCommentsForAUBDump.get();
    if (patchInfoCommentsEnabled) {
        FlatBatchBufferHelper::fixCrossThreadDataInfo(
            kernel.getPatchInfoDataList(),
            payloadOffset,
            indirectHeap.getGraphicsAllocation()->getGpuAddress());
    }

    const auto heapGpuStart = static_cast<size_t>(indirectHeap.getHeapGpuStartOffset());
    return payloadOffset + heapGpuStart;
}
template <typename GfxFamily> template <typename GfxFamily>
bool KernelCommandsHelper<GfxFamily>::resetBindingTablePrefetch(Kernel &kernel) { bool KernelCommandsHelper<GfxFamily>::resetBindingTablePrefetch(Kernel &kernel) {
return kernel.isSchedulerKernel || !doBindingTablePrefetch(); return kernel.isSchedulerKernel || !doBindingTablePrefetch();
@@ -130,29 +150,6 @@ void KernelCommandsHelper<GfxFamily>::setInterfaceDescriptorOffset(
walkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++); walkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++);
} }
// Fills in the size and heap offset of the kernel's cross-thread data.
//
// sizeCrossThreadData           - out: set to kernel.getCrossThreadDataSize().
// offsetCrossThreadData         - out: set to the value returned by sendCrossThreadData(ioh, kernel).
// kernel                        - kernel whose cross-thread data is sent.
// inlineDataProgrammingRequired - not used by this implementation.
// ioh                           - heap passed through to sendCrossThreadData.
// walkerCmd                     - not used by this implementation.
template <typename GfxFamily>
void KernelCommandsHelper<GfxFamily>::getCrossThreadData(
uint32_t &sizeCrossThreadData,
size_t &offsetCrossThreadData,
Kernel &kernel,
const bool &inlineDataProgrammingRequired,
IndirectHeap &ioh,
WALKER_TYPE<GfxFamily> *walkerCmd) {
sizeCrossThreadData = kernel.getCrossThreadDataSize();
offsetCrossThreadData = sendCrossThreadData(
ioh,
kernel);
}
// Returns the supplied cross-thread data size unchanged.
// The kernel argument is not consulted by this implementation.
template <typename GfxFamily>
size_t KernelCommandsHelper<GfxFamily>::getCrossThreadDataSize(
uint32_t &sizeCrossThreadData,
Kernel &kernel) {
return sizeCrossThreadData;
}
template <typename GfxFamily> template <typename GfxFamily>
bool KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(uint32_t workDim, size_t *gws, size_t *lws) { bool KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(uint32_t workDim, size_t *gws, size_t *lws) {
return true; return true;

View File

@@ -6,6 +6,7 @@
*/ */
#include "hw_cmds.h" #include "hw_cmds.h"
#include "patch_shared.h"
#include "runtime/command_queue/gpgpu_walker.h" #include "runtime/command_queue/gpgpu_walker.h"
#include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/helpers/debug_manager_state_restore.h"
@@ -84,8 +85,9 @@ struct WorkGroupSizeBase {
(workItems[0] + workGroupSize[0] - 1) / workGroupSize[0], (workItems[0] + workGroupSize[0] - 1) / workGroupSize[0],
(workItems[1] + workGroupSize[1] - 1) / workGroupSize[1], (workItems[1] + workGroupSize[1] - 1) / workGroupSize[1],
(workItems[2] + workGroupSize[2] - 1) / workGroupSize[2]}; (workItems[2] + workGroupSize[2] - 1) / workGroupSize[2]};
const iOpenCL::SPatchThreadPayload threadPayload = {};
GpgpuWalkerHelper<FamilyType>::setGpgpuWalkerThreadData(&pCmd, globalOffsets, workGroupsStart, workGroupsNum, GpgpuWalkerHelper<FamilyType>::setGpgpuWalkerThreadData(&pCmd, globalOffsets, workGroupsStart, workGroupsNum,
workGroupSize, simdSize, dims, true, false, false); workGroupSize, simdSize, dims, true, false, threadPayload);
//And check if it is programmed correctly //And check if it is programmed correctly
auto numWorkItems = computeWalkerWorkItems<FamilyType>(pCmd); auto numWorkItems = computeWalkerWorkItems<FamilyType>(pCmd);

View File

@@ -155,10 +155,13 @@ HWTEST_F(KernelCommandsTest, sendCrossThreadDataResourceUsage) {
auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
auto usedBefore = indirectHeap.getUsed(); auto usedBefore = indirectHeap.getUsed();
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
KernelCommandsHelper<FamilyType>::sendCrossThreadData( KernelCommandsHelper<FamilyType>::sendCrossThreadData(
indirectHeap, indirectHeap,
*kernel); *kernel,
false,
nullptr,
sizeCrossThreadData);
auto usedAfter = indirectHeap.getUsed(); auto usedAfter = indirectHeap.getUsed();
EXPECT_EQ(kernel->getCrossThreadDataSize(), usedAfter - usedBefore); EXPECT_EQ(kernel->getCrossThreadDataSize(), usedAfter - usedBefore);
@@ -178,10 +181,13 @@ HWTEST_F(KernelCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComment
PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap};
kernel->getPatchInfoDataList().push_back(patchInfoData); kernel->getPatchInfoDataList().push_back(patchInfoData);
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
KernelCommandsHelper<FamilyType>::sendCrossThreadData( KernelCommandsHelper<FamilyType>::sendCrossThreadData(
indirectHeap, indirectHeap,
*kernel); *kernel,
false,
nullptr,
sizeCrossThreadData);
ASSERT_EQ(1u, kernel->getPatchInfoDataList().size()); ASSERT_EQ(1u, kernel->getPatchInfoDataList().size());
EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation); EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation);
@@ -197,9 +203,13 @@ HWTEST_F(KernelCommandsTest, givenIndirectHeapNotAllocatedFromInternalPoolWhenSe
IndirectHeap indirectHeap(nonInternalAllocation, false); IndirectHeap indirectHeap(nonInternalAllocation, false);
MockKernelWithInternals mockKernelWithInternal(*pDevice); MockKernelWithInternals mockKernelWithInternal(*pDevice);
auto sizeCrossThreadData = mockKernelWithInternal.mockKernel->getCrossThreadDataSize();
auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData( auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
indirectHeap, indirectHeap,
*mockKernelWithInternal.mockKernel); *mockKernelWithInternal.mockKernel,
false,
nullptr,
sizeCrossThreadData);
EXPECT_EQ(0u, offset); EXPECT_EQ(0u, offset);
pDevice->getMemoryManager()->freeGraphicsMemory(nonInternalAllocation); pDevice->getMemoryManager()->freeGraphicsMemory(nonInternalAllocation);
} }
@@ -210,9 +220,13 @@ HWTEST_F(KernelCommandsTest, givenIndirectHeapAllocatedFromInternalPoolWhenSendC
auto expectedOffset = internalAllocation->getGpuAddressToPatch(); auto expectedOffset = internalAllocation->getGpuAddressToPatch();
MockKernelWithInternals mockKernelWithInternal(*pDevice); MockKernelWithInternals mockKernelWithInternal(*pDevice);
auto sizeCrossThreadData = mockKernelWithInternal.mockKernel->getCrossThreadDataSize();
auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData( auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
indirectHeap, indirectHeap,
*mockKernelWithInternal.mockKernel); *mockKernelWithInternal.mockKernel,
false,
nullptr,
sizeCrossThreadData);
EXPECT_EQ(expectedOffset, offset); EXPECT_EQ(expectedOffset, offset);
pDevice->getMemoryManager()->freeGraphicsMemory(internalAllocation); pDevice->getMemoryManager()->freeGraphicsMemory(internalAllocation);
@@ -239,10 +253,13 @@ HWTEST_F(KernelCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComment
kernel->getPatchInfoDataList().push_back(patchInfoData1); kernel->getPatchInfoDataList().push_back(patchInfoData1);
kernel->getPatchInfoDataList().push_back(patchInfoData2); kernel->getPatchInfoDataList().push_back(patchInfoData2);
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
auto offsetCrossThreadData = KernelCommandsHelper<FamilyType>::sendCrossThreadData( auto offsetCrossThreadData = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
indirectHeap, indirectHeap,
*kernel); *kernel,
false,
nullptr,
sizeCrossThreadData);
ASSERT_NE(0u, offsetCrossThreadData); ASSERT_NE(0u, offsetCrossThreadData);
EXPECT_EQ(128u, offsetCrossThreadData); EXPECT_EQ(128u, offsetCrossThreadData);
@@ -1204,7 +1221,7 @@ INSTANTIATE_TEST_CASE_P(ParentKernelCommandsFromBinaryTest,
::testing::Values(binaryFile), ::testing::Values(binaryFile),
::testing::ValuesIn(KernelNames))); ::testing::ValuesIn(KernelNames)));
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineAndCrossThreadSizeLesserEqualThanGrfThenReturnTrue) { HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineThenReturnTrue) {
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
DebugManager.flags.EnablePassInlineData.set(true); DebugManager.flags.EnablePassInlineData.set(true);
@@ -1217,7 +1234,7 @@ HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineAnd
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel)); EXPECT_TRUE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
} }
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineAndCrossThreadSizeLesserEqualThanGrfThenReturnFalse) { HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineThenReturnFalse) {
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
DebugManager.flags.EnablePassInlineData.set(true); DebugManager.flags.EnablePassInlineData.set(true);
@@ -1230,19 +1247,6 @@ HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInline
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel)); EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
} }
// With EnablePassInlineData set and the kernel's thread payload marked PassInlineData,
// expects inlineDataProgrammingRequired() to return false once the kernel is given a
// 16 x uint32_t cross-thread data buffer.
// NOTE(review): per the test name this buffer is meant to exceed sizeof(GRF) - confirm.
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineAndCrossThreadSizeGreaterThanGrfThenReturnFalse) {
// Restores debug flag state when the test scope ends.
DebugManagerStateRestore restore;
DebugManager.flags.EnablePassInlineData.set(true);
// Contents are left uninitialized; the test only supplies the buffer and its size.
uint32_t crossThreadData[16];
MockKernelWithInternals mockKernelWithInternal(*pDevice);
// threadPayload is reached through a const pointer, hence the const_cast to flip the flag.
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
mockKernelWithInternal.mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
}
HWTEST_F(KernelCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) { HWTEST_F(KernelCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) {
MockKernelWithInternals mockKernelWithInternal(*pDevice); MockKernelWithInternals mockKernelWithInternal(*pDevice);
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1; const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1;

View File

@@ -358,6 +358,42 @@ TEST_F(KernelFromBinaryTests, BuiltInIsSetToFalseForRegularKernels) {
EXPECT_FALSE(isBuiltIn); EXPECT_FALSE(isBuiltIn);
delete pKernel;
pKernel = nullptr;
pKernelInfo = pProgram->getKernelInfo("simple_kernel_3");
pKernel = Kernel::create(
pProgram,
*pKernelInfo,
&retVal);
ASSERT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, pKernel);
// get builtIn property
isBuiltIn = pKernel->isBuiltIn;
EXPECT_FALSE(isBuiltIn);
delete pKernel;
pKernel = nullptr;
pKernelInfo = pProgram->getKernelInfo("simple_kernel_4");
pKernel = Kernel::create(
pProgram,
*pKernelInfo,
&retVal);
ASSERT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, pKernel);
// get builtIn property
isBuiltIn = pKernel->isBuiltIn;
EXPECT_FALSE(isBuiltIn);
delete pKernel; delete pKernel;
} }

View File

@@ -1,23 +1,8 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (C) 2017-2018 Intel Corporation
* *
* Permission is hereby granted, free of charge, to any person obtaining a * SPDX-License-Identifier: MIT
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
* *
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/ */
__kernel void simple_kernel_0( __kernel void simple_kernel_0(
@@ -49,3 +34,11 @@ __kernel void simple_kernel_2(
dst[idx] = arg0; dst[idx] = arg0;
} }
// Each work-item writes 0 to dst at the index given by its local ID in dimension 0.
// dst - global buffer of uint; must hold at least as many elements as the
//       work-group size in dimension 0.
__kernel void simple_kernel_3(
__global uint *dst) {
dst[get_local_id(0)] = 0;
}
}
__kernel void simple_kernel_4() {
}