mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
Change interface to program cross-thread data
Change-Id: I96bf4bddf1557f588fd745efca7b19ec2f38a78e
This commit is contained in:
committed by
sys_ocldev
parent
31bf5b9b43
commit
f3a732081e
@@ -135,8 +135,8 @@ class GpgpuWalkerHelper {
|
||||
uint32_t simd,
|
||||
uint32_t workDim,
|
||||
bool localIdsGenerationByRuntime,
|
||||
bool kernelUsesLocalIds,
|
||||
bool inlineDataProgrammingRequired);
|
||||
bool inlineDataProgrammingRequired,
|
||||
const iOpenCL::SPatchThreadPayload &threadPayload);
|
||||
|
||||
static void dispatchProfilingCommandsStart(
|
||||
HwTimeStamps &hwTimeStamps,
|
||||
|
||||
@@ -20,8 +20,8 @@ inline size_t GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(
|
||||
uint32_t simd,
|
||||
uint32_t workDim,
|
||||
bool localIdsGenerationByRuntime,
|
||||
bool kernelUsesLocalIds,
|
||||
bool inlineDataProgrammingRequired) {
|
||||
bool inlineDataProgrammingRequired,
|
||||
const iOpenCL::SPatchThreadPayload &threadPayload) {
|
||||
auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2];
|
||||
|
||||
auto threadsPerWorkGroup = getThreadsPerWG(simd, localWorkSize);
|
||||
@@ -155,7 +155,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
size_t globalOffsets[3] = {0, 0, 0};
|
||||
size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1};
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(pGpGpuWalkerCmd, globalOffsets, globalOffsets, workGroups, localWorkSizes,
|
||||
simd, 1, localIdsGenerationByRuntime, kernelUsesLocalIds, inlineDataProgrammingRequired);
|
||||
simd, 1, localIdsGenerationByRuntime, inlineDataProgrammingRequired,
|
||||
*scheduler.getKernelInfo().patchInfo.threadPayload);
|
||||
|
||||
// Implement disabling special WA DisableLSQCROPERFforOCL if needed
|
||||
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(commandStream, scheduler, false);
|
||||
|
||||
@@ -223,7 +223,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z};
|
||||
GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups,
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
localIdsGenerationByRuntime, kernelUsesLocalIds, inlineDataProgrammingRequired);
|
||||
localIdsGenerationByRuntime, inlineDataProgrammingRequired,
|
||||
*kernel.getKernelInfo().patchInfo.threadPayload);
|
||||
|
||||
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
|
||||
currentDispatchIndex++;
|
||||
|
||||
@@ -75,7 +75,10 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
||||
|
||||
static size_t sendCrossThreadData(
|
||||
IndirectHeap &indirectHeap,
|
||||
Kernel &kernel);
|
||||
Kernel &kernel,
|
||||
bool inlineDataProgrammingRequired,
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
uint32_t &sizeCrossThreadData);
|
||||
|
||||
static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const KernelInfo &srcKernelInfo,
|
||||
const void *srcKernelSsh, size_t srcKernelSshSize,
|
||||
@@ -197,18 +200,6 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
uint32_t &interfaceDescriptorIndex);
|
||||
|
||||
static void getCrossThreadData(
|
||||
uint32_t &sizeCrossThreadData,
|
||||
size_t &offsetCrossThreadData,
|
||||
Kernel &kernel,
|
||||
const bool &inlineDataProgrammingRequired,
|
||||
IndirectHeap &ioh,
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd);
|
||||
|
||||
inline static size_t getCrossThreadDataSize(
|
||||
uint32_t &sizeCrossThreadData,
|
||||
Kernel &kernel);
|
||||
|
||||
static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData);
|
||||
static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
|
||||
static void programPipeControlDataWriteWithCsStall(LinearStream &commandStream, uint64_t writeAddress, uint64_t data);
|
||||
|
||||
@@ -163,24 +163,6 @@ size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
return (size_t)offsetInterfaceDescriptor;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
IndirectHeap &indirectHeap,
|
||||
Kernel &kernel) {
|
||||
indirectHeap.align(GfxFamily::WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
|
||||
auto offsetCrossThreadData = indirectHeap.getUsed();
|
||||
auto sizeCrossThreadData = kernel.getCrossThreadDataSize();
|
||||
char *pDest = static_cast<char *>(indirectHeap.getSpace(sizeCrossThreadData));
|
||||
memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
return offsetCrossThreadData + static_cast<size_t>(indirectHeap.getHeapGpuStartOffset());
|
||||
}
|
||||
|
||||
// Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base addess)
|
||||
// as required by the INTERFACE_DESCRIPTOR_DATA.
|
||||
template <typename GfxFamily>
|
||||
@@ -308,21 +290,16 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
auto threadsPerThreadGroup = static_cast<uint32_t>(getThreadsPerWG(simd, localWorkItems));
|
||||
auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload);
|
||||
|
||||
uint32_t sizeCrossThreadData = 0;
|
||||
size_t offsetCrossThreadData = 0;
|
||||
uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize();
|
||||
|
||||
getCrossThreadData(
|
||||
sizeCrossThreadData,
|
||||
offsetCrossThreadData,
|
||||
kernel,
|
||||
inlineDataProgrammingRequired,
|
||||
ioh,
|
||||
walkerCmd);
|
||||
size_t offsetCrossThreadData = KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
ioh, kernel, inlineDataProgrammingRequired,
|
||||
walkerCmd, sizeCrossThreadData);
|
||||
|
||||
size_t sizePerThreadDataTotal = 0;
|
||||
size_t sizePerThreadData = 0;
|
||||
|
||||
programPerThreadData(
|
||||
KernelCommandsHelper<GfxFamily>::programPerThreadData(
|
||||
sizePerThreadData,
|
||||
localIdsGenerationByRuntime,
|
||||
ioh,
|
||||
@@ -345,7 +322,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
dsh,
|
||||
offsetInterfaceDescriptor,
|
||||
kernelStartOffset,
|
||||
getCrossThreadDataSize(sizeCrossThreadData, kernel),
|
||||
sizeCrossThreadData,
|
||||
sizePerThreadData,
|
||||
dstBindingTablePointer,
|
||||
samplerStateOffset,
|
||||
@@ -441,8 +418,7 @@ bool KernelCommandsHelper<GfxFamily>::doBindingTablePrefetch() {
|
||||
template <typename GfxFamily>
|
||||
bool KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(const Kernel &kernel) {
|
||||
if (DebugManager.flags.EnablePassInlineData.get()) {
|
||||
return kernel.getKernelInfo().patchInfo.threadPayload->PassInlineData &&
|
||||
kernel.getCrossThreadDataSize() <= sizeof(GRF);
|
||||
return kernel.getKernelInfo().patchInfo.threadPayload->PassInlineData;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -117,6 +117,26 @@ void KernelCommandsHelper<GfxFamily>::programPerThreadData(
|
||||
updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t KernelCommandsHelper<GfxFamily>::sendCrossThreadData(
|
||||
IndirectHeap &indirectHeap,
|
||||
Kernel &kernel,
|
||||
bool inlineDataProgrammingRequired,
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd,
|
||||
uint32_t &sizeCrossThreadData) {
|
||||
indirectHeap.align(WALKER_TYPE<GfxFamily>::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
||||
|
||||
auto offsetCrossThreadData = indirectHeap.getUsed();
|
||||
char *pDest = static_cast<char *>(indirectHeap.getSpace(sizeCrossThreadData));
|
||||
memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress());
|
||||
}
|
||||
|
||||
return offsetCrossThreadData + static_cast<size_t>(indirectHeap.getHeapGpuStartOffset());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool KernelCommandsHelper<GfxFamily>::resetBindingTablePrefetch(Kernel &kernel) {
|
||||
return kernel.isSchedulerKernel || !doBindingTablePrefetch();
|
||||
@@ -130,29 +150,6 @@ void KernelCommandsHelper<GfxFamily>::setInterfaceDescriptorOffset(
|
||||
walkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void KernelCommandsHelper<GfxFamily>::getCrossThreadData(
|
||||
uint32_t &sizeCrossThreadData,
|
||||
size_t &offsetCrossThreadData,
|
||||
Kernel &kernel,
|
||||
const bool &inlineDataProgrammingRequired,
|
||||
IndirectHeap &ioh,
|
||||
WALKER_TYPE<GfxFamily> *walkerCmd) {
|
||||
|
||||
sizeCrossThreadData = kernel.getCrossThreadDataSize();
|
||||
offsetCrossThreadData = sendCrossThreadData(
|
||||
ioh,
|
||||
kernel);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
size_t KernelCommandsHelper<GfxFamily>::getCrossThreadDataSize(
|
||||
uint32_t &sizeCrossThreadData,
|
||||
Kernel &kernel) {
|
||||
|
||||
return sizeCrossThreadData;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired(uint32_t workDim, size_t *gws, size_t *lws) {
|
||||
return true;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "hw_cmds.h"
|
||||
#include "patch_shared.h"
|
||||
#include "runtime/command_queue/gpgpu_walker.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
@@ -84,8 +85,9 @@ struct WorkGroupSizeBase {
|
||||
(workItems[0] + workGroupSize[0] - 1) / workGroupSize[0],
|
||||
(workItems[1] + workGroupSize[1] - 1) / workGroupSize[1],
|
||||
(workItems[2] + workGroupSize[2] - 1) / workGroupSize[2]};
|
||||
const iOpenCL::SPatchThreadPayload threadPayload = {};
|
||||
GpgpuWalkerHelper<FamilyType>::setGpgpuWalkerThreadData(&pCmd, globalOffsets, workGroupsStart, workGroupsNum,
|
||||
workGroupSize, simdSize, dims, true, false, false);
|
||||
workGroupSize, simdSize, dims, true, false, threadPayload);
|
||||
|
||||
//And check if it is programmed correctly
|
||||
auto numWorkItems = computeWalkerWorkItems<FamilyType>(pCmd);
|
||||
|
||||
@@ -155,10 +155,13 @@ HWTEST_F(KernelCommandsTest, sendCrossThreadDataResourceUsage) {
|
||||
|
||||
auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192);
|
||||
auto usedBefore = indirectHeap.getUsed();
|
||||
|
||||
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
|
||||
KernelCommandsHelper<FamilyType>::sendCrossThreadData(
|
||||
indirectHeap,
|
||||
*kernel);
|
||||
*kernel,
|
||||
false,
|
||||
nullptr,
|
||||
sizeCrossThreadData);
|
||||
|
||||
auto usedAfter = indirectHeap.getUsed();
|
||||
EXPECT_EQ(kernel->getCrossThreadDataSize(), usedAfter - usedBefore);
|
||||
@@ -178,10 +181,13 @@ HWTEST_F(KernelCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComment
|
||||
|
||||
PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap};
|
||||
kernel->getPatchInfoDataList().push_back(patchInfoData);
|
||||
|
||||
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
|
||||
KernelCommandsHelper<FamilyType>::sendCrossThreadData(
|
||||
indirectHeap,
|
||||
*kernel);
|
||||
*kernel,
|
||||
false,
|
||||
nullptr,
|
||||
sizeCrossThreadData);
|
||||
|
||||
ASSERT_EQ(1u, kernel->getPatchInfoDataList().size());
|
||||
EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation);
|
||||
@@ -197,9 +203,13 @@ HWTEST_F(KernelCommandsTest, givenIndirectHeapNotAllocatedFromInternalPoolWhenSe
|
||||
IndirectHeap indirectHeap(nonInternalAllocation, false);
|
||||
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
auto sizeCrossThreadData = mockKernelWithInternal.mockKernel->getCrossThreadDataSize();
|
||||
auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
|
||||
indirectHeap,
|
||||
*mockKernelWithInternal.mockKernel);
|
||||
*mockKernelWithInternal.mockKernel,
|
||||
false,
|
||||
nullptr,
|
||||
sizeCrossThreadData);
|
||||
EXPECT_EQ(0u, offset);
|
||||
pDevice->getMemoryManager()->freeGraphicsMemory(nonInternalAllocation);
|
||||
}
|
||||
@@ -210,9 +220,13 @@ HWTEST_F(KernelCommandsTest, givenIndirectHeapAllocatedFromInternalPoolWhenSendC
|
||||
auto expectedOffset = internalAllocation->getGpuAddressToPatch();
|
||||
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
auto sizeCrossThreadData = mockKernelWithInternal.mockKernel->getCrossThreadDataSize();
|
||||
auto offset = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
|
||||
indirectHeap,
|
||||
*mockKernelWithInternal.mockKernel);
|
||||
*mockKernelWithInternal.mockKernel,
|
||||
false,
|
||||
nullptr,
|
||||
sizeCrossThreadData);
|
||||
EXPECT_EQ(expectedOffset, offset);
|
||||
|
||||
pDevice->getMemoryManager()->freeGraphicsMemory(internalAllocation);
|
||||
@@ -239,10 +253,13 @@ HWTEST_F(KernelCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComment
|
||||
|
||||
kernel->getPatchInfoDataList().push_back(patchInfoData1);
|
||||
kernel->getPatchInfoDataList().push_back(patchInfoData2);
|
||||
|
||||
auto sizeCrossThreadData = kernel->getCrossThreadDataSize();
|
||||
auto offsetCrossThreadData = KernelCommandsHelper<FamilyType>::sendCrossThreadData(
|
||||
indirectHeap,
|
||||
*kernel);
|
||||
*kernel,
|
||||
false,
|
||||
nullptr,
|
||||
sizeCrossThreadData);
|
||||
|
||||
ASSERT_NE(0u, offsetCrossThreadData);
|
||||
EXPECT_EQ(128u, offsetCrossThreadData);
|
||||
@@ -1204,7 +1221,7 @@ INSTANTIATE_TEST_CASE_P(ParentKernelCommandsFromBinaryTest,
|
||||
::testing::Values(binaryFile),
|
||||
::testing::ValuesIn(KernelNames)));
|
||||
|
||||
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineAndCrossThreadSizeLesserEqualThanGrfThenReturnTrue) {
|
||||
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineThenReturnTrue) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.EnablePassInlineData.set(true);
|
||||
|
||||
@@ -1217,7 +1234,7 @@ HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineAnd
|
||||
EXPECT_TRUE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
|
||||
}
|
||||
|
||||
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineAndCrossThreadSizeLesserEqualThanGrfThenReturnFalse) {
|
||||
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineThenReturnFalse) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.EnablePassInlineData.set(true);
|
||||
|
||||
@@ -1230,19 +1247,6 @@ HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInline
|
||||
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
|
||||
}
|
||||
|
||||
HWTEST_F(KernelCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineAndCrossThreadSizeGreaterThanGrfThenReturnFalse) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.EnablePassInlineData.set(true);
|
||||
|
||||
uint32_t crossThreadData[16];
|
||||
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->PassInlineData = 1;
|
||||
mockKernelWithInternal.mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData));
|
||||
|
||||
EXPECT_FALSE(KernelCommandsHelper<FamilyType>::inlineDataProgrammingRequired(*mockKernelWithInternal.mockKernel));
|
||||
}
|
||||
|
||||
HWTEST_F(KernelCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) {
|
||||
MockKernelWithInternals mockKernelWithInternal(*pDevice);
|
||||
const_cast<SPatchThreadPayload *>(mockKernelWithInternal.kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1;
|
||||
|
||||
@@ -358,6 +358,42 @@ TEST_F(KernelFromBinaryTests, BuiltInIsSetToFalseForRegularKernels) {
|
||||
|
||||
EXPECT_FALSE(isBuiltIn);
|
||||
|
||||
delete pKernel;
|
||||
pKernel = nullptr;
|
||||
|
||||
pKernelInfo = pProgram->getKernelInfo("simple_kernel_3");
|
||||
|
||||
pKernel = Kernel::create(
|
||||
pProgram,
|
||||
*pKernelInfo,
|
||||
&retVal);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
|
||||
// get builtIn property
|
||||
isBuiltIn = pKernel->isBuiltIn;
|
||||
|
||||
EXPECT_FALSE(isBuiltIn);
|
||||
|
||||
delete pKernel;
|
||||
pKernel = nullptr;
|
||||
|
||||
pKernelInfo = pProgram->getKernelInfo("simple_kernel_4");
|
||||
|
||||
pKernel = Kernel::create(
|
||||
pProgram,
|
||||
*pKernelInfo,
|
||||
&retVal);
|
||||
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
ASSERT_NE(nullptr, pKernel);
|
||||
|
||||
// get builtIn property
|
||||
isBuiltIn = pKernel->isBuiltIn;
|
||||
|
||||
EXPECT_FALSE(isBuiltIn);
|
||||
|
||||
delete pKernel;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,23 +1,8 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
__kernel void simple_kernel_0(
|
||||
@@ -49,3 +34,11 @@ __kernel void simple_kernel_2(
|
||||
|
||||
dst[idx] = arg0;
|
||||
}
|
||||
|
||||
__kernel void simple_kernel_3(
|
||||
__global uint *dst) {
|
||||
dst[get_local_id(0)] = 0;
|
||||
}
|
||||
|
||||
__kernel void simple_kernel_4() {
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user