feature: use heapless builtins in OCL if supported

Related-To: NEO-7621
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk 2024-02-07 14:43:44 +00:00 committed by Compute-Runtime-Automation
parent 6f69fa997a
commit 59f661356c
26 changed files with 609 additions and 101 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -156,6 +156,24 @@ class BuiltInOp<EBuiltInOps::copyBufferToBufferStateless> : public BuiltInOp<EBu
}
};
// Dispatch-info builder for the stateless + heapless flavour of the
// buffer-to-buffer copy built-in. It derives from the copyBufferToBuffer
// builder, loads its own kernels under the
// copyBufferToBufferStatelessHeapless built-in id with the
// greaterThan4gbBuffersRequired compiler option, and instantiates the typed
// dispatch path with uint64_t.
template <>
class BuiltInOp<EBuiltInOps::copyBufferToBufferStatelessHeapless> : public BuiltInOp<EBuiltInOps::copyBufferToBuffer> {
    using BaseBuilder = BuiltInOp<EBuiltInOps::copyBufferToBuffer>;

  public:
    // NOTE(review): the trailing `false` presumably tells the base class not to
    // populate its own kernels, since populate() is called here - confirm
    // against the three-argument base constructor.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BaseBuilder(kernelsLib, device, false) {
        populate(EBuiltInOps::copyBufferToBufferStatelessHeapless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "CopyBufferToBufferLeftLeftover", kernLeftLeftover,
                 "CopyBufferToBufferMiddle", kernMiddle,
                 "CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned,
                 "CopyBufferToBufferRightLeftover", kernRightLeftover);
    }

    // Forwards to the typed implementation, instantiated with uint64_t.
    bool buildDispatchInfos(MultiDispatchInfo &dispatchInfos) const override {
        return buildDispatchInfosTyped<uint64_t>(dispatchInfos);
    }
};
template <>
class BuiltInOp<EBuiltInOps::copyBufferRect> : public BuiltinDispatchInfoBuilder {
public:
@ -285,6 +303,22 @@ class BuiltInOp<EBuiltInOps::copyBufferRectStateless> : public BuiltInOp<EBuiltI
}
};
// Dispatch-info builder for the stateless + heapless flavour of the
// rectangular buffer copy built-in. It derives from the copyBufferRect
// builder, loads its kernels under the copyBufferRectStatelessHeapless
// built-in id with the greaterThan4gbBuffersRequired compiler option, and
// instantiates the typed dispatch path with uint64_t.
template <>
class BuiltInOp<EBuiltInOps::copyBufferRectStatelessHeapless> : public BuiltInOp<EBuiltInOps::copyBufferRect> {
    using BaseBuilder = BuiltInOp<EBuiltInOps::copyBufferRect>;

  public:
    // NOTE(review): the trailing `false` presumably tells the base class not to
    // populate its own kernels, since populate() is called here - confirm
    // against the three-argument base constructor.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BaseBuilder(kernelsLib, device, false) {
        // Slots 0 and 1 are both bound to the 2d kernel; slot 2 gets the 3d one.
        populate(EBuiltInOps::copyBufferRectStatelessHeapless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "CopyBufferRectBytes2d", kernelBytes[0],
                 "CopyBufferRectBytes2d", kernelBytes[1],
                 "CopyBufferRectBytes3d", kernelBytes[2]);
    }

    // Forwards to the typed implementation, instantiated with uint64_t.
    bool buildDispatchInfos(MultiDispatchInfo &dispatchInfos) const override {
        return buildDispatchInfosTyped<uint64_t>(dispatchInfos);
    }
};
template <>
class BuiltInOp<EBuiltInOps::fillBuffer> : public BuiltinDispatchInfoBuilder {
public:
@ -393,6 +427,21 @@ class BuiltInOp<EBuiltInOps::fillBufferStateless> : public BuiltInOp<EBuiltInOps
}
};
// Dispatch-info builder for the stateless + heapless flavour of the buffer
// fill built-in. It derives from the fillBuffer builder, loads its kernels
// under the fillBufferStatelessHeapless built-in id with the
// greaterThan4gbBuffersRequired compiler option, and instantiates the typed
// dispatch path with uint64_t.
template <>
class BuiltInOp<EBuiltInOps::fillBufferStatelessHeapless> : public BuiltInOp<EBuiltInOps::fillBuffer> {
    using BaseBuilder = BuiltInOp<EBuiltInOps::fillBuffer>;

  public:
    // NOTE(review): the trailing `false` presumably tells the base class not to
    // populate its own kernels, since populate() is called here - confirm
    // against the three-argument base constructor.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BaseBuilder(kernelsLib, device, false) {
        populate(EBuiltInOps::fillBufferStatelessHeapless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "FillBufferLeftLeftover", kernLeftLeftover,
                 "FillBufferMiddle", kernMiddle,
                 "FillBufferRightLeftover", kernRightLeftover);
    }

    // Forwards to the typed implementation, instantiated with uint64_t.
    bool buildDispatchInfos(MultiDispatchInfo &dispatchInfos) const override {
        return buildDispatchInfosTyped<uint64_t>(dispatchInfos);
    }
};
template <>
class BuiltInOp<EBuiltInOps::copyBufferToImage3d> : public BuiltinDispatchInfoBuilder {
public:
@ -764,18 +813,27 @@ BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuil
case EBuiltInOps::copyBufferToBufferStateless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToBufferStateless>>(builtins, device); });
break;
case EBuiltInOps::copyBufferToBufferStatelessHeapless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToBufferStatelessHeapless>>(builtins, device); });
break;
case EBuiltInOps::copyBufferRect:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRect>>(builtins, device); });
break;
case EBuiltInOps::copyBufferRectStateless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRectStateless>>(builtins, device); });
break;
case EBuiltInOps::copyBufferRectStatelessHeapless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferRectStatelessHeapless>>(builtins, device); });
break;
case EBuiltInOps::fillBuffer:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBuffer>>(builtins, device); });
break;
case EBuiltInOps::fillBufferStateless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBufferStateless>>(builtins, device); });
break;
case EBuiltInOps::fillBufferStatelessHeapless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::fillBufferStatelessHeapless>>(builtins, device); });
break;
case EBuiltInOps::copyBufferToImage3d:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::copyBufferToImage3d>>(builtins, device); });
break;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -23,15 +23,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
auto eBuiltInOpsType = EBuiltInOps::copyBufferToBuffer;
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER;
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), &size};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
eBuiltInOpsType = EBuiltInOps::copyBufferToBufferStateless;
}
const bool useStateless = forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()));
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
BuiltinOpParams dc;
dc.srcMemObj = srcBuffer;
@ -48,7 +48,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
MemObjSurface s2(dstBuffer);
Surface *surfaces[] = {&s1, &s2};
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, false, csr);
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -28,15 +28,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER_RECT;
CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), region};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
}
const bool useStateless = forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()));
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
MemObjSurface srcBufferSurf(srcBuffer);
MemObjSurface dstBufferSurf(dstBuffer);
@ -56,7 +56,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
dc.direction = csrSelectionArgs.direction;
MultiDispatchInfo dispatchInfo(dc);
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, false, csr);
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -24,13 +24,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferToImage(
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
auto eBuiltInOpsType = EBuiltInOps::copyBufferToImage3d;
if (forceStateless(srcBuffer->getSize())) {
eBuiltInOpsType = EBuiltInOps::copyBufferToImage3dStateless;
}
const bool useStateless = forceStateless(srcBuffer->getSize());
const bool useHeapless = false;
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(useStateless, useHeapless);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
this->getClDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -24,12 +24,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImageToBuffer(
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event) {
auto eBuiltInOpsType = EBuiltInOps::copyImage3dToBuffer;
if (forceStateless(dstBuffer->getSize())) {
eBuiltInOpsType = EBuiltInOps::copyImage3dToBufferStateless;
}
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
const bool useStateless = forceStateless(dstBuffer->getSize());
const bool useHeapless = false;
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(useStateless, useHeapless);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
this->getClDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);

View File

@ -50,12 +50,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
}
auto eBuiltInOps = EBuiltInOps::fillBuffer;
if (forceStateless(buffer->getSize())) {
eBuiltInOps = EBuiltInOps::fillBufferStateless;
}
const bool useStateless = forceStateless(buffer->getSize());
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(useStateless, useHeapless);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps,
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
this->getClDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -70,10 +70,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
numEventsInWaitList, eventWaitList, event);
}
auto eBuiltInOps = EBuiltInOps::copyBufferToBuffer;
if (forceStateless(buffer->getSize())) {
eBuiltInOps = EBuiltInOps::copyBufferToBufferStateless;
}
const bool useStateless = forceStateless(buffer->getSize());
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
void *dstPtr = ptr;
@ -121,7 +120,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
}
}
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
return dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -54,10 +54,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
bool isCpuCopyAllowed = false;
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
if (forceStateless(buffer->getSize())) {
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
}
const bool useStateless = forceStateless(buffer->getSize());
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
void *dstPtr = ptr;
@ -103,7 +102,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
dc.direction = csrSelectionArgs.direction;
MultiDispatchInfo dispatchInfo(dc);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}

View File

@ -354,10 +354,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
isStatelessRequired |= forceStateless(dstSvmData->size);
}
auto builtInType = EBuiltInOps::copyBufferToBuffer;
if (isStatelessRequired) {
builtInType = EBuiltInOps::copyBufferToBufferStateless;
}
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(isStatelessRequired, useHeapless);
MultiDispatchInfo dispatchInfo;
BuiltinOpParams operationParams;
@ -510,10 +508,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemFill(void *svmPtr,
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
}
auto builtInType = EBuiltInOps::fillBuffer;
if (forceStateless(svmData->size)) {
builtInType = EBuiltInOps::fillBufferStateless;
}
const bool useStateless = forceStateless(svmData->size);
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(useStateless, useHeapless);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
this->getClDevice());

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -58,10 +58,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
numEventsInWaitList, eventWaitList, event);
}
auto eBuiltInOps = EBuiltInOps::copyBufferToBuffer;
if (forceStateless(buffer->getSize())) {
eBuiltInOps = EBuiltInOps::copyBufferToBufferStateless;
}
const bool useStateless = forceStateless(buffer->getSize());
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(useStateless, useHeapless);
void *srcPtr = const_cast<void *>(ptr);
@ -102,7 +101,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
dc.direction = csrSelectionArgs.direction;
MultiDispatchInfo dispatchInfo(dc);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -58,10 +58,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
bool isCpuCopyAllowed = false;
getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed);
auto eBuiltInOps = EBuiltInOps::copyBufferRect;
if (forceStateless(buffer->getSize())) {
eBuiltInOps = EBuiltInOps::copyBufferRectStateless;
}
const bool useStateless = forceStateless(buffer->getSize());
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(useStateless, useHeapless);
void *srcPtr = const_cast<void *>(ptr);
@ -108,7 +107,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
dc.direction = csrSelectionArgs.direction;
MultiDispatchInfo dispatchInfo(dc);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
const auto dispatchResult = dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
if (dispatchResult != CL_SUCCESS) {
return dispatchResult;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -146,6 +146,13 @@ struct AuxBuiltinsMatcher {
}
};
// HWTEST2 matcher: selects the product families whose gfx-core TestTraits
// report heaplessAllowed.
struct HeaplessSupportedMatcher {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        // Map the product family to its gfx core family, then read the trait.
        constexpr auto coreFamily = NEO::ToGfxCoreFamily<productFamily>::get();
        return TestTraits<coreFamily>::heaplessAllowed;
    }
};
HWTEST2_F(BuiltInTests, GivenBuiltinTypeBinaryWhenGettingAuxTranslationBuiltinThenResourceSizeIsNonZero, MatchAny) {
auto mockBuiltinsLib = std::unique_ptr<MockBuiltinsLib>(new MockBuiltinsLib());
@ -2413,3 +2420,184 @@ HWTEST_F(BuiltInOwnershipWrapperTests, givenBuiltInOwnershipWrapperWhenAskedForT
EXPECT_FALSE(std::is_copy_constructible<BuiltInOwnershipWrapper>::value);
EXPECT_FALSE(std::is_copy_assignable<BuiltInOwnershipWrapper>::value);
}
// Builds dispatch infos for a 4 GiB copy at a 4 GiB source offset with the
// copyBufferToBufferStatelessHeapless builder and checks that exactly one
// dispatch is produced and that the stored built-in params match the input.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Skipped on 32-bit builds - presumably because the sizes used below do
    // not fit in a 32-bit size_t.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &heaplessBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferToBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t sourceOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t copyBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer source;
    source.size = static_cast<size_t>(bufferBytes);
    MockBuffer destination;
    destination.size = static_cast<size_t>(bufferBytes);

    BuiltinOpParams copyParams;
    copyParams.srcMemObj = &source;
    copyParams.dstMemObj = &destination;
    copyParams.srcOffset = {static_cast<size_t>(sourceOffsetBytes), 0, 0};
    copyParams.dstOffset = {0, 0, 0};
    copyParams.size = {static_cast<size_t>(copyBytes), 0, 0};

    MultiDispatchInfo dispatches(copyParams);
    ASSERT_TRUE(heaplessBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), copyParams));
}
// Rect copy from a `buffer` allocation into a `bufferHostMemory` allocation
// using the copyBufferRectStatelessHeapless builder: expects one dispatch,
// unchanged built-in params, and a kernel that reports its destination as
// being in system memory.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToSystemBufferRectStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Skipped on 32-bit builds - presumably because the sizes used below do
    // not fit in a 32-bit size_t.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &heaplessBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t sourceOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t rowBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer source;
    source.size = static_cast<size_t>(bufferBytes);
    MockBuffer destination;
    destination.size = static_cast<size_t>(bufferBytes);

    // Source is a regular buffer, destination is host-memory backed.
    source.mockGfxAllocation.setAllocationType(AllocationType::buffer);
    destination.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);

    BuiltinOpParams rectParams;
    rectParams.srcMemObj = &source;
    rectParams.dstMemObj = &destination;
    rectParams.srcOffset = {static_cast<size_t>(sourceOffsetBytes), 0, 0};
    rectParams.dstOffset = {0, 0, 0};
    rectParams.size = {static_cast<size_t>(rowBytes), 1, 1};
    rectParams.srcRowPitch = static_cast<size_t>(rowBytes);
    rectParams.srcSlicePitch = 0;
    rectParams.dstRowPitch = static_cast<size_t>(rowBytes);
    rectParams.dstSlicePitch = 0;

    MultiDispatchInfo dispatches(rectParams);
    ASSERT_TRUE(heaplessBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), rectParams));

    for (auto &singleDispatch : dispatches) {
        EXPECT_TRUE(singleDispatch.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
// Rect copy from a `bufferHostMemory` allocation into a `buffer` allocation
// using the copyBufferRectStatelessHeapless builder: expects one dispatch,
// unchanged built-in params, and a kernel that does NOT report its
// destination as being in system memory.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToLocalBufferRectStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Skipped on 32-bit builds - presumably because the sizes used below do
    // not fit in a 32-bit size_t.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &heaplessBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t sourceOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t rowBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer source;
    source.size = static_cast<size_t>(bufferBytes);
    MockBuffer destination;
    destination.size = static_cast<size_t>(bufferBytes);

    // Source is host-memory backed, destination is a regular buffer.
    source.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
    destination.mockGfxAllocation.setAllocationType(AllocationType::buffer);

    BuiltinOpParams rectParams;
    rectParams.srcMemObj = &source;
    rectParams.dstMemObj = &destination;
    rectParams.srcOffset = {static_cast<size_t>(sourceOffsetBytes), 0, 0};
    rectParams.dstOffset = {0, 0, 0};
    rectParams.size = {static_cast<size_t>(rowBytes), 1, 1};
    rectParams.srcRowPitch = static_cast<size_t>(rowBytes);
    rectParams.srcSlicePitch = 0;
    rectParams.dstRowPitch = static_cast<size_t>(rowBytes);
    rectParams.dstSlicePitch = 0;

    MultiDispatchInfo dispatches(rectParams);
    ASSERT_TRUE(heaplessBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), rectParams));

    for (auto &singleDispatch : dispatches) {
        EXPECT_FALSE(singleDispatch.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
// Fill of a `bufferHostMemory` destination at a 4 GiB offset using the
// fillBufferStatelessHeapless builder: expects one dispatch, unchanged
// built-in params, and a kernel that reports its destination as being in
// system memory.
HWTEST2_F(BuiltInTests, whenBuilderFillSystemBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Skipped on 32-bit builds - presumably because the sizes used below do
    // not fit in a 32-bit size_t.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &heaplessBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t destinationOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t fillBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer source;
    source.size = static_cast<size_t>(bufferBytes);
    MockBuffer destination;
    destination.size = static_cast<size_t>(bufferBytes);

    // Source is a regular buffer, destination is host-memory backed.
    source.mockGfxAllocation.setAllocationType(AllocationType::buffer);
    destination.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);

    BuiltinOpParams fillParams;
    fillParams.srcMemObj = &source;
    fillParams.dstMemObj = &destination;
    fillParams.dstOffset = {static_cast<size_t>(destinationOffsetBytes), 0, 0};
    fillParams.size = {static_cast<size_t>(fillBytes), 0, 0};

    MultiDispatchInfo dispatches(fillParams);
    ASSERT_TRUE(heaplessBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), fillParams));

    for (auto &singleDispatch : dispatches) {
        EXPECT_TRUE(singleDispatch.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
// Fill of a `buffer` destination at a 4 GiB offset using the
// fillBufferStatelessHeapless builder: expects one dispatch, unchanged
// built-in params, and a kernel that does NOT report its destination as being
// in system memory.
HWTEST2_F(BuiltInTests, whenBuilderFillLocalBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Skipped on 32-bit builds - presumably because the sizes used below do
    // not fit in a 32-bit size_t.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &heaplessBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t destinationOffsetBytes = 4ull * MemoryConstants::gigaByte;
    const uint64_t fillBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer source;
    source.size = static_cast<size_t>(bufferBytes);
    MockBuffer destination;
    destination.size = static_cast<size_t>(bufferBytes);

    // Source is host-memory backed, destination is a regular buffer.
    source.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
    destination.mockGfxAllocation.setAllocationType(AllocationType::buffer);

    BuiltinOpParams fillParams;
    fillParams.srcMemObj = &source;
    fillParams.dstMemObj = &destination;
    fillParams.dstOffset = {static_cast<size_t>(destinationOffsetBytes), 0, 0};
    fillParams.size = {static_cast<size_t>(fillBytes), 0, 0};

    MultiDispatchInfo dispatches(fillParams);
    ASSERT_TRUE(heaplessBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), fillParams));

    for (auto &singleDispatch : dispatches) {
        EXPECT_FALSE(singleDispatch.getKernel()->getDestinationAllocationInSystemMemory());
    }
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -19,6 +19,8 @@
#include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "test_traits_common.h"
namespace NEO {
struct CommandDeviceFixture : public ClDeviceFixture,
@ -147,4 +149,11 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
bool expectedKernelSystemMemory = false;
};
struct HeaplessSupportedMatcher {
template <PRODUCT_FAMILY productFamily>
static constexpr bool isMatched() {
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::heaplessAllowed;
}
};
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -252,6 +252,47 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
}
// Builds dispatch infos with the copyBufferRectStatelessHeapless builder for
// a 50x50x1 rect copy between the fixture buffers and verifies the selected
// kernel: it supports buffers bigger than 4 GB, its first argument is not
// pure-stateful, and its implicit args place the indirect-data pointer at
// offset 0 and the scratch pointer at offset 8, each 8 bytes wide.
HWTEST2_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless,
                                                                            pCmdQ->getClDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer;
    dc.dstMemObj = dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;

    MultiDispatchInfo multiDispatchInfo(dc);
    // Fatal assertions: begin() is dereferenced below, which must not happen
    // when the build failed or produced no dispatch.
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);
    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
}
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) {
enqueueCopyBufferRect2D<FamilyType>();
validateL3Programming<FamilyType>(cmdList, itorWalker);

View File

@ -263,6 +263,45 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
}
// Builds dispatch infos with the copyBufferToBufferStatelessHeapless builder
// for a copy between two helper-created buffers and verifies the selected
// kernel: it supports buffers bigger than 4 GB, its first argument is not
// pure-stateful, and its implicit args place the indirect-data pointer at
// offset 0 and the scratch pointer at offset 8, each 8 bytes wide.
HWTEST2_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {
    if (is32bit) {
        GTEST_SKIP();
    }

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    auto dstBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferToBufferStatelessHeapless,
                                                                            pCmdQ->getClDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer.get();
    dc.dstMemObj = dstBuffer.get();
    dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0};
    dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0};
    dc.size = {EnqueueCopyBufferTraits::size, 0, 0};

    MultiDispatchInfo multiDispatchInfo(dc);
    // Fatal assertions: begin() is dereferenced below, which must not happen
    // when the build failed or produced no dispatch.
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    // Guard the dereferences below, as the rect and fill variants of this test do.
    ASSERT_NE(nullptr, kernel);

    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);
    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
}
HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) {
enqueueCopyBufferAndParse<FamilyType>();
validateL3Programming<FamilyType>(cmdList, itorWalker);

View File

@ -344,6 +344,49 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentTwoShouldMatchP
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
}
// Builds dispatch infos with the fillBufferStatelessHeapless builder, using a
// freshly allocated pattern wrapped in a MemObj as the source, and verifies
// the selected kernel: it supports buffers bigger than 4 GB, its first
// argument is not pure-stateful, and its implicit args place the
// indirect-data pointer at offset 0 and the scratch pointer at offset 8, each
// 8 bytes wide.
HWTEST2_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {
    if (is32bit) {
        GTEST_SKIP();
    }

    auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});
    // The allocation is dereferenced right below to build the pattern MemObj.
    ASSERT_NE(nullptr, patternAllocation);

    // Extract the kernel used
    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless,
                                                                            pCmdQ->getClDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(),
                         patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true);
    dc.srcMemObj = &patternMemObj;
    dc.dstMemObj = buffer;
    dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0};
    dc.size = {EnqueueFillBufferTraits::size, 0, 0};

    MultiDispatchInfo multiDispatchInfo(dc);
    // Fatal assertions: begin() is dereferenced below, which must not happen
    // when the build failed or produced no dispatch.
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);
    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);

    context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
}
HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKernelIsUsed) {
auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2023 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -17,25 +17,28 @@ using Type = uint32_t;
inline constexpr Type auxTranslation{0};
inline constexpr Type copyBufferToBuffer{1};
inline constexpr Type copyBufferToBufferStateless{2};
inline constexpr Type copyBufferRect{3};
inline constexpr Type copyBufferRectStateless{4};
inline constexpr Type fillBuffer{5};
inline constexpr Type fillBufferStateless{6};
inline constexpr Type copyBufferToImage3d{7};
inline constexpr Type copyBufferToImage3dStateless{8};
inline constexpr Type copyImage3dToBuffer{9};
inline constexpr Type copyImage3dToBufferStateless{10};
inline constexpr Type copyImageToImage1d{11};
inline constexpr Type copyImageToImage2d{12};
inline constexpr Type copyImageToImage3d{13};
inline constexpr Type fillImage1d{14};
inline constexpr Type fillImage2d{15};
inline constexpr Type fillImage3d{16};
inline constexpr Type queryKernelTimestamps{17};
inline constexpr Type copyBufferToBufferStatelessHeapless{3};
inline constexpr Type copyBufferRect{4};
inline constexpr Type copyBufferRectStateless{5};
inline constexpr Type copyBufferRectStatelessHeapless{6};
inline constexpr Type fillBuffer{7};
inline constexpr Type fillBufferStateless{8};
inline constexpr Type fillBufferStatelessHeapless{9};
inline constexpr Type copyBufferToImage3d{10};
inline constexpr Type copyBufferToImage3dStateless{11};
inline constexpr Type copyImage3dToBuffer{12};
inline constexpr Type copyImage3dToBufferStateless{13};
inline constexpr Type copyImageToImage1d{14};
inline constexpr Type copyImageToImage2d{15};
inline constexpr Type copyImageToImage3d{16};
inline constexpr Type fillImage1d{17};
inline constexpr Type fillImage2d{18};
inline constexpr Type fillImage3d{19};
inline constexpr Type queryKernelTimestamps{20};
constexpr bool isStateless(Type type) {
constexpr std::array<Type, 5> statelessBuiltins{{copyBufferToBufferStateless, copyBufferRectStateless, fillBufferStateless, copyBufferToImage3dStateless, copyImage3dToBufferStateless}};
for (auto &builtinType : statelessBuiltins) {
constexpr std::array<Type, 8> statelessBuiltins{{copyBufferToBufferStateless, copyBufferRectStateless, fillBufferStateless, copyBufferToImage3dStateless, copyImage3dToBufferStateless, copyBufferToBufferStatelessHeapless, copyBufferRectStatelessHeapless, fillBufferStatelessHeapless}};
for (auto builtinType : statelessBuiltins) {
if (type == builtinType) {
return true;
}
@ -43,7 +46,68 @@ constexpr bool isStateless(Type type) {
return false;
}
inline constexpr Type maxBaseValue{17};
// Returns true when `type` is one of the heapless builtin variants
// (copy buffer-to-buffer, copy buffer rect, fill buffer); false otherwise.
constexpr bool isHeapless(Type type) {
    return type == copyBufferToBufferStatelessHeapless ||
           type == copyBufferRectStatelessHeapless ||
           type == fillBufferStatelessHeapless;
}
// Generic fallback: a builtin type without a dedicated specialization is returned unchanged,
// regardless of the useStateless / useHeapless flags (both are ignored here).
template <Type builtinType>
constexpr uint32_t adjustBuiltinType(const bool useStateless, const bool useHeapless) {
return builtinType;
}
// Picks the copyBufferToBuffer variant. Heapless takes precedence over stateless;
// with neither flag set the base builtin is returned.
template <>
constexpr uint32_t adjustBuiltinType<copyBufferToBuffer>(const bool useStateless, const bool useHeapless) {
    if (!useHeapless) {
        return useStateless ? copyBufferToBufferStateless : copyBufferToBuffer;
    }
    return copyBufferToBufferStatelessHeapless;
}
// Picks the copyBufferRect variant. Heapless takes precedence over stateless;
// with neither flag set the base builtin is returned.
template <>
constexpr uint32_t adjustBuiltinType<copyBufferRect>(const bool useStateless, const bool useHeapless) {
    if (!useHeapless) {
        return useStateless ? copyBufferRectStateless : copyBufferRect;
    }
    return copyBufferRectStatelessHeapless;
}
// Picks the fillBuffer variant. Heapless takes precedence over stateless;
// with neither flag set the base builtin is returned.
template <>
constexpr uint32_t adjustBuiltinType<fillBuffer>(const bool useStateless, const bool useHeapless) {
    if (!useHeapless) {
        return useStateless ? fillBufferStateless : fillBuffer;
    }
    return fillBufferStatelessHeapless;
}
// Picks the copyBufferToImage3d variant based on useStateless only.
// useHeapless is not consulted: this specialization has no heapless branch.
template <>
constexpr uint32_t adjustBuiltinType<copyBufferToImage3d>(const bool useStateless, const bool useHeapless) {
    return useStateless ? copyBufferToImage3dStateless : copyBufferToImage3d;
}
// Picks the copyImage3dToBuffer variant based on useStateless only.
// useHeapless is not consulted: this specialization has no heapless branch.
template <>
constexpr uint32_t adjustBuiltinType<copyImage3dToBuffer>(const bool useStateless, const bool useHeapless) {
    return useStateless ? copyImage3dToBufferStateless : copyImage3dToBuffer;
}
// Largest base builtin id; equals queryKernelTimestamps (20), the highest id defined in this namespace.
// NOTE(review): presumably has to be bumped whenever a new base builtin id is added — confirm.
inline constexpr Type maxBaseValue{20};
// NOTE(review): presumably the total number of builtin id slots reserved (base ids plus extensions) — confirm against users of `count`.
inline constexpr Type count{64};
} // namespace EBuiltInOps
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -35,14 +35,17 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
case EBuiltInOps::copyBufferToBuffer:
return "copy_buffer_to_buffer.builtin_kernel";
case EBuiltInOps::copyBufferToBufferStateless:
case EBuiltInOps::copyBufferToBufferStatelessHeapless:
return "copy_buffer_to_buffer_stateless.builtin_kernel";
case EBuiltInOps::copyBufferRect:
return "copy_buffer_rect.builtin_kernel";
case EBuiltInOps::copyBufferRectStateless:
case EBuiltInOps::copyBufferRectStatelessHeapless:
return "copy_buffer_rect_stateless.builtin_kernel";
case EBuiltInOps::fillBuffer:
return "fill_buffer.builtin_kernel";
case EBuiltInOps::fillBufferStateless:
case EBuiltInOps::fillBufferStatelessHeapless:
return "fill_buffer_stateless.builtin_kernel";
case EBuiltInOps::copyBufferToImage3d:
return "copy_buffer_to_image3d.builtin_kernel";
@ -92,14 +95,15 @@ StackVec<std::string, 3> getBuiltinResourceNames(EBuiltInOps::Type builtin, Buil
return deviceId.str();
};
const auto deviceIp = createDeviceIdFilenameComponent(hwInfo.ipVersion);
const auto builtinName = getBuiltinAsString(builtin);
const auto builtinFilename = getBuiltinAsString(builtin);
const auto extension = BuiltinCode::getExtension(type);
auto getAddressingMode = [type, &productHelper, releaseHelper, builtin]() {
auto getAddressingModePrefix = [type, &productHelper, releaseHelper, builtin]() {
if (type == BuiltinCode::ECodeType::binary) {
const bool requiresStatelessAddressing = (false == productHelper.isStatefulAddressingModeSupported());
const bool builtInUsesStatelessAddressing = EBuiltInOps::isStateless(builtin);
const bool heaplessEnabled = EBuiltInOps::isHeapless(builtin);
if (builtInUsesStatelessAddressing || requiresStatelessAddressing) {
return "stateless_";
return heaplessEnabled ? "stateless_heapless_" : "stateless_";
} else if (ApiSpecificConfig::getBindlessMode(releaseHelper)) {
return "bindless_";
} else {
@ -108,21 +112,21 @@ StackVec<std::string, 3> getBuiltinResourceNames(EBuiltInOps::Type builtin, Buil
}
return "";
};
const auto addressingMode = getAddressingMode();
const auto addressingModePrefix = getAddressingModePrefix();
auto createBuiltinResourceName = [](ConstStringRef deviceIpPath, ConstStringRef addressingMode, ConstStringRef builtinName, ConstStringRef extension) {
auto createBuiltinResourceName = [](ConstStringRef deviceIpPath, ConstStringRef addressingModePrefix, ConstStringRef builtinFilename, ConstStringRef extension) {
std::ostringstream outResourceName;
if (false == deviceIpPath.empty()) {
outResourceName << deviceIpPath.str() << "_";
}
outResourceName << addressingMode.str() << builtinName.str() << extension.str();
outResourceName << addressingModePrefix.str() << builtinFilename.str() << extension.str();
return outResourceName.str();
};
StackVec<std::string, 3> resourcesToLookup = {};
resourcesToLookup.push_back(createBuiltinResourceName(deviceIp, addressingMode, builtinName, extension));
resourcesToLookup.push_back(createBuiltinResourceName(deviceIp, addressingModePrefix, builtinFilename, extension));
if (BuiltinCode::ECodeType::binary != type) {
resourcesToLookup.push_back(createBuiltinResourceName("", addressingMode, builtinName, extension));
resourcesToLookup.push_back(createBuiltinResourceName("", addressingModePrefix, builtinFilename, extension));
}
return resourcesToLookup;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -30,6 +30,15 @@ static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
));
// Registers the source-code resource for the heapless copy-buffer-to-buffer builtin.
// The resource name is derived from EBuiltInOps::copyBufferToBufferStatelessHeapless and the
// source-code extension; the embedded content is the stateless kernel file, i.e. the heapless
// variant reuses copy_buffer_to_buffer_stateless.builtin_kernel rather than a dedicated source.
static RegisterEmbeddedResource registerCopyBufferToBufferStatelessHeaplessSrc(
createBuiltinResourceName(
EBuiltInOps::copyBufferToBufferStatelessHeapless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferRectSrc(
createBuiltinResourceName(
EBuiltInOps::copyBufferRect,
@ -48,6 +57,15 @@ static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc(
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
));
// Registers the source-code resource for the heapless copy-buffer-rect builtin.
// The resource name is derived from EBuiltInOps::copyBufferRectStatelessHeapless and the
// source-code extension; the embedded content is the stateless kernel file, i.e. the heapless
// variant reuses copy_buffer_rect_stateless.builtin_kernel rather than a dedicated source.
static RegisterEmbeddedResource registerCopyBufferRectStatelessHeaplessSrc(
createBuiltinResourceName(
EBuiltInOps::copyBufferRectStatelessHeapless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerFillBufferSrc(
createBuiltinResourceName(
EBuiltInOps::fillBuffer,
@ -66,6 +84,15 @@ static RegisterEmbeddedResource registerFillBufferStatelessSrc(
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
));
// Registers the source-code resource for the heapless fill-buffer builtin.
// The resource name is derived from EBuiltInOps::fillBufferStatelessHeapless and the
// source-code extension; the embedded content is the stateless kernel file, i.e. the heapless
// variant reuses fill_buffer_stateless.builtin_kernel rather than a dedicated source.
static RegisterEmbeddedResource registerFillBufferStatelessHeaplessSrc(
createBuiltinResourceName(
EBuiltInOps::fillBufferStatelessHeapless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::source))
.c_str(),
std::string(
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
));
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
createBuiltinResourceName(
EBuiltInOps::copyBufferToImage3d,

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -21,4 +21,5 @@ struct TestTraits<IGFX_GEN11_CORE> {
static constexpr bool imagesSupported = true;
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true;
static constexpr bool programComputeModeCommandProgramsNonCoherent = true;
static constexpr bool heaplessAllowed = false;
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN12LP_CORE> {
static constexpr bool forceGpuNonCoherent = true;
static constexpr bool imagesSupported = true;
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
static constexpr bool heaplessAllowed = false;
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN8_CORE> {
static constexpr bool imagesSupported = true;
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = false;
static constexpr bool programComputeModeCommandProgramsNonCoherent = false;
static constexpr bool heaplessAllowed = false;
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -20,4 +20,5 @@ struct TestTraits<IGFX_GEN9_CORE> {
static constexpr bool imagesSupported = true;
static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true;
static constexpr bool programComputeModeCommandProgramsNonCoherent = false;
static constexpr bool heaplessAllowed = false;
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -30,4 +30,5 @@ struct TestTraits<IGFX_XE_HPC_CORE> {
static constexpr bool imagesSupported = false;
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = true;
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
static constexpr bool heaplessAllowed = false;
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -30,6 +30,7 @@ struct TestTraits<IGFX_XE_HPG_CORE> {
static constexpr bool imagesSupported = true;
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = false;
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
static constexpr bool heaplessAllowed = false;
};
#ifdef TESTS_MTL
#include "shared/test/common/xe_hpg_core/mtl/test_traits_mtl.h"

View File

@ -1,11 +1,12 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@ -134,3 +135,40 @@ HWTEST_F(BuiltInSharedTest, GivenValidBuiltinTypeAndAnyTypeWhenGettingBuiltinCod
EXPECT_EQ(BuiltinCode::ECodeType::binary, builtinCode.type);
EXPECT_NE(0U, builtinCode.resource.size());
}
// Checks that, for each heapless builtin, the binary resource lookup returns exactly one name of the form
// "<architecture>_<release>_<revision>_stateless_heapless_<kernel>.builtin_kernel.bin".
HWTEST2_F(BuiltInSharedTest, GivenHeaplessModeEnabledWhenGetBuiltinResourceNamesIsCalledThenResourceNameIsCorrect, MatchAny) {
    // Compiler product helper that always reports heapless mode as enabled.
    class MockCompilerProductHelper : public CompilerProductHelperHw<productFamily> {
      public:
        bool isHeaplessModeEnabled() const override {
            return true;
        }
    };

    pDevice->executionEnvironment->rootDeviceEnvironments[0]->compilerProductHelper.reset(new MockCompilerProductHelper());

    const auto &hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
    const std::string deviceIpString = std::to_string(hwInfo.ipVersion.architecture) + "_" + std::to_string(hwInfo.ipVersion.release) + "_" + std::to_string(hwInfo.ipVersion.revision);

    struct TestParam {
        std::string builtInTypeAsString;
        EBuiltInOps::Type builtinType;
    };

    const TestParam params[] = {
        {"copy_buffer_to_buffer_stateless", EBuiltInOps::copyBufferToBufferStatelessHeapless},
        {"copy_buffer_rect_stateless", EBuiltInOps::copyBufferRectStatelessHeapless},
        {"fill_buffer_stateless", EBuiltInOps::fillBufferStatelessHeapless}};

    for (const auto &[builtInTypeAsString, builtInType] : params) {
        const auto resourceNames = getBuiltinResourceNames(builtInType, BuiltinCode::ECodeType::binary, *pDevice);

        const std::string expectedResourceNameGeneric = "stateless_heapless_" + builtInTypeAsString + ".builtin_kernel.bin";
        const std::string expectedResourceNameForRelease = deviceIpString + "_" + expectedResourceNameGeneric;

        // Fatal assertion: indexing resourceNames[0] below is only valid when exactly one name was returned.
        ASSERT_EQ(1u, resourceNames.size());
        EXPECT_EQ(expectedResourceNameForRelease, resourceNames[0]);
    }
}