mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
refactor: unify stateless and not stateless builtins kernels
Related-to: NEO-16156 Signed-off-by: Damian Tomczak <damian.tomczak@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
8882b1e54a
commit
4eb37124cb
@@ -37,11 +37,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBuffer;
|
||||
break;
|
||||
case Builtin::copyBufferBytesStateless:
|
||||
kernelName = "copyBufferToBufferBytesSingleStateless";
|
||||
kernelName = "copyBufferToBufferBytesSingle";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBufferStateless;
|
||||
break;
|
||||
case Builtin::copyBufferBytesStatelessHeapless:
|
||||
kernelName = "copyBufferToBufferBytesSingleStateless";
|
||||
kernelName = "copyBufferToBufferBytesSingle";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes2d:
|
||||
@@ -49,11 +49,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferRect;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes2dStateless:
|
||||
kernelName = "CopyBufferRectBytes2dStateless";
|
||||
kernelName = "CopyBufferRectBytes2d";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRectStateless;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes2dStatelessHeapless:
|
||||
kernelName = "CopyBufferRectBytes2dStateless";
|
||||
kernelName = "CopyBufferRectBytes2d";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRectStatelessHeapless;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes3d:
|
||||
@@ -61,11 +61,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferRect;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes3dStateless:
|
||||
kernelName = "CopyBufferRectBytes3dStateless";
|
||||
kernelName = "CopyBufferRectBytes3d";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRectStateless;
|
||||
break;
|
||||
case Builtin::copyBufferRectBytes3dStatelessHeapless:
|
||||
kernelName = "CopyBufferRectBytes3dStateless";
|
||||
kernelName = "CopyBufferRectBytes3d";
|
||||
builtin = NEO::EBuiltInOps::copyBufferRectStatelessHeapless;
|
||||
break;
|
||||
case Builtin::copyBufferToBufferMiddle:
|
||||
@@ -73,11 +73,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBuffer;
|
||||
break;
|
||||
case Builtin::copyBufferToBufferMiddleStateless:
|
||||
kernelName = "CopyBufferToBufferMiddleRegionStateless";
|
||||
kernelName = "CopyBufferToBufferMiddleRegion";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBufferStateless;
|
||||
break;
|
||||
case Builtin::copyBufferToBufferMiddleStatelessHeapless:
|
||||
kernelName = "CopyBufferToBufferMiddleRegionStateless";
|
||||
kernelName = "CopyBufferToBufferMiddleRegion";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless;
|
||||
break;
|
||||
case Builtin::copyBufferToBufferSide:
|
||||
@@ -85,11 +85,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBuffer;
|
||||
break;
|
||||
case Builtin::copyBufferToBufferSideStateless:
|
||||
kernelName = "CopyBufferToBufferSideRegionStateless";
|
||||
kernelName = "CopyBufferToBufferSideRegion";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBufferStateless;
|
||||
break;
|
||||
case Builtin::copyBufferToBufferSideStatelessHeapless:
|
||||
kernelName = "CopyBufferToBufferSideRegionStateless";
|
||||
kernelName = "CopyBufferToBufferSideRegion";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless;
|
||||
break;
|
||||
case Builtin::fillBufferImmediate:
|
||||
@@ -97,11 +97,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::fillBuffer;
|
||||
break;
|
||||
case Builtin::fillBufferImmediateStateless:
|
||||
kernelName = "FillBufferImmediateStateless";
|
||||
kernelName = "FillBufferImmediate";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStateless;
|
||||
break;
|
||||
case Builtin::fillBufferImmediateStatelessHeapless:
|
||||
kernelName = "FillBufferImmediateStateless";
|
||||
kernelName = "FillBufferImmediate";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless;
|
||||
break;
|
||||
case Builtin::fillBufferImmediateLeftOver:
|
||||
@@ -109,11 +109,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::fillBuffer;
|
||||
break;
|
||||
case Builtin::fillBufferImmediateLeftOverStateless:
|
||||
kernelName = "FillBufferImmediateLeftOverStateless";
|
||||
kernelName = "FillBufferImmediateLeftOver";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStateless;
|
||||
break;
|
||||
case Builtin::fillBufferImmediateLeftOverStatelessHeapless:
|
||||
kernelName = "FillBufferImmediateLeftOverStateless";
|
||||
kernelName = "FillBufferImmediateLeftOver";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless;
|
||||
break;
|
||||
case Builtin::fillBufferSSHOffset:
|
||||
@@ -121,11 +121,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::fillBuffer;
|
||||
break;
|
||||
case Builtin::fillBufferSSHOffsetStateless:
|
||||
kernelName = "FillBufferSSHOffsetStateless";
|
||||
kernelName = "FillBufferSSHOffset";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStateless;
|
||||
break;
|
||||
case Builtin::fillBufferSSHOffsetStatelessHeapless:
|
||||
kernelName = "FillBufferSSHOffsetStateless";
|
||||
kernelName = "FillBufferSSHOffset";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless;
|
||||
break;
|
||||
case Builtin::fillBufferMiddle:
|
||||
@@ -133,11 +133,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::fillBuffer;
|
||||
break;
|
||||
case Builtin::fillBufferMiddleStateless:
|
||||
kernelName = "FillBufferMiddleStateless";
|
||||
kernelName = "FillBufferMiddle";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStateless;
|
||||
break;
|
||||
case Builtin::fillBufferMiddleStatelessHeapless:
|
||||
kernelName = "FillBufferMiddleStateless";
|
||||
kernelName = "FillBufferMiddle";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless;
|
||||
break;
|
||||
case Builtin::fillBufferRightLeftover:
|
||||
@@ -145,11 +145,11 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtin = NEO::EBuiltInOps::fillBuffer;
|
||||
break;
|
||||
case Builtin::fillBufferRightLeftoverStateless:
|
||||
kernelName = "FillBufferRightLeftoverStateless";
|
||||
kernelName = "FillBufferRightLeftover";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStateless;
|
||||
break;
|
||||
case Builtin::fillBufferRightLeftoverStatelessHeapless:
|
||||
kernelName = "FillBufferRightLeftoverStateless";
|
||||
kernelName = "FillBufferRightLeftover";
|
||||
builtin = NEO::EBuiltInOps::fillBufferStatelessHeapless;
|
||||
break;
|
||||
case Builtin::queryKernelTimestamps:
|
||||
@@ -182,19 +182,19 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d16BytesStateless:
|
||||
builtinName = "CopyBufferToImage3d16BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d16Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d16BytesAlignedStateless:
|
||||
builtinName = "CopyBufferToImage3d16BytesAlignedStateless";
|
||||
builtinName = "CopyBufferToImage3d16BytesAligned";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d16BytesHeapless:
|
||||
builtinName = "CopyBufferToImage3d16BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d16Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d16BytesAlignedHeapless:
|
||||
builtinName = "CopyBufferToImage3d16BytesAlignedStateless";
|
||||
builtinName = "CopyBufferToImage3d16BytesAligned";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d2Bytes:
|
||||
@@ -202,11 +202,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d2BytesStateless:
|
||||
builtinName = "CopyBufferToImage3d2BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d2Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d2BytesHeapless:
|
||||
builtinName = "CopyBufferToImage3d2BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d2Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d4Bytes:
|
||||
@@ -214,11 +214,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d4BytesStateless:
|
||||
builtinName = "CopyBufferToImage3d4BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d4Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d4BytesHeapless:
|
||||
builtinName = "CopyBufferToImage3d4BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d4Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d3To4Bytes:
|
||||
@@ -226,11 +226,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d3To4BytesStateless:
|
||||
builtinName = "CopyBufferToImage3d3To4BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d3To4Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d3To4BytesHeapless:
|
||||
builtinName = "CopyBufferToImage3d3To4BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d3To4Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d8Bytes:
|
||||
@@ -238,11 +238,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d8BytesStateless:
|
||||
builtinName = "CopyBufferToImage3d8BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d8Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d8BytesHeapless:
|
||||
builtinName = "CopyBufferToImage3d8BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d8Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d6To8Bytes:
|
||||
@@ -250,11 +250,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d6To8BytesStateless:
|
||||
builtinName = "CopyBufferToImage3d6To8BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d6To8Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3d6To8BytesHeapless:
|
||||
builtinName = "CopyBufferToImage3d6To8BytesStateless";
|
||||
builtinName = "CopyBufferToImage3d6To8Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3dBytes:
|
||||
@@ -262,11 +262,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3dBytesStateless:
|
||||
builtinName = "CopyBufferToImage3dBytesStateless";
|
||||
builtinName = "CopyBufferToImage3dBytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyBufferToImage3dBytesHeapless:
|
||||
builtinName = "CopyBufferToImage3dBytesStateless";
|
||||
builtinName = "CopyBufferToImage3dBytes";
|
||||
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer16Bytes:
|
||||
@@ -278,19 +278,19 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer16BytesStateless:
|
||||
builtinName = "CopyImage3dToBuffer16BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer16Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer16BytesAlignedStateless:
|
||||
builtinName = "CopyImage3dToBuffer16BytesAlignedStateless";
|
||||
builtinName = "CopyImage3dToBuffer16BytesAligned";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer16BytesHeapless:
|
||||
builtinName = "CopyImage3dToBuffer16BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer16Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer16BytesAlignedHeapless:
|
||||
builtinName = "CopyImage3dToBuffer16BytesAlignedStateless";
|
||||
builtinName = "CopyImage3dToBuffer16BytesAligned";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer2Bytes:
|
||||
@@ -298,11 +298,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer2BytesStateless:
|
||||
builtinName = "CopyImage3dToBuffer2BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer2Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer2BytesHeapless:
|
||||
builtinName = "CopyImage3dToBuffer2BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer2Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer3Bytes:
|
||||
@@ -310,11 +310,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer3BytesStateless:
|
||||
builtinName = "CopyImage3dToBuffer3BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer3Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer3BytesHeapless:
|
||||
builtinName = "CopyImage3dToBuffer3BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer3Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer4Bytes:
|
||||
@@ -322,11 +322,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer4BytesStateless:
|
||||
builtinName = "CopyImage3dToBuffer4BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer4Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer4BytesHeapless:
|
||||
builtinName = "CopyImage3dToBuffer4BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer4Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer4To3Bytes:
|
||||
@@ -334,11 +334,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer4To3BytesStateless:
|
||||
builtinName = "CopyImage3dToBuffer4To3BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer4To3Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer4To3BytesHeapless:
|
||||
builtinName = "CopyImage3dToBuffer4To3BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer4To3Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer6Bytes:
|
||||
@@ -346,11 +346,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer6BytesStateless:
|
||||
builtinName = "CopyImage3dToBuffer6BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer6Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer6BytesHeapless:
|
||||
builtinName = "CopyImage3dToBuffer6BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer6Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer8Bytes:
|
||||
@@ -358,11 +358,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer8BytesStateless:
|
||||
builtinName = "CopyImage3dToBuffer8BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer8Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer8BytesHeapless:
|
||||
builtinName = "CopyImage3dToBuffer8BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer8Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer8To6Bytes:
|
||||
@@ -370,11 +370,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer8To6BytesStateless:
|
||||
builtinName = "CopyImage3dToBuffer8To6BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer8To6Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBuffer8To6BytesHeapless:
|
||||
builtinName = "CopyImage3dToBuffer8To6BytesStateless";
|
||||
builtinName = "CopyImage3dToBuffer8To6Bytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBufferBytes:
|
||||
@@ -382,11 +382,11 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBufferBytesStateless:
|
||||
builtinName = "CopyImage3dToBufferBytesStateless";
|
||||
builtinName = "CopyImage3dToBufferBytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
|
||||
break;
|
||||
case ImageBuiltin::copyImage3dToBufferBytesHeapless:
|
||||
builtinName = "CopyImage3dToBufferBytesStateless";
|
||||
builtinName = "CopyImage3dToBufferBytes";
|
||||
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
|
||||
break;
|
||||
case ImageBuiltin::copyImageRegion:
|
||||
@@ -502,7 +502,7 @@ std::unique_ptr<BuiltinFunctionsLibImpl::BuiltinData> BuiltinFunctionsLibImpl::l
|
||||
ze_kernel_desc_t kernelDesc = {};
|
||||
kernelDesc.pKernelName = builtInName;
|
||||
res = this->modules[builtin]->createKernel(&kernelDesc, &kernelHandle);
|
||||
DEBUG_BREAK_IF(res != ZE_RESULT_SUCCESS);
|
||||
UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS);
|
||||
|
||||
kernel.reset(Kernel::fromHandle(kernelHandle));
|
||||
return std::unique_ptr<BuiltinData>(new BuiltinData{modules[builtin].get(), std::move(kernel)});
|
||||
|
||||
@@ -167,39 +167,39 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessBuiltinsWhenInitBuiltinKernel
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::copyBufferBytesStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("copyBufferToBufferBytesSingleStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("copyBufferToBufferBytesSingle", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::copyBufferToBufferMiddleStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToBufferMiddleRegionStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToBufferMiddleRegion", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::copyBufferToBufferSideStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToBufferSideRegionStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToBufferSideRegion", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::fillBufferImmediateStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("FillBufferImmediateStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("FillBufferImmediate", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::fillBufferImmediateLeftOverStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("FillBufferImmediateLeftOverStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("FillBufferImmediateLeftOver", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::fillBufferSSHOffsetStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("FillBufferSSHOffsetStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("FillBufferSSHOffset", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::fillBufferSSHOffsetStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("FillBufferSSHOffsetStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("FillBufferSSHOffset", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::fillBufferMiddleStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("FillBufferMiddleStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("FillBufferMiddle", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::fillBufferRightLeftoverStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::fillBufferStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("FillBufferRightLeftoverStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("FillBufferRightLeftover", lib.kernelNamePassed.c_str());
|
||||
}
|
||||
|
||||
HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessImageBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
|
||||
@@ -208,43 +208,43 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessImageBuiltinsWhenInitBuiltinK
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d16BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d16BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d16Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d2BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d2BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d2Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d4BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d4BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d4Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d8BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d8BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d8Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer16BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer16BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer16Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer2BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer2BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer2Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer4BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer4BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer4Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBufferBytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBufferBytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBufferBytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer3BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer3BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer3Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer6BytesHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer6BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer6Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImageRegionHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImageToImage3dHeapless, lib.builtinPassed);
|
||||
@@ -257,11 +257,11 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessBufferRectBuiltinsWhenInitBui
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::copyBufferRectBytes2dStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferRectStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferRectBytes2dStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferRectBytes2d", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::copyBufferRectBytes3dStatelessHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferRectStatelessHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferRectBytes3dStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferRectBytes3d", lib.kernelNamePassed.c_str());
|
||||
}
|
||||
|
||||
HWTEST_F(TestBuiltinFunctionsLibImpl, givenStatelessBufferRectBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
|
||||
@@ -270,11 +270,11 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenStatelessBufferRectBuiltinsWhenInitBu
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::copyBufferRectBytes2dStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferRectStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferRectBytes2dStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferRectBytes2d", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinKernel(L0::Builtin::copyBufferRectBytes3dStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferRectStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferRectBytes3dStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferRectBytes3d", lib.kernelNamePassed.c_str());
|
||||
}
|
||||
|
||||
HWTEST_F(TestBuiltinFunctionsLibImpl, givenStatelessImageBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
|
||||
@@ -282,43 +282,43 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenStatelessImageBuiltinsWhenInitBuiltin
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d16BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d16BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d16Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d2BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d2BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d2Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d4BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d4BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d4Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d8BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d8BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d8Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer16BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer16BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer16Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer2BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer2BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer2Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer4BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer4BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer4Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBufferBytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBufferBytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBufferBytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer3BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer3BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer3Bytes", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer6BytesStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer6BytesStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer6Bytes", lib.kernelNamePassed.c_str());
|
||||
}
|
||||
|
||||
HWTEST_F(TestBuiltinFunctionsLibImpl, givenAlignedImageBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
|
||||
@@ -330,11 +330,11 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenAlignedImageBuiltinsWhenInitBuiltinKe
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d16BytesAlignedStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d16BytesAlignedStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d16BytesAligned", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyBufferToImage3d16BytesAlignedHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyBufferToImage3dHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyBufferToImage3d16BytesAlignedStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyBufferToImage3d16BytesAligned", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer16BytesAligned);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBuffer, lib.builtinPassed);
|
||||
@@ -342,11 +342,11 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenAlignedImageBuiltinsWhenInitBuiltinKe
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer16BytesAlignedStateless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferStateless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer16BytesAlignedStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer16BytesAligned", lib.kernelNamePassed.c_str());
|
||||
|
||||
lib.initBuiltinImageKernel(L0::ImageBuiltin::copyImage3dToBuffer16BytesAlignedHeapless);
|
||||
EXPECT_EQ(NEO::EBuiltInOps::copyImage3dToBufferHeapless, lib.builtinPassed);
|
||||
EXPECT_STREQ("CopyImage3dToBuffer16BytesAlignedStateless", lib.kernelNamePassed.c_str());
|
||||
EXPECT_STREQ("CopyImage3dToBuffer16BytesAligned", lib.kernelNamePassed.c_str());
|
||||
}
|
||||
|
||||
HWTEST_F(TestBuiltinFunctionsLibImpl, givenCompilerInterfaceWhenCreateDeviceAndImageSupportedThenBuiltinsImageFunctionsAreLoaded) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# Copyright (C) 2020-2025 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -9,6 +9,7 @@ target_sources(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel_2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_patch_value.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel_addressing.cpp
|
||||
)
|
||||
|
||||
add_subdirectories()
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
|
||||
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
|
||||
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
|
||||
#include "level_zero/core/source/device/device.h"
|
||||
#include "level_zero/core/source/kernel/kernel.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
|
||||
namespace L0::ult {
|
||||
struct KernelAddressingTest : public DeviceFixture, public ::testing::Test {
|
||||
void SetUp() override {
|
||||
DeviceFixture::setUp();
|
||||
|
||||
builtinLib = std::make_unique<L0::BuiltinFunctionsLibImpl>(device, neoDevice->getBuiltIns());
|
||||
ASSERT_NE(nullptr, builtinLib.get());
|
||||
|
||||
auto &compilerProductHelper = device->getCompilerProductHelper();
|
||||
isHeapless = compilerProductHelper.isHeaplessModeEnabled(neoDevice->getHardwareInfo());
|
||||
isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
builtinLib.reset();
|
||||
DeviceFixture::tearDown();
|
||||
}
|
||||
|
||||
bool isHeapless = false;
|
||||
bool isStateless = false;
|
||||
std::unique_ptr<L0::BuiltinFunctionsLibImpl> builtinLib;
|
||||
};
|
||||
|
||||
TEST_F(KernelAddressingTest,
|
||||
givenBuiltinCopyBufferToBufferKernelsWhenFetchedFromBuiltinLibThenCorrectArgumentSizesAreUsed) {
|
||||
{
|
||||
// Builtin copyBufferBytes uses copyBufferToBufferBytesSingle without extra arguments
|
||||
} {
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::copyBufferToBufferMiddle>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
{
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::copyBufferToBufferSide>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(KernelAddressingTest,
|
||||
givenBuiltinCopyBufferRectKernelsWhenFetchedFromBuiltinLibThenCorrectArgumentSizesAreUsed) {
|
||||
{
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::copyBufferRectBytes2d>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(5).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
{
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::copyBufferRectBytes3d>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, isStateless ? 4 * sizeof(uint64_t) : 4 * sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? 4 * sizeof(uint64_t) : 4 * sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(5).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(KernelAddressingTest, givenBuiltinFillBufferKernelsWhenFetchedFromProgramThenCorrectArgumentSizesAreUsed) {
|
||||
{
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::fillBufferImmediate>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
{
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::fillBufferImmediateLeftOver>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
{
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::fillBufferSSHOffset>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
{
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::fillBufferMiddle>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
{
|
||||
const auto builtinType = BuiltinTypeHelper::adjustBuiltinType<L0::Builtin::fillBufferRightLeftover>(isStateless, isHeapless);
|
||||
|
||||
auto kernel = builtinLib->getFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(KernelAddressingTest,
|
||||
givenBuiltinCopyBufferToImage3dKernelsWhenFetchedFromBuiltinLibThenCorrectArgumentSizesAreUsed) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto testBuiltinType = [&](auto builtinType) {
|
||||
auto kernel = builtinLib->getImageFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(2).template as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(4).template as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
};
|
||||
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyBufferToImage3d16Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyBufferToImage3d2Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyBufferToImage3d4Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyBufferToImage3d3To4Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyBufferToImage3d8Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyBufferToImage3d6To8Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyBufferToImage3dBytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyBufferToImage3d16BytesAligned>(isStateless, isHeapless));
|
||||
}
|
||||
|
||||
TEST_F(KernelAddressingTest,
|
||||
givenBuiltinCopyImage3dToBufferKernelsWhenFetchedFromBuiltinLibThenCorrectArgumentSizesAreUsed) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto testBuiltinType = [&](auto builtinType) {
|
||||
auto kernel = builtinLib->getImageFunction(builtinType);
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
auto kernelInfo = kernel->getImmutableData()->getKernelInfo();
|
||||
ASSERT_NE(nullptr, kernelInfo);
|
||||
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(3).template as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernelInfo->getArgDescriptorAt(4).template as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
};
|
||||
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer16Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer2Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer3Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer4Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer4To3Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer6Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer8Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer8To6Bytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBufferBytes>(isStateless, isHeapless));
|
||||
testBuiltinType(BuiltinTypeHelper::adjustImageBuiltinType<L0::ImageBuiltin::copyImage3dToBuffer16BytesAligned>(isStateless, isHeapless));
|
||||
}
|
||||
} // namespace L0::ult
|
||||
@@ -41,7 +41,7 @@ components:
|
||||
dest_dir: kernels_bin
|
||||
type: git
|
||||
branch: kernels_bin
|
||||
revision: 3770-5795
|
||||
revision: 3770-5810
|
||||
level_zero:
|
||||
asset_name: level_zero
|
||||
dest_dir: level_zero
|
||||
|
||||
@@ -44,7 +44,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferToBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp(kernelsLib, device, true) {}
|
||||
: BuiltInOp(kernelsLib, device, EBuiltInOps::copyBufferToBuffer, "") {}
|
||||
template <typename OffsetType>
|
||||
bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::kernelSplit> kernelSplit1DBuilder(clDevice);
|
||||
@@ -139,16 +139,14 @@ class BuiltInOp<EBuiltInOps::copyBufferToBuffer> : public BuiltinDispatchInfoBui
|
||||
MultiDeviceKernel *kernMiddle = nullptr;
|
||||
MultiDeviceKernel *kernMiddleMisaligned = nullptr;
|
||||
MultiDeviceKernel *kernRightLeftover = nullptr;
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type operation, ConstStringRef options)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
if (populateKernels) {
|
||||
populate(EBuiltInOps::copyBufferToBuffer,
|
||||
"",
|
||||
"CopyBufferToBufferLeftLeftover", kernLeftLeftover,
|
||||
"CopyBufferToBufferMiddle", kernMiddle,
|
||||
"CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned,
|
||||
"CopyBufferToBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
populate(operation,
|
||||
options,
|
||||
"CopyBufferToBufferLeftLeftover", kernLeftLeftover,
|
||||
"CopyBufferToBufferMiddle", kernMiddle,
|
||||
"CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned,
|
||||
"CopyBufferToBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -156,13 +154,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferToBufferStateless> : public BuiltInOp<EBuiltInOps::copyBufferToBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToBuffer>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyBufferToBufferStateless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyBufferToBufferLeftLeftoverStateless", kernLeftLeftover,
|
||||
"CopyBufferToBufferMiddleStateless", kernMiddle,
|
||||
"CopyBufferToBufferMiddleMisalignedStateless", kernMiddleMisaligned,
|
||||
"CopyBufferToBufferRightLeftoverStateless", kernRightLeftover);
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToBuffer>(kernelsLib, device, EBuiltInOps::copyBufferToBufferStateless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
@@ -174,13 +166,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferToBufferStatelessHeapless> : public BuiltInOp<EBuiltInOps::copyBufferToBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToBuffer>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyBufferToBufferStatelessHeapless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyBufferToBufferLeftLeftoverStateless", kernLeftLeftover,
|
||||
"CopyBufferToBufferMiddleStateless", kernMiddle,
|
||||
"CopyBufferToBufferMiddleMisalignedStateless", kernMiddleMisaligned,
|
||||
"CopyBufferToBufferRightLeftoverStateless", kernRightLeftover);
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToBuffer>(kernelsLib, device, EBuiltInOps::copyBufferToBufferStatelessHeapless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
@@ -192,7 +178,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferRect> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp(kernelsLib, device, true) {}
|
||||
: BuiltInOp(kernelsLib, device, EBuiltInOps::copyBufferRect, "") {}
|
||||
|
||||
template <typename OffsetType>
|
||||
bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
|
||||
@@ -397,24 +383,22 @@ class BuiltInOp<EBuiltInOps::copyBufferRect> : public BuiltinDispatchInfoBuilder
|
||||
MultiDeviceKernel *kernelLeft[3]{};
|
||||
MultiDeviceKernel *kernelMiddle[3]{};
|
||||
MultiDeviceKernel *kernelRight[3]{};
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type operation, ConstStringRef options)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
if (populateKernels) {
|
||||
populate(EBuiltInOps::copyBufferRect,
|
||||
"",
|
||||
"CopyBufferRectBytes2d", kernelBytes[0],
|
||||
"CopyBufferRectBytes2d", kernelBytes[1],
|
||||
"CopyBufferRectBytes3d", kernelBytes[2],
|
||||
"CopyBufferRectBytes2d", kernelLeft[0],
|
||||
"CopyBufferRectBytes2d", kernelLeft[1],
|
||||
"CopyBufferRectBytes3d", kernelLeft[2],
|
||||
"CopyBufferRectBytesMiddle2d", kernelMiddle[0],
|
||||
"CopyBufferRectBytesMiddle2d", kernelMiddle[1],
|
||||
"CopyBufferRectBytesMiddle3d", kernelMiddle[2],
|
||||
"CopyBufferRectBytes2d", kernelRight[0],
|
||||
"CopyBufferRectBytes2d", kernelRight[1],
|
||||
"CopyBufferRectBytes3d", kernelRight[2]);
|
||||
}
|
||||
populate(operation,
|
||||
options,
|
||||
"CopyBufferRectBytes2d", kernelBytes[0],
|
||||
"CopyBufferRectBytes2d", kernelBytes[1],
|
||||
"CopyBufferRectBytes3d", kernelBytes[2],
|
||||
"CopyBufferRectBytes2d", kernelLeft[0],
|
||||
"CopyBufferRectBytes2d", kernelLeft[1],
|
||||
"CopyBufferRectBytes3d", kernelLeft[2],
|
||||
"CopyBufferRectBytesMiddle2d", kernelMiddle[0],
|
||||
"CopyBufferRectBytesMiddle2d", kernelMiddle[1],
|
||||
"CopyBufferRectBytesMiddle3d", kernelMiddle[2],
|
||||
"CopyBufferRectBytes2d", kernelRight[0],
|
||||
"CopyBufferRectBytes2d", kernelRight[1],
|
||||
"CopyBufferRectBytes3d", kernelRight[2]);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -422,21 +406,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferRectStateless> : public BuiltInOp<EBuiltInOps::copyBufferRect> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyBufferRect>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyBufferRectStateless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyBufferRectBytes2dStateless", kernelBytes[0],
|
||||
"CopyBufferRectBytes2dStateless", kernelBytes[1],
|
||||
"CopyBufferRectBytes3dStateless", kernelBytes[2],
|
||||
"CopyBufferRectBytes2dStateless", kernelLeft[0],
|
||||
"CopyBufferRectBytes2dStateless", kernelLeft[1],
|
||||
"CopyBufferRectBytes3dStateless", kernelLeft[2],
|
||||
"CopyBufferRectBytesMiddle2dStateless", kernelMiddle[0],
|
||||
"CopyBufferRectBytesMiddle2dStateless", kernelMiddle[1],
|
||||
"CopyBufferRectBytesMiddle3dStateless", kernelMiddle[2],
|
||||
"CopyBufferRectBytes2dStateless", kernelRight[0],
|
||||
"CopyBufferRectBytes2dStateless", kernelRight[1],
|
||||
"CopyBufferRectBytes3dStateless", kernelRight[2]);
|
||||
: BuiltInOp<EBuiltInOps::copyBufferRect>(kernelsLib, device, EBuiltInOps::copyBufferRectStateless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo);
|
||||
@@ -447,21 +417,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferRectStatelessHeapless> : public BuiltInOp<EBuiltInOps::copyBufferRect> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyBufferRect>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyBufferRectStatelessHeapless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyBufferRectBytes2dStateless", kernelBytes[0],
|
||||
"CopyBufferRectBytes2dStateless", kernelBytes[1],
|
||||
"CopyBufferRectBytes3dStateless", kernelBytes[2],
|
||||
"CopyBufferRectBytes2dStateless", kernelLeft[0],
|
||||
"CopyBufferRectBytes2dStateless", kernelLeft[1],
|
||||
"CopyBufferRectBytes3dStateless", kernelLeft[2],
|
||||
"CopyBufferRectBytesMiddle2dStateless", kernelMiddle[0],
|
||||
"CopyBufferRectBytesMiddle2dStateless", kernelMiddle[1],
|
||||
"CopyBufferRectBytesMiddle3dStateless", kernelMiddle[2],
|
||||
"CopyBufferRectBytes2dStateless", kernelRight[0],
|
||||
"CopyBufferRectBytes2dStateless", kernelRight[1],
|
||||
"CopyBufferRectBytes3dStateless", kernelRight[2]);
|
||||
: BuiltInOp<EBuiltInOps::copyBufferRect>(kernelsLib, device, EBuiltInOps::copyBufferRectStatelessHeapless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfo);
|
||||
@@ -472,7 +428,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::fillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp(kernelsLib, device, true) {}
|
||||
: BuiltInOp(kernelsLib, device, EBuiltInOps::fillBuffer, "") {}
|
||||
|
||||
template <typename OffsetType>
|
||||
bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
|
||||
@@ -549,27 +505,21 @@ class BuiltInOp<EBuiltInOps::fillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
MultiDeviceKernel *kernMiddle = nullptr;
|
||||
MultiDeviceKernel *kernRightLeftover = nullptr;
|
||||
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type operation, ConstStringRef options)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
if (populateKernels) {
|
||||
populate(EBuiltInOps::fillBuffer,
|
||||
"",
|
||||
"FillBufferLeftLeftover", kernLeftLeftover,
|
||||
"FillBufferMiddle", kernMiddle,
|
||||
"FillBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
populate(operation,
|
||||
options,
|
||||
"FillBufferLeftLeftover", kernLeftLeftover,
|
||||
"FillBufferMiddle", kernMiddle,
|
||||
"FillBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::fillBufferStateless> : public BuiltInOp<EBuiltInOps::fillBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp<EBuiltInOps::fillBuffer>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::fillBufferStateless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"FillBufferLeftLeftoverStateless", kernLeftLeftover,
|
||||
"FillBufferMiddleStateless", kernMiddle,
|
||||
"FillBufferRightLeftoverStateless", kernRightLeftover);
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::fillBuffer>(kernelsLib, device, EBuiltInOps::fillBufferStateless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfos) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfos);
|
||||
@@ -579,13 +529,10 @@ class BuiltInOp<EBuiltInOps::fillBufferStateless> : public BuiltInOp<EBuiltInOps
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::fillBufferStatelessHeapless> : public BuiltInOp<EBuiltInOps::fillBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp<EBuiltInOps::fillBuffer>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::fillBufferStatelessHeapless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"FillBufferLeftLeftoverStateless", kernLeftLeftover,
|
||||
"FillBufferMiddleStateless", kernMiddle,
|
||||
"FillBufferRightLeftoverStateless", kernRightLeftover);
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::fillBuffer>(kernelsLib, device, EBuiltInOps::fillBufferStatelessHeapless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfos) const override {
|
||||
return buildDispatchInfosTyped<uint64_t>(multiDispatchInfos);
|
||||
}
|
||||
@@ -595,7 +542,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferToImage3d> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp(kernelsLib, device, true) {}
|
||||
: BuiltInOp(kernelsLib, device, EBuiltInOps::copyBufferToImage3d, "") {}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
return buildDispatchInfosTyped<uint32_t>(multiDispatchInfo);
|
||||
@@ -604,18 +551,17 @@ class BuiltInOp<EBuiltInOps::copyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
protected:
|
||||
MultiDeviceKernel *kernelBytes[5] = {nullptr};
|
||||
MultiDeviceKernel *kernel16BytesAligned = nullptr;
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type operation, ConstStringRef options)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
if (populateKernels) {
|
||||
populate(EBuiltInOps::copyBufferToImage3d,
|
||||
"",
|
||||
"CopyBufferToImage3dBytes", kernelBytes[0],
|
||||
"CopyBufferToImage3d2Bytes", kernelBytes[1],
|
||||
"CopyBufferToImage3d4Bytes", kernelBytes[2],
|
||||
"CopyBufferToImage3d8Bytes", kernelBytes[3],
|
||||
"CopyBufferToImage3d16Bytes", kernelBytes[4],
|
||||
"CopyBufferToImage3d16BytesAligned", kernel16BytesAligned);
|
||||
}
|
||||
populate(operation,
|
||||
options,
|
||||
"CopyBufferToImage3dBytes", kernelBytes[0],
|
||||
"CopyBufferToImage3d2Bytes", kernelBytes[1],
|
||||
"CopyBufferToImage3d4Bytes", kernelBytes[2],
|
||||
"CopyBufferToImage3d8Bytes", kernelBytes[3],
|
||||
"CopyBufferToImage3d16Bytes", kernelBytes[4],
|
||||
"CopyBufferToImage3d16BytesAligned", kernel16BytesAligned);
|
||||
}
|
||||
|
||||
template <typename OffsetType>
|
||||
@@ -704,15 +650,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferToImage3dStateless> : public BuiltInOp<EBuiltInOps::copyBufferToImage3d> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToImage3d>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyBufferToImage3dStateless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyBufferToImage3dBytesStateless", kernelBytes[0],
|
||||
"CopyBufferToImage3d2BytesStateless", kernelBytes[1],
|
||||
"CopyBufferToImage3d4BytesStateless", kernelBytes[2],
|
||||
"CopyBufferToImage3d8BytesStateless", kernelBytes[3],
|
||||
"CopyBufferToImage3d16BytesStateless", kernelBytes[4],
|
||||
"CopyBufferToImage3d16BytesAlignedStateless", kernel16BytesAligned);
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToImage3d>(kernelsLib, device, EBuiltInOps::copyBufferToImage3dStateless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
@@ -724,15 +662,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyBufferToImage3dHeapless> : public BuiltInOp<EBuiltInOps::copyBufferToImage3d> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToImage3d>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyBufferToImage3dHeapless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyBufferToImage3dBytesStateless", kernelBytes[0],
|
||||
"CopyBufferToImage3d2BytesStateless", kernelBytes[1],
|
||||
"CopyBufferToImage3d4BytesStateless", kernelBytes[2],
|
||||
"CopyBufferToImage3d8BytesStateless", kernelBytes[3],
|
||||
"CopyBufferToImage3d16BytesStateless", kernelBytes[4],
|
||||
"CopyBufferToImage3d16BytesAlignedStateless", kernel16BytesAligned);
|
||||
: BuiltInOp<EBuiltInOps::copyBufferToImage3d>(kernelsLib, device, EBuiltInOps::copyBufferToImage3dHeapless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
@@ -744,7 +674,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyImage3dToBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp(kernelsLib, device, true) {}
|
||||
: BuiltInOp(kernelsLib, device, EBuiltInOps::copyImage3dToBuffer, "") {}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
return buildDispatchInfosTyped<uint32_t>(multiDispatchInfo);
|
||||
@@ -754,18 +684,16 @@ class BuiltInOp<EBuiltInOps::copyImage3dToBuffer> : public BuiltinDispatchInfoBu
|
||||
MultiDeviceKernel *kernelBytes[5] = {nullptr};
|
||||
MultiDeviceKernel *kernel16BytesAligned = nullptr;
|
||||
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type operation, ConstStringRef options)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
if (populateKernels) {
|
||||
populate(EBuiltInOps::copyImage3dToBuffer,
|
||||
"",
|
||||
"CopyImage3dToBufferBytes", kernelBytes[0],
|
||||
"CopyImage3dToBuffer2Bytes", kernelBytes[1],
|
||||
"CopyImage3dToBuffer4Bytes", kernelBytes[2],
|
||||
"CopyImage3dToBuffer8Bytes", kernelBytes[3],
|
||||
"CopyImage3dToBuffer16Bytes", kernelBytes[4],
|
||||
"CopyImage3dToBuffer16BytesAligned", kernel16BytesAligned);
|
||||
}
|
||||
populate(operation,
|
||||
options,
|
||||
"CopyImage3dToBufferBytes", kernelBytes[0],
|
||||
"CopyImage3dToBuffer2Bytes", kernelBytes[1],
|
||||
"CopyImage3dToBuffer4Bytes", kernelBytes[2],
|
||||
"CopyImage3dToBuffer8Bytes", kernelBytes[3],
|
||||
"CopyImage3dToBuffer16Bytes", kernelBytes[4],
|
||||
"CopyImage3dToBuffer16BytesAligned", kernel16BytesAligned);
|
||||
}
|
||||
|
||||
template <typename OffsetType>
|
||||
@@ -860,15 +788,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyImage3dToBufferStateless> : public BuiltInOp<EBuiltInOps::copyImage3dToBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyImage3dToBuffer>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyImage3dToBufferStateless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyImage3dToBufferBytesStateless", kernelBytes[0],
|
||||
"CopyImage3dToBuffer2BytesStateless", kernelBytes[1],
|
||||
"CopyImage3dToBuffer4BytesStateless", kernelBytes[2],
|
||||
"CopyImage3dToBuffer8BytesStateless", kernelBytes[3],
|
||||
"CopyImage3dToBuffer16BytesStateless", kernelBytes[4],
|
||||
"CopyImage3dToBuffer16BytesAlignedStateless", kernel16BytesAligned);
|
||||
: BuiltInOp<EBuiltInOps::copyImage3dToBuffer>(kernelsLib, device, EBuiltInOps::copyImage3dToBufferStateless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
@@ -880,15 +800,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::copyImage3dToBufferHeapless> : public BuiltInOp<EBuiltInOps::copyImage3dToBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
|
||||
: BuiltInOp<EBuiltInOps::copyImage3dToBuffer>(kernelsLib, device, false) {
|
||||
populate(EBuiltInOps::copyImage3dToBufferHeapless,
|
||||
CompilerOptions::greaterThan4gbBuffersRequired,
|
||||
"CopyImage3dToBufferBytesStateless", kernelBytes[0],
|
||||
"CopyImage3dToBuffer2BytesStateless", kernelBytes[1],
|
||||
"CopyImage3dToBuffer4BytesStateless", kernelBytes[2],
|
||||
"CopyImage3dToBuffer8BytesStateless", kernelBytes[3],
|
||||
"CopyImage3dToBuffer16BytesStateless", kernelBytes[4],
|
||||
"CopyImage3dToBuffer16BytesAlignedStateless", kernel16BytesAligned);
|
||||
: BuiltInOp<EBuiltInOps::copyImage3dToBuffer>(kernelsLib, device, EBuiltInOps::copyImage3dToBufferHeapless, CompilerOptions::wideStatelessOptions.c_str()) {
|
||||
}
|
||||
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
|
||||
|
||||
@@ -1084,6 +1084,7 @@ TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderFillLocalBufferStatelessIsU
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStatelessIsUsedThenParamsAreCorrect) {
|
||||
REQUIRE_64BIT_OR_SKIP();
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
|
||||
@@ -2137,802 +2138,3 @@ HWTEST2_F(BuiltInTests, whenBuilderFillLocalBufferStatelessHeaplessIsUsedThenPar
|
||||
EXPECT_FALSE(dispatchInfo.getKernel()->getDestinationAllocationInSystemMemory());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesAlignedSrcPtrWhenBuilderIsUsedThenAlignedKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
alignas(16) uint8_t alignedBuffer[bufferSize];
|
||||
void *srcPtr = alignedBuffer;
|
||||
ASSERT_EQ(0u, reinterpret_cast<uintptr_t>(srcPtr) & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcPtr = srcPtr;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") != std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesAlignedSrcMemObjWhenBuilderIsUsedThenAlignedKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
MockBuffer srcBuffer;
|
||||
srcBuffer.size = 1024;
|
||||
srcBuffer.mockGfxAllocation.gpuAddress = 0x1000;
|
||||
ASSERT_EQ(0u, srcBuffer.mockGfxAllocation.gpuAddress & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = &srcBuffer;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") != std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesWithAddressMisalignedSrcPtrWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
uint8_t buffer[bufferSize + 16];
|
||||
void *srcPtr = buffer + 1;
|
||||
ASSERT_NE(0u, reinterpret_cast<uintptr_t>(srcPtr) & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcPtr = srcPtr;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImage3dToBuffer16BytesAlignedDstPtrWhenBuilderIsUsedThenAlignedKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
alignas(16) uint8_t alignedBuffer[bufferSize];
|
||||
void *dstPtr = alignedBuffer;
|
||||
ASSERT_EQ(0u, reinterpret_cast<uintptr_t>(dstPtr) & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = image;
|
||||
dc.dstPtr = dstPtr;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") != std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImage3dToBuffer16BytesAlignedDstMemObjWhenBuilderIsUsedThenAlignedKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
MockBuffer dstBuffer;
|
||||
dstBuffer.size = 1024;
|
||||
dstBuffer.mockGfxAllocation.gpuAddress = 0x2000;
|
||||
ASSERT_EQ(0u, dstBuffer.mockGfxAllocation.gpuAddress & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = image;
|
||||
dc.dstMemObj = &dstBuffer;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") != std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImage3dToBuffer16BytesWithAddressMisalignedDstPtrWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
uint8_t buffer[bufferSize + 16];
|
||||
void *dstPtr = buffer + 1;
|
||||
ASSERT_NE(0u, reinterpret_cast<uintptr_t>(dstPtr) & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = image;
|
||||
dc.dstPtr = dstPtr;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImage3dToBufferNon16BytesFormatWhenBuilderIsUsedThenAlignmentCheckIsSkipped) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_R, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
alignas(16) uint8_t alignedBuffer[bufferSize];
|
||||
void *dstPtr = alignedBuffer;
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = image;
|
||||
dc.dstPtr = dstPtr;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("4Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") == std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImageNon16BytesFormatWhenBuilderIsUsedThenAlignmentCheckIsSkipped) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_R, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
alignas(16) uint8_t alignedBuffer[bufferSize];
|
||||
void *srcPtr = alignedBuffer;
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcPtr = srcPtr;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("4Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") == std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesWithImageAsSrcMemObjWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto dstImage = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, dstImage);
|
||||
|
||||
auto srcImage = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, srcImage);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcImage;
|
||||
dc.dstMemObj = dstImage;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete dstImage;
|
||||
delete srcImage;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesWithAlignedBufferWhenBuilderIsUsedThenAlignedKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto srcBuffer = Buffer::create(pContext, 0, bufferSize, nullptr, retVal);
|
||||
ASSERT_NE(nullptr, srcBuffer);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcBuffer;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") != std::string::npos);
|
||||
|
||||
delete image;
|
||||
delete srcBuffer;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesWithOffsetMisalignedBufferWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto srcBuffer = Buffer::create(pContext, 0, bufferSize, nullptr, retVal);
|
||||
ASSERT_NE(nullptr, srcBuffer);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcBuffer;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {1, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
delete srcBuffer;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImageToBuffer16BytesWithImageAsDstMemObjWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto srcImage = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, srcImage);
|
||||
|
||||
auto dstImage = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, dstImage);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcImage;
|
||||
dc.dstMemObj = dstImage;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete srcImage;
|
||||
delete dstImage;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImageToBuffer16BytesWithAlignedBufferWhenBuilderIsUsedThenAlignedKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto srcImage = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, srcImage);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto dstBuffer = Buffer::create(pContext, 0, bufferSize, nullptr, retVal);
|
||||
ASSERT_NE(nullptr, dstBuffer);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcImage;
|
||||
dc.dstMemObj = dstBuffer;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") != std::string::npos);
|
||||
|
||||
delete srcImage;
|
||||
delete dstBuffer;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImageToBuffer16BytesWithOffsetMisalignedBufferWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto srcImage = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, srcImage);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto dstBuffer = Buffer::create(pContext, 0, bufferSize, nullptr, retVal);
|
||||
ASSERT_NE(nullptr, dstBuffer);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcImage;
|
||||
dc.dstMemObj = dstBuffer;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {1, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete srcImage;
|
||||
delete dstBuffer;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesWithRowPitchMisalignedWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
alignas(16) uint8_t alignedBuffer[bufferSize];
|
||||
void *srcPtr = alignedBuffer;
|
||||
ASSERT_EQ(0u, reinterpret_cast<uintptr_t>(srcPtr) & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcPtr = srcPtr;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 68; // 16-byte misaligned row pitch
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImage3dToBuffer16BytesWithSlicePitchMisalignedWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
alignas(16) uint8_t alignedBuffer[bufferSize];
|
||||
void *dstPtr = alignedBuffer;
|
||||
ASSERT_EQ(0u, reinterpret_cast<uintptr_t>(dstPtr) & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = image;
|
||||
dc.dstPtr = dstPtr;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 132; // 16-byte misaligned slice pitch
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesWithOffsetMisalignedWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
constexpr size_t bufferSize = 1024;
|
||||
alignas(16) uint8_t alignedBuffer[bufferSize];
|
||||
void *srcPtr = alignedBuffer;
|
||||
ASSERT_EQ(0u, reinterpret_cast<uintptr_t>(srcPtr) & 0x0000000F);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcPtr = srcPtr;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {8, 0, 0}; // 16-byte misaligned offset
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyImage3dToBuffer16BytesWithNullAddressWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
MockBuffer dstBuffer;
|
||||
dstBuffer.size = 1024;
|
||||
dstBuffer.mockGfxAllocation.gpuAddress = 0;
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = image;
|
||||
dc.dstMemObj = &dstBuffer;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.dstRowPitch = 0;
|
||||
dc.dstSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
|
||||
HWTEST_F(BuiltInTests, givenCopyBufferToImage16BytesWithNullPtrAndNullMemObjWhenBuilderIsUsedThenRegularKernelIsSelected) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
auto &compilerProductHelper = pClDevice->getCompilerProductHelper();
|
||||
bool isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
bool heaplessAllowed = compilerProductHelper.isHeaplessModeEnabled(pClDevice->getHardwareInfo());
|
||||
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, heaplessAllowed);
|
||||
|
||||
cl_image_format imageFormat = {CL_RGBA, CL_FLOAT};
|
||||
cl_image_desc imageDesc = {CL_MEM_OBJECT_IMAGE3D, 4, 4, 4, 1, 0, 0, 0, 0, {nullptr}};
|
||||
|
||||
auto image = ImageHelperUlt<Image3dDefaults>::create(pContext, &imageDesc, &imageFormat);
|
||||
ASSERT_NE(nullptr, image);
|
||||
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, *pClDevice);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcPtr = nullptr;
|
||||
dc.srcMemObj = nullptr;
|
||||
dc.dstMemObj = image;
|
||||
dc.srcOffset = {0, 0, 0};
|
||||
dc.dstOffset = {0, 0, 0};
|
||||
dc.size = {1, 1, 1};
|
||||
dc.srcRowPitch = 0;
|
||||
dc.srcSlicePitch = 0;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo(dc);
|
||||
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
|
||||
EXPECT_EQ(1u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("16Bytes") != std::string::npos);
|
||||
EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.find("Aligned") == std::string::npos);
|
||||
|
||||
delete image;
|
||||
}
|
||||
@@ -167,9 +167,6 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenRightLeftoverWhenFillingBufferThenFillB
|
||||
builder.buildDispatchInfos(mdi);
|
||||
EXPECT_EQ(1u, mdi.size());
|
||||
|
||||
auto kernel = mdi.begin()->getKernel();
|
||||
EXPECT_STREQ(EBuiltInOps::isHeapless(builtInType) ? "FillBufferRightLeftoverStateless" : "FillBufferRightLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str());
|
||||
|
||||
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
||||
}
|
||||
|
||||
@@ -195,9 +192,6 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenMiddleWhenFillingBufferThenFillBufferMi
|
||||
builder.buildDispatchInfos(mdi);
|
||||
EXPECT_EQ(1u, mdi.size());
|
||||
|
||||
auto kernel = mdi.begin()->getKernel();
|
||||
EXPECT_STREQ(EBuiltInOps::isHeapless(builtInType) ? "FillBufferMiddleStateless" : "FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str());
|
||||
|
||||
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
||||
}
|
||||
|
||||
@@ -223,9 +217,6 @@ HWTEST_F(EnqueueFillBufferCmdTests, GivenLeftLeftoverWhenFillingBufferThenFillBu
|
||||
builder.buildDispatchInfos(mdi);
|
||||
EXPECT_EQ(1u, mdi.size());
|
||||
|
||||
auto kernel = mdi.begin()->getKernel();
|
||||
EXPECT_STREQ(EBuiltInOps::isHeapless(builtInType) ? "FillBufferLeftLeftoverStateless" : "FillBufferLeftLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str());
|
||||
|
||||
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
|
||||
}
|
||||
|
||||
|
||||
@@ -159,8 +159,9 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBuffer
|
||||
size_t middleElSize = 4 * sizeof(uint32_t);
|
||||
EXPECT_EQ(Vec3<size_t>(256 / middleElSize, 1, 1), di->getGWS());
|
||||
|
||||
auto kernel = mdi->begin()->getKernel();
|
||||
EXPECT_EQ(EBuiltInOps::isStateless(builtIn) ? "CopyBufferToBufferMiddleStateless" : "CopyBufferToBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName);
|
||||
auto kernel = di->getKernel();
|
||||
EXPECT_EQ(kernel->getKernelInfo().getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, EBuiltInOps::isStateless(builtIn) ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(kernel->getKernelInfo().getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, EBuiltInOps::isStateless(builtIn) ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderAndSrcHostPtrThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) {
|
||||
|
||||
@@ -227,9 +227,6 @@ HWTEST_P(EnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilder
|
||||
auto di = mdi->begin();
|
||||
size_t middleElSize = sizeof(uint32_t);
|
||||
EXPECT_EQ(Vec3<size_t>(256 / middleElSize, 1, 1), di->getGWS());
|
||||
|
||||
auto kernel = di->getKernel();
|
||||
EXPECT_STREQ(EBuiltInOps::isStateless(builtIn) ? "FillBufferMiddleStateless" : "FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(size_t,
|
||||
|
||||
@@ -23,6 +23,7 @@ set(IGDRCL_SRCS_tests_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/debug_kernel_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/substitute_kernel_heap_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_addressing_tests.cpp
|
||||
)
|
||||
|
||||
if(TESTS_PVC_AND_LATER)
|
||||
|
||||
192
opencl/test/unit_test/kernel/kernel_addressing_tests.cpp
Normal file
192
opencl/test/unit_test/kernel/kernel_addressing_tests.cpp
Normal file
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/built_ins/built_in_ops_base.h"
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/kernel/kernel_arg_descriptor.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/cl_device/cl_device_vector.h"
|
||||
#include "opencl/source/program/program.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
struct KernelAddressingTest : public ClDeviceFixture, public ::testing::Test {
|
||||
void SetUp() override {
|
||||
ClDeviceFixture::setUp();
|
||||
|
||||
auto &compilerProductHelper = pDevice->getCompilerProductHelper();
|
||||
isHeapless = compilerProductHelper.isHeaplessModeEnabled(pDevice->getHardwareInfo());
|
||||
isStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
prog.reset();
|
||||
ClDeviceFixture::tearDown();
|
||||
}
|
||||
|
||||
bool isHeapless = false;
|
||||
bool isStateless = false;
|
||||
std::unique_ptr<Program> prog;
|
||||
};
|
||||
|
||||
TEST_F(KernelAddressingTest, givenBuiltinCopyBufferToBufferKernelsWhenFetchedFromProgramThenCorrectArgumentSizesAreUsed) {
|
||||
const auto builtinType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToBuffer>(isStateless, isHeapless);
|
||||
|
||||
auto src = pDevice->getBuiltIns()->getBuiltinsLib().getBuiltinCode(builtinType, BuiltinCode::ECodeType::any, *pDevice);
|
||||
ClDeviceVector deviceVector;
|
||||
deviceVector.push_back(pClDevice);
|
||||
prog.reset(BuiltinDispatchInfoBuilder::createProgramFromCode(src, deviceVector).release());
|
||||
prog->build(deviceVector, "");
|
||||
|
||||
const std::vector<const char *> bufferToBufferKernelNames = {
|
||||
"CopyBufferToBufferBytes",
|
||||
"CopyBufferToBufferMiddle",
|
||||
"CopyBufferToBufferMiddleMisaligned",
|
||||
"CopyBufferToBufferRightLeftover",
|
||||
"CopyBufferToBufferSideRegion",
|
||||
"CopyBufferToBufferMiddleRegion"};
|
||||
|
||||
for (const auto &kernelName : bufferToBufferKernelNames) {
|
||||
auto pKernelInfo = prog->getKernelInfo(kernelName, 0);
|
||||
ASSERT_NE(pKernelInfo, nullptr);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(KernelAddressingTest, givenBuiltinCopyBufferRectKernelsWhenFetchedFromProgramThenCorrectArgumentSizesAreUsed) {
|
||||
const auto builtinType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferRect>(isStateless, isHeapless);
|
||||
|
||||
auto src = pDevice->getBuiltIns()->getBuiltinsLib().getBuiltinCode(builtinType, BuiltinCode::ECodeType::any, *pDevice);
|
||||
ClDeviceVector deviceVector;
|
||||
deviceVector.push_back(pClDevice);
|
||||
prog.reset(BuiltinDispatchInfoBuilder::createProgramFromCode(src, deviceVector).release());
|
||||
prog->build(deviceVector, "");
|
||||
auto pKernelInfo = prog->getKernelInfo("CopyBufferRectBytes2d", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(5).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("CopyBufferRectBytesMiddle2d", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(5).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("CopyBufferRectBytes3d", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, isStateless ? 4 * sizeof(uint64_t) : 4 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? 4 * sizeof(uint64_t) : 4 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(5).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("CopyBufferRectBytesMiddle3d", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size, isStateless ? 4 * sizeof(uint64_t) : 4 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? 4 * sizeof(uint64_t) : 4 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(5).as<ArgDescValue>().elements[0].size, isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
TEST_F(KernelAddressingTest, givenBuiltinFillBufferKernelsWhenFetchedFromProgramThenCorrectArgumentSizesAreUsed) {
|
||||
const auto builtinType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(isStateless, isHeapless);
|
||||
|
||||
auto src = pDevice->getBuiltIns()->getBuiltinsLib().getBuiltinCode(builtinType, BuiltinCode::ECodeType::any, *pDevice);
|
||||
ClDeviceVector deviceVector;
|
||||
deviceVector.push_back(pClDevice);
|
||||
prog.reset(BuiltinDispatchInfoBuilder::createProgramFromCode(src, deviceVector).release());
|
||||
prog->build(deviceVector, "");
|
||||
auto pKernelInfo = prog->getKernelInfo("FillBufferBytes", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("FillBufferLeftLeftover", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("FillBufferMiddle", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("FillBufferRightLeftover", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("FillBufferImmediate", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("FillBufferImmediateLeftOver", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
pKernelInfo = prog->getKernelInfo("FillBufferSSHOffset", 0);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(1).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size, isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
}
|
||||
|
||||
TEST_F(KernelAddressingTest, givenBuiltinCopyBufferToImage3dKernelsWhenFetchedFromProgramThenCorrectArgumentSizesAreUsed) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
const auto builtinType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyBufferToImage3d>(isStateless, isHeapless);
|
||||
|
||||
auto src = pDevice->getBuiltIns()->getBuiltinsLib().getBuiltinCode(builtinType, BuiltinCode::ECodeType::any, *pDevice);
|
||||
ClDeviceVector deviceVector;
|
||||
deviceVector.push_back(pClDevice);
|
||||
prog.reset(BuiltinDispatchInfoBuilder::createProgramFromCode(src, deviceVector).release());
|
||||
prog->build(deviceVector, "");
|
||||
|
||||
const char *bufferToImageKernelNames[] = {
|
||||
"CopyBufferToImage3dBytes",
|
||||
"CopyBufferToImage3d2Bytes",
|
||||
"CopyBufferToImage3d4Bytes",
|
||||
"CopyBufferToImage3d3To4Bytes",
|
||||
"CopyBufferToImage3d8Bytes",
|
||||
"CopyBufferToImage3d6To8Bytes",
|
||||
"CopyBufferToImage3d16Bytes",
|
||||
"CopyBufferToImage3d16BytesAligned"};
|
||||
|
||||
for (const auto &kernelName : bufferToImageKernelNames) {
|
||||
auto pKernelInfo = prog->getKernelInfo(kernelName, 0);
|
||||
ASSERT_NE(pKernelInfo, nullptr);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(2).as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(KernelAddressingTest, givenBuiltinCopyImage3dToBufferKernelsWhenFetchedFromProgramThenCorrectArgumentSizesAreUsed) {
|
||||
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
|
||||
|
||||
const auto builtinType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::copyImage3dToBuffer>(isStateless, isHeapless);
|
||||
|
||||
auto src = pDevice->getBuiltIns()->getBuiltinsLib().getBuiltinCode(builtinType, BuiltinCode::ECodeType::any, *pDevice);
|
||||
ClDeviceVector deviceVector;
|
||||
deviceVector.push_back(pClDevice);
|
||||
prog.reset(BuiltinDispatchInfoBuilder::createProgramFromCode(src, deviceVector).release());
|
||||
auto ret = prog->build(deviceVector, "");
|
||||
ASSERT_EQ(ret, CL_SUCCESS);
|
||||
|
||||
const char *kernelNames[] = {
|
||||
"CopyImage3dToBufferBytes",
|
||||
"CopyImage3dToBuffer2Bytes",
|
||||
"CopyImage3dToBuffer4Bytes",
|
||||
"CopyImage3dToBuffer4To3Bytes",
|
||||
"CopyImage3dToBuffer8Bytes",
|
||||
"CopyImage3dToBuffer8To6Bytes",
|
||||
"CopyImage3dToBuffer16Bytes",
|
||||
"CopyImage3dToBuffer16BytesAligned"};
|
||||
|
||||
for (const auto &kernelName : kernelNames) {
|
||||
auto pKernelInfo = prog->getKernelInfo(kernelName, 0);
|
||||
ASSERT_NE(pKernelInfo, nullptr);
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(3).as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? sizeof(uint64_t) : sizeof(uint32_t));
|
||||
EXPECT_EQ(pKernelInfo->getArgDescriptorAt(4).as<ArgDescValue>().elements[0].size,
|
||||
isStateless ? 2 * sizeof(uint64_t) : 2 * sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
@@ -24,19 +24,14 @@ set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_INS ${NEO_CORE_SRCS_BUILT_INS})
|
||||
set(NEO_CORE_SRCS_BUILT_IN_KERNELS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/aux_translation.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_kernel_timestamps.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.builtin_kernel
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.builtin_kernel
|
||||
|
||||
@@ -30,30 +30,25 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
||||
case EBuiltInOps::auxTranslation:
|
||||
return "aux_translation.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferToBuffer:
|
||||
return "copy_buffer_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferToBufferStateless:
|
||||
case EBuiltInOps::copyBufferToBufferStatelessHeapless:
|
||||
return "copy_buffer_to_buffer_stateless.builtin_kernel";
|
||||
return "copy_buffer_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferRect:
|
||||
return "copy_buffer_rect.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferRectStateless:
|
||||
case EBuiltInOps::copyBufferRectStatelessHeapless:
|
||||
return "copy_buffer_rect_stateless.builtin_kernel";
|
||||
return "copy_buffer_rect.builtin_kernel";
|
||||
case EBuiltInOps::fillBuffer:
|
||||
return "fill_buffer.builtin_kernel";
|
||||
case EBuiltInOps::fillBufferStateless:
|
||||
case EBuiltInOps::fillBufferStatelessHeapless:
|
||||
return "fill_buffer_stateless.builtin_kernel";
|
||||
return "fill_buffer.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferToImage3d:
|
||||
return "copy_buffer_to_image3d.builtin_kernel";
|
||||
case EBuiltInOps::copyBufferToImage3dStateless:
|
||||
case EBuiltInOps::copyBufferToImage3dHeapless:
|
||||
return "copy_buffer_to_image3d_stateless.builtin_kernel";
|
||||
return "copy_buffer_to_image3d.builtin_kernel";
|
||||
case EBuiltInOps::copyImage3dToBuffer:
|
||||
return "copy_image3d_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::copyImage3dToBufferStateless:
|
||||
case EBuiltInOps::copyImage3dToBufferHeapless:
|
||||
return "copy_image3d_to_buffer_stateless.builtin_kernel";
|
||||
return "copy_image3d_to_buffer.builtin_kernel";
|
||||
case EBuiltInOps::copyImageToImage1d:
|
||||
case EBuiltInOps::copyImageToImage1dHeapless:
|
||||
return "copy_image_to_image1d.builtin_kernel";
|
||||
|
||||
@@ -35,14 +35,14 @@ set(GENERATED_BUILTINS_IMAGES
|
||||
)
|
||||
|
||||
set(GENERATED_BUILTINS_IMAGES_STATELESS
|
||||
"copy_buffer_to_image3d_stateless"
|
||||
"copy_image3d_to_buffer_stateless"
|
||||
"copy_buffer_to_image3d"
|
||||
"copy_image3d_to_buffer"
|
||||
)
|
||||
|
||||
set(GENERATED_BUILTINS_STATELESS
|
||||
"copy_buffer_to_buffer_stateless"
|
||||
"copy_buffer_rect_stateless"
|
||||
"fill_buffer_stateless"
|
||||
"copy_buffer_to_buffer"
|
||||
"copy_buffer_rect"
|
||||
"fill_buffer"
|
||||
)
|
||||
|
||||
foreach(MODE ${ADDRESSING_MODES})
|
||||
|
||||
@@ -74,7 +74,11 @@ function(compile_builtin core_type platform_it builtin bits builtin_options mode
|
||||
set(heapless_mode "enable")
|
||||
endif()
|
||||
|
||||
if(${mode} STREQUAL "stateless" OR ${mode} STREQUAL "heapless")
|
||||
list(APPEND __ocloc__options__ "-DWIDE_STATELESS=1")
|
||||
endif()
|
||||
list(APPEND __ocloc__options__ "-cl-kernel-arg-info")
|
||||
|
||||
set(INTERNAL_OPTIONS "${${mode}_INTERNAL_OPTIONS}")
|
||||
add_custom_command(
|
||||
OUTPUT ${OUTPUT_FILE_SPV}
|
||||
|
||||
@@ -6,88 +6,84 @@
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle2d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle3d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
}
|
||||
|
||||
)==="
|
||||
|
||||
@@ -1,93 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
|
||||
__kernel void CopyBufferRectBytes2dStateless(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
ulong2 SrcOrigin,
|
||||
ulong2 DstOrigin,
|
||||
ulong SrcPitch,
|
||||
ulong DstPitch )
|
||||
|
||||
{
|
||||
size_t x = get_global_id(0);
|
||||
size_t y = get_global_id(1);
|
||||
|
||||
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle2dStateless(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
ulong2 SrcOrigin,
|
||||
ulong2 DstOrigin,
|
||||
ulong SrcPitch,
|
||||
ulong DstPitch )
|
||||
|
||||
{
|
||||
size_t x = get_global_id(0);
|
||||
size_t y = get_global_id(1);
|
||||
|
||||
size_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
size_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytes3dStateless(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
ulong4 SrcOrigin,
|
||||
ulong4 DstOrigin,
|
||||
ulong2 SrcPitch,
|
||||
ulong2 DstPitch )
|
||||
|
||||
{
|
||||
size_t x = get_global_id(0);
|
||||
size_t y = get_global_id(1);
|
||||
size_t z = get_global_id(2);
|
||||
|
||||
size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle3dStateless(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
ulong4 SrcOrigin,
|
||||
ulong4 DstOrigin,
|
||||
ulong2 SrcPitch,
|
||||
ulong2 DstPitch )
|
||||
|
||||
{
|
||||
size_t x = get_global_id(0);
|
||||
size_t y = get_global_id(1);
|
||||
size_t z = get_global_id(2);
|
||||
|
||||
size_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
size_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
}
|
||||
)==="
|
||||
@@ -1,19 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#include "kernel_types.h"
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint bytesToRead )
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
offset_t bytesToRead )
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
@@ -25,24 +27,24 @@ __kernel void CopyBufferToBufferBytes(
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
@@ -52,17 +54,17 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
const size_t gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
const uint4 src1 = vload4((gid + 1), pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
@@ -75,32 +77,33 @@ __kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
|
||||
ALIGNED4(dst);
|
||||
ALIGNED4(src);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
dst[gid] = (uchar)(src[gid]);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferSideRegion(
|
||||
__global uchar* pDst,
|
||||
const __global uchar* pSrc,
|
||||
unsigned int len,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
idx_t len,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < len) {
|
||||
@@ -111,14 +114,14 @@ __kernel void CopyBufferToBufferSideRegion(
|
||||
__kernel void CopyBufferToBufferMiddleRegion(
|
||||
__global uint* pDst,
|
||||
const __global uint* pSrc,
|
||||
unsigned int elems,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
idx_t elems,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < elems) {
|
||||
@@ -126,4 +129,5 @@ __kernel void CopyBufferToBufferMiddleRegion(
|
||||
vstore4(loaded, gid, pDstWithOffset);
|
||||
}
|
||||
}
|
||||
|
||||
)==="
|
||||
@@ -1,113 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyBufferToBufferBytesStateless(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes,
|
||||
ulong bytesToRead )
|
||||
{
|
||||
pSrc += ( srcOffsetInBytes + get_global_id(0) );
|
||||
pDst += ( dstOffsetInBytes + get_global_id(0) );
|
||||
pDst[ 0 ] = pSrc[ 0 ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferLeftLeftoverStateless(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddleStateless(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddleMisalignedStateless(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
const size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits));
|
||||
result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits));
|
||||
result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits));
|
||||
vstore4(result, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftoverStateless(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void copyBufferToBufferBytesSingleStateless(__global uchar *dst, const __global uchar *src) {
|
||||
size_t gid = get_global_id(0);
|
||||
dst[gid] = (uchar)(src[gid]);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferSideRegionStateless(
|
||||
__global uchar* pDst,
|
||||
const __global uchar* pSrc,
|
||||
ulong len,
|
||||
ulong dstSshOffset,
|
||||
ulong srcSshOffset
|
||||
)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
__global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < len) {
|
||||
pDstWithOffset[ gid ] = pSrcWithOffset[ gid ];
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddleRegionStateless(
|
||||
__global uint* pDst,
|
||||
const __global uint* pSrc,
|
||||
ulong elems,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
ulong srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
__global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < elems) {
|
||||
uint4 loaded = vload4(gid, pSrcWithOffset);
|
||||
vstore4(loaded, gid, pDstWithOffset);
|
||||
}
|
||||
}
|
||||
|
||||
)==="
|
||||
@@ -8,36 +8,38 @@
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
if(((ulong)(src + srcOffset)) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
@@ -51,19 +53,19 @@ __kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
|
||||
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
if(((ulong)(src + srcOffset)) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
@@ -78,45 +80,44 @@ __kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d3To4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
|
||||
uint upper2 = 0;
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 3 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 3 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 3));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
if(((ulong)(src + srcOffset)) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
@@ -138,27 +139,26 @@ __kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d6To8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
|
||||
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 6 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 6 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 6 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 6));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
|
||||
|
||||
upper2 = upper = 0;
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 6 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 6 + 4));
|
||||
@@ -170,15 +170,15 @@ __kernel void CopyBufferToImage3d6To8Bytes(__global uchar *src,
|
||||
|
||||
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
@@ -211,16 +211,16 @@ __kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16BytesAligned(__global uint4 *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = src[(LOffset >> 4) + x];
|
||||
|
||||
|
||||
@@ -1,228 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
__kernel void CopyBufferToImage3dBytesStateless(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2BytesStateless(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
c.x = (uint)combined;
|
||||
}
|
||||
else{
|
||||
c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d4BytesStateless(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 4));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
}
|
||||
else{
|
||||
c.x = (*(__global uint*)(src + LOffset + x * 4));
|
||||
}
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d3To4BytesStateless(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
uint upper2 = 0;
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 3 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 3 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 3));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8BytesStateless(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 8));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 8 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 8 + 4));
|
||||
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
}
|
||||
else{
|
||||
c = *((__global uint2*)(src + LOffset + x * 8));
|
||||
}
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d6To8BytesStateless(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 6 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 6 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 6 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 6));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
|
||||
upper2 = upper = 0;
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 6 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 6 + 4));
|
||||
combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16BytesStateless(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 16));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 6));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 4));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.y = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 10));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 8));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.z = combined;
|
||||
upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15));
|
||||
upper = *((__global uchar*)(src + LOffset + x * 16 + 14));
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 16 + 12));
|
||||
combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.w = combined;
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16BytesAlignedStateless(__global uint4 *src,
|
||||
__write_only image3d_t output,
|
||||
ulong srcOffset,
|
||||
int4 dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = src[(LOffset >> 4) + x];
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
)==="
|
||||
@@ -6,17 +6,19 @@
|
||||
*/
|
||||
|
||||
R"===(
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
@@ -25,18 +27,18 @@ __kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
@@ -48,39 +50,37 @@ __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer3Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 2)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 1)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
|
||||
|
||||
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
@@ -92,19 +92,19 @@ __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4To3Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
@@ -113,40 +113,40 @@ __kernel void CopyImage3dToBuffer4To3Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer6Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 3)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 2)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 5)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 4)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 4)) = convert_uchar_sat(c.z & 0xff);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
@@ -163,16 +163,16 @@ __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8To6Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
@@ -188,14 +188,14 @@ __kernel void CopyImage3dToBuffer8To6Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
@@ -218,16 +218,16 @@ __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16BytesAligned(__read_only image3d_t input,
|
||||
__global uint4 *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__global uint4 *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
|
||||
@@ -1,238 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyImage3dToBufferBytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer2BytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer3BytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 2)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 1)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4BytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
else{
|
||||
*((__global uint*)(dst + DstOffset + x * 4)) = c.x;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4To3BytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
|
||||
|
||||
__kernel void CopyImage3dToBuffer6BytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 3)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 2)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 5)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 4)) = convert_uchar_sat(c.z & 0xff);
|
||||
}
|
||||
|
||||
|
||||
__kernel void CopyImage3dToBuffer8BytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
}
|
||||
else{
|
||||
uint2 d = (uint2)(c.x,c.y);
|
||||
*((__global uint2*)(dst + DstOffset + x * 8)) = d;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8To6BytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6)) = convert_uchar_sat(c.x & 0xff);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
}
|
||||
|
||||
|
||||
__kernel void CopyImage3dToBuffer16BytesStateless(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16BytesAlignedStateless(__read_only image3d_t input,
|
||||
__global uint4 *dst,
|
||||
int4 srcOffset,
|
||||
ulong dstOffset,
|
||||
ulong2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
ulong DstOffset = (dstOffset + (y * Pitch.x) + (z * Pitch.y));
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
dst[(DstOffset >> 4) + x] = c;
|
||||
}
|
||||
)==="
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -8,87 +8,92 @@
|
||||
R"===(
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
uint srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t lid = get_local_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[lid];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
uint gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
uint gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const __global uchar* pPattern,
|
||||
uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
offset_t patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
ALIGNED4(pPattern);
|
||||
uint dstIndex = get_global_id(0);
|
||||
uint srcIndex = get_local_id(0);
|
||||
idx_t dstIndex = get_global_id(0);
|
||||
idx_t srcIndex = get_local_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytesStateless(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
size_t dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
size_t srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftoverStateless(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddleStateless(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftoverStateless(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateStateless(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateLeftOverStateless(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffsetStateless(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const __global uchar* pPattern,
|
||||
ulong patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
size_t dstIndex = get_global_id(0);
|
||||
size_t srcIndex = get_local_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
}
|
||||
)==="
|
||||
21
shared/source/built_ins/kernels/kernel_types.h
Normal file
21
shared/source/built_ins/kernels/kernel_types.h
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef KERNEL_TYPES_H
|
||||
#define KERNEL_TYPES_H 1
|
||||
#ifdef WIDE_STATELESS
|
||||
typedef ulong idx_t;
|
||||
typedef ulong2 coord2_t;
|
||||
typedef ulong4 coord4_t;
|
||||
typedef ulong offset_t;
|
||||
#else
|
||||
typedef uint idx_t;
|
||||
typedef uint2 coord2_t;
|
||||
typedef uint4 coord4_t;
|
||||
typedef uint offset_t;
|
||||
#endif
|
||||
#endif
|
||||
@@ -26,7 +26,7 @@ static RegisterEmbeddedResource registerCopyBufferToBufferSrc(
|
||||
copyBufferToBufferSrcSize);
|
||||
|
||||
static constexpr const char copyBufferToBufferStatelessSrc[] =
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel"
|
||||
;
|
||||
static constexpr size_t copyBufferToBufferStatelessSrcSize = sizeof(copyBufferToBufferStatelessSrc);
|
||||
|
||||
@@ -39,7 +39,7 @@ static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
|
||||
copyBufferToBufferStatelessSrcSize);
|
||||
|
||||
static constexpr const char copyBufferToBufferStatelessHeaplessSrc[] =
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel"
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel"
|
||||
;
|
||||
static constexpr size_t copyBufferToBufferStatelessHeaplessSrcSize = sizeof(copyBufferToBufferStatelessHeaplessSrc);
|
||||
|
||||
@@ -65,7 +65,7 @@ static RegisterEmbeddedResource registerCopyBufferRectSrc(
|
||||
copyBufferRectSrcSize);
|
||||
|
||||
static constexpr const char copyBufferRectStatelessSrc[] =
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_rect.builtin_kernel"
|
||||
;
|
||||
static constexpr size_t copyBufferRectStatelessSrcSize = sizeof(copyBufferRectStatelessSrc);
|
||||
|
||||
@@ -78,7 +78,7 @@ static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc(
|
||||
copyBufferRectStatelessSrcSize);
|
||||
|
||||
static constexpr const char copyBufferRectStatelessHeaplessSrc[] =
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel"
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_rect.builtin_kernel"
|
||||
;
|
||||
static constexpr size_t copyBufferRectStatelessHeaplessSrcSize = sizeof(copyBufferRectStatelessHeaplessSrc);
|
||||
|
||||
@@ -104,7 +104,7 @@ static RegisterEmbeddedResource registerFillBufferSrc(
|
||||
fillBufferSrcSize);
|
||||
|
||||
static constexpr const char fillBufferStatelessSrc[] =
|
||||
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
|
||||
#include "shared/source/built_ins/kernels/fill_buffer.builtin_kernel"
|
||||
;
|
||||
static constexpr size_t fillBufferStatelessSrcSize = sizeof(fillBufferStatelessSrc);
|
||||
|
||||
@@ -117,7 +117,7 @@ static RegisterEmbeddedResource registerFillBufferStatelessSrc(
|
||||
fillBufferStatelessSrcSize);
|
||||
|
||||
static constexpr const char fillBufferStatelessHeaplessSrc[] =
|
||||
#include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel"
|
||||
#include "shared/source/built_ins/kernels/fill_buffer.builtin_kernel"
|
||||
;
|
||||
static constexpr size_t fillBufferStatelessHeaplessSrcSize = sizeof(fillBufferStatelessHeaplessSrc);
|
||||
|
||||
@@ -143,7 +143,7 @@ static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
|
||||
copyBufferToImage3dSrcSize);
|
||||
|
||||
static constexpr const char copyBufferToImage3dStatelessSrc[] =
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel"
|
||||
#include "shared/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel"
|
||||
;
|
||||
static constexpr size_t copyBufferToImage3dStatelessSrcSize = sizeof(copyBufferToImage3dStatelessSrc);
|
||||
|
||||
@@ -169,7 +169,7 @@ static RegisterEmbeddedResource registerCopyImage3dToBufferSrc(
|
||||
copyImage3dToBufferSrcSize);
|
||||
|
||||
static constexpr const char copyImage3dToBufferStatelessSrc[] =
|
||||
#include "shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel"
|
||||
#include "shared/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel"
|
||||
;
|
||||
static constexpr size_t copyImage3dToBufferStatelessSrcSize = sizeof(copyImage3dToBufferStatelessSrc);
|
||||
|
||||
|
||||
@@ -45,7 +45,10 @@ inline constexpr ConstStringRef useCMCompiler = "-cmc";
|
||||
inline constexpr ConstStringRef enableFP64GenEmu = "-cl-fp64-gen-emu";
|
||||
inline constexpr ConstStringRef enableDivergentBarriers = "-cl-intel-enable-divergent-barrier-handling";
|
||||
inline constexpr ConstStringRef optDisableSendWarWa = "-ze-opt-disable-sendwarwa";
|
||||
|
||||
inline constexpr ConstStringRef statelessAddr = "-DWIDE_STATELESS=1";
|
||||
inline const std::string wideStatelessOptions =
|
||||
std::string(greaterThan4gbBuffersRequired) + " " + std::string(statelessAddr);
|
||||
inline const std::string statelessOptions = greaterThan4gbBuffersRequired.str();
|
||||
inline constexpr size_t nullterminateSize = 1U;
|
||||
inline constexpr size_t spaceSeparatorSize = 1U;
|
||||
|
||||
|
||||
@@ -7,5 +7,5 @@
|
||||
|
||||
#include "shared/test/common/helpers/kernel_binary_helper.h"
|
||||
|
||||
const std::string KernelBinaryHelper::BUILT_INS("6133084427540774618");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("10619261412647190096_images");
|
||||
const std::string KernelBinaryHelper::BUILT_INS("9138976784737507795");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("15961210520135641457_images");
|
||||
|
||||
@@ -11,14 +11,16 @@ __kernel void fullCopy(__global const uint* src, __global uint* dst) {
|
||||
vstore4(loaded, gid, dst);
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint bytesToRead )
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
offset_t bytesToRead )
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
@@ -30,24 +32,24 @@ __kernel void CopyBufferToBufferBytes(
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
@@ -57,17 +59,17 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
const size_t gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
const uint4 src1 = vload4((gid + 1), pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
@@ -80,32 +82,33 @@ __kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
|
||||
ALIGNED4(dst);
|
||||
ALIGNED4(src);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
dst[gid] = (uchar)(src[gid]);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferSideRegion(
|
||||
__global uchar* pDst,
|
||||
const __global uchar* pSrc,
|
||||
unsigned int len,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
idx_t len,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < len) {
|
||||
@@ -116,14 +119,14 @@ __kernel void CopyBufferToBufferSideRegion(
|
||||
__kernel void CopyBufferToBufferMiddleRegion(
|
||||
__global uint* pDst,
|
||||
const __global uint* pSrc,
|
||||
unsigned int elems,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
idx_t elems,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < elems) {
|
||||
@@ -132,179 +135,181 @@ __kernel void CopyBufferToBufferMiddleRegion(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
uint srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t lid = get_local_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[lid];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
uint gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
uint gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const __global uchar* pPattern,
|
||||
uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
offset_t patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
ALIGNED4(pPattern);
|
||||
uint dstIndex = get_global_id(0);
|
||||
uint srcIndex = get_local_id(0);
|
||||
idx_t dstIndex = get_global_id(0);
|
||||
idx_t srcIndex = get_local_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle2d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle3d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
}
|
||||
|
||||
|
||||
void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) {
|
||||
dst[currentOffset] = globalStart;
|
||||
dst[currentOffset + 1] = globalEnd;
|
||||
@@ -535,36 +540,38 @@ __kernel void CopyImage1dBufferToImage1dBuffer(
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyBufferToImage3dBytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1));
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000001){
|
||||
if(((ulong)(src + srcOffset)) & 0x00000001){
|
||||
ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1));
|
||||
ushort lower = *((__global uchar*)(src + LOffset + x * 2));
|
||||
ushort combined = (upper << 8) | lower;
|
||||
@@ -578,19 +585,19 @@ __kernel void CopyBufferToImage3d2Bytes(__global uchar *src,
|
||||
|
||||
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000003){
|
||||
if(((ulong)(src + srcOffset)) & 0x00000003){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1));
|
||||
@@ -605,45 +612,44 @@ __kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d3To4Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 1);
|
||||
|
||||
|
||||
uint upper2 = 0;
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 3 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 3 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 3));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
if(( ulong )(src + srcOffset) & 0x00000007){
|
||||
if(((ulong)(src + srcOffset)) & 0x00000007){
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1));
|
||||
@@ -665,27 +671,26 @@ __kernel void CopyBufferToImage3d8Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d6To8Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8));
|
||||
|
||||
|
||||
|
||||
uint upper2 = *((__global uchar*)(src + LOffset + x * 6 + 3));
|
||||
uint upper = *((__global uchar*)(src + LOffset + x * 6 + 2));
|
||||
uint lower2 = *((__global uchar*)(src + LOffset + x * 6 + 1));
|
||||
uint lower = *((__global uchar*)(src + LOffset + x * 6));
|
||||
uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
|
||||
c.x = combined;
|
||||
|
||||
|
||||
upper2 = upper = 0;
|
||||
lower2 = *((__global uchar*)(src + LOffset + x * 6 + 5));
|
||||
lower = *((__global uchar*)(src + LOffset + x * 6 + 4));
|
||||
@@ -697,15 +702,15 @@ __kernel void CopyBufferToImage3d6To8Bytes(__global uchar *src,
|
||||
|
||||
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = (uint4)(0, 0, 0, 0);
|
||||
|
||||
@@ -738,33 +743,35 @@ __kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToImage3d16BytesAligned(__global uint4 *src,
|
||||
__write_only image3d_t output,
|
||||
int srcOffset,
|
||||
int4 dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__write_only image3d_t output,
|
||||
offset_t srcOffset,
|
||||
int4 dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
|
||||
uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = src[(LOffset >> 4) + x];
|
||||
|
||||
write_imageui(output, dstCoord, c);
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
*(dst + DstOffset + x) = convert_uchar_sat(c.x);
|
||||
@@ -773,18 +780,18 @@ __kernel void CopyImage3dToBufferBytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000001){
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000001){
|
||||
*((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
@@ -796,39 +803,37 @@ __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer3Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 2)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 1)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
}
|
||||
|
||||
|
||||
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000003){
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000003){
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
@@ -840,19 +845,19 @@ __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer4To3Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 3)) = convert_uchar_sat(c.x & 0xff);
|
||||
@@ -861,40 +866,40 @@ __kernel void CopyImage3dToBuffer4To3Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer6Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6)) = convert_uchar_sat(c.x & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 3)) = convert_uchar_sat((c.y >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 2)) = convert_uchar_sat(c.y & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 5)) = convert_uchar_sat((c.z >> 8 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 4)) = convert_uchar_sat(c.z & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 6 + 4)) = convert_uchar_sat(c.z & 0xff);
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
if(( ulong )(dst + dstOffset) & 0x00000007){
|
||||
if(((ulong)(dst + dstOffset)) & 0x00000007){
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff);
|
||||
*((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff);
|
||||
@@ -911,16 +916,16 @@ __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer8To6Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
@@ -936,14 +941,14 @@ __kernel void CopyImage3dToBuffer8To6Bytes(__read_only image3d_t input,
|
||||
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
__global uchar *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
@@ -966,16 +971,16 @@ __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
|
||||
}
|
||||
|
||||
__kernel void CopyImage3dToBuffer16BytesAligned(__read_only image3d_t input,
|
||||
__global uint4 *dst,
|
||||
int4 srcOffset,
|
||||
int dstOffset,
|
||||
uint2 Pitch) {
|
||||
const uint x = get_global_id(0);
|
||||
const uint y = get_global_id(1);
|
||||
const uint z = get_global_id(2);
|
||||
__global uint4 *dst,
|
||||
int4 srcOffset,
|
||||
offset_t dstOffset,
|
||||
coord2_t Pitch) {
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
|
||||
uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
offset_t DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);
|
||||
|
||||
const uint4 c = read_imageui(input, srcCoord);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -11,14 +11,16 @@ __kernel void fullCopy(__global const uint* src, __global uint* dst) {
|
||||
vstore4(loaded, gid, dst);
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
uint bytesToRead )
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
offset_t bytesToRead )
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
@@ -30,24 +32,24 @@ __kernel void CopyBufferToBufferBytes(
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
@@ -57,17 +59,17 @@ __kernel void CopyBufferToBufferMiddle(
|
||||
__kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__global const uint* pSrc,
|
||||
__global uint* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
uint misalignmentInBits)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
const size_t gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
const uint4 src0 = vload4(gid, pSrc);
|
||||
const uint4 src1 = vload4(gid + 1, pSrc);
|
||||
const uint4 src1 = vload4((gid + 1), pSrc);
|
||||
|
||||
uint4 result;
|
||||
result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
|
||||
@@ -80,32 +82,33 @@ __kernel void CopyBufferToBufferMiddleMisaligned(
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
uint srcOffsetInBytes,
|
||||
uint dstOffsetInBytes)
|
||||
offset_t srcOffsetInBytes,
|
||||
offset_t dstOffsetInBytes)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
|
||||
ALIGNED4(dst);
|
||||
ALIGNED4(src);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
dst[gid] = (uchar)(src[gid]);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferSideRegion(
|
||||
__global uchar* pDst,
|
||||
const __global uchar* pSrc,
|
||||
unsigned int len,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
idx_t len,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < len) {
|
||||
@@ -116,14 +119,14 @@ __kernel void CopyBufferToBufferSideRegion(
|
||||
__kernel void CopyBufferToBufferMiddleRegion(
|
||||
__global uint* pDst,
|
||||
const __global uint* pSrc,
|
||||
unsigned int elems,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
idx_t elems,
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(pSrc);
|
||||
ALIGNED4(pDst);
|
||||
unsigned int gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
|
||||
__global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
|
||||
if (gid < elems) {
|
||||
@@ -132,179 +135,181 @@ __kernel void CopyBufferToBufferMiddleRegion(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define ALIGNED4(ptr) __builtin_assume(((size_t)ptr&0b11) == 0)
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
uint srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t lid = get_local_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[lid];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
uint dstOffsetInBytes,
|
||||
offset_t dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const uint patternSizeInEls )
|
||||
const offset_t patternSizeInEls )
|
||||
{
|
||||
ALIGNED4(pDst);
|
||||
ALIGNED4(pPattern);
|
||||
uint gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
idx_t gid = get_global_id(0);
|
||||
idx_t dstIndex = dstOffsetInBytes + gid;
|
||||
pDst[dstIndex] = pPattern[gid & (patternSizeInEls - 1)];
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
uint gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
uint gid = get_global_id(0);
|
||||
idx_t gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
offset_t dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const __global uchar* pPattern,
|
||||
uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
offset_t patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment
|
||||
)
|
||||
{
|
||||
ALIGNED4(ptr);
|
||||
ALIGNED4(pPattern);
|
||||
uint dstIndex = get_global_id(0);
|
||||
uint srcIndex = get_local_id(0);
|
||||
idx_t dstIndex = get_global_id(0);
|
||||
idx_t srcIndex = get_local_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
__global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset;
|
||||
pDst[dstIndex] = pSrc[srcIndex];
|
||||
}
|
||||
|
||||
#include "kernel_types.h"
|
||||
|
||||
__kernel void CopyBufferRectBytes2d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle2d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
uint2 SrcOrigin,
|
||||
uint2 DstOrigin,
|
||||
uint SrcPitch,
|
||||
uint DstPitch )
|
||||
|
||||
coord2_t SrcOrigin,
|
||||
coord2_t DstOrigin,
|
||||
idx_t SrcPitch,
|
||||
idx_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytes3d(
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
|
||||
__global const char* src,
|
||||
__global char* dst,
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
idx_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
*( dst + LDstOffset ) = *( src + LSrcOffset );
|
||||
}
|
||||
|
||||
__kernel void CopyBufferRectBytesMiddle3d(
|
||||
const __global uint* src,
|
||||
__global uint* dst,
|
||||
uint4 SrcOrigin,
|
||||
uint4 DstOrigin,
|
||||
uint2 SrcPitch,
|
||||
uint2 DstPitch )
|
||||
|
||||
coord4_t SrcOrigin,
|
||||
coord4_t DstOrigin,
|
||||
coord2_t SrcPitch,
|
||||
coord2_t DstPitch )
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int z = get_global_id(2);
|
||||
|
||||
uint LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
uint LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
idx_t x = get_global_id(0);
|
||||
idx_t y = get_global_id(1);
|
||||
idx_t z = get_global_id(2);
|
||||
|
||||
idx_t LSrcOffset = SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y );
|
||||
idx_t LDstOffset = DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y );
|
||||
|
||||
src += LSrcOffset >> 2;
|
||||
dst += LDstOffset >> 2;
|
||||
|
||||
uint4 loaded = vload4(x,src);
|
||||
vstore4(loaded,x,dst);
|
||||
|
||||
uint4 loaded = vload4(x, src);
|
||||
vstore4(loaded, x, dst);
|
||||
}
|
||||
|
||||
|
||||
void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) {
|
||||
dst[currentOffset] = globalStart;
|
||||
dst[currentOffset + 1] = globalEnd;
|
||||
@@ -5,7 +5,7 @@
|
||||
#
|
||||
|
||||
set(SHARED_TEST_PROJECTS_SUB_FOLDER "prepare test files")
|
||||
function(compile_kernels_gen device revision_id platform_name use_stateless_suffix use_heapless)
|
||||
function(compile_kernels_gen device revision_id platform_name use_stateless use_heapless)
|
||||
|
||||
set(outputdir "${TargetDir}/${platform_name}/${revision_id}/test_files/${NEO_ARCH}/")
|
||||
set(compiled_kernels)
|
||||
@@ -19,8 +19,8 @@ function(compile_kernels_gen device revision_id platform_name use_stateless_suff
|
||||
set(outputname_base "${basename}_${platform_name}")
|
||||
if(${use_heapless})
|
||||
set(outputname_base "${outputname_base}-heapless_")
|
||||
elseif(${use_stateless_suffix})
|
||||
set(outputname_base "${outputname_base}-cl-intel-greater-than-4GB-buffer-required_")
|
||||
elseif(${use_stateless})
|
||||
set(outputname_base "${outputname_base}-cl-intel-greater-than-4GB-buffer-required_-DWIDE_STATELESS=1_")
|
||||
endif()
|
||||
|
||||
set(outputpath_base "${outputdir}${outputname_base}")
|
||||
@@ -38,9 +38,14 @@ function(compile_kernels_gen device revision_id platform_name use_stateless_suff
|
||||
set(internal_options "${HEAPLESS_INTERNAL_OPTIONS}")
|
||||
endif()
|
||||
|
||||
if(${use_stateless} OR ${use_heapless})
|
||||
list(APPEND __ocloc__options__ "-DWIDE_STATELESS=1")
|
||||
endif()
|
||||
list(APPEND __ocloc__options__ "-I${NEO_SOURCE_DIR}/shared/source/built_ins/kernels")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${output_files}
|
||||
COMMAND ${ocloc_cmd_prefix} -file ${absolute_filepath} -device ${device} -heapless_mode ${heapless_mode} -internal_options ${internal_options} -${NEO_BITS} -out_dir ${outputdir} -output_no_suffix -output ${outputname_base} -revision_id ${revision_id}
|
||||
COMMAND ${ocloc_cmd_prefix} -file ${absolute_filepath} -device ${device} -heapless_mode ${heapless_mode} -internal_options ${internal_options} -${NEO_BITS} -out_dir ${outputdir} -output_no_suffix -output ${outputname_base} -revision_id ${revision_id} -options "$<JOIN:${__ocloc__options__}, >"
|
||||
WORKING_DIRECTORY ${workdir}
|
||||
DEPENDS ${filepath} ocloc copy_compiler_files
|
||||
)
|
||||
@@ -79,6 +84,7 @@ add_dependencies(prepare_test_kernels_for_shared copy_compiler_files)
|
||||
|
||||
macro(macro_for_each_platform)
|
||||
set(KERNELS_TO_COMPILE ${TEST_KERNELS})
|
||||
|
||||
set(IMAGE_SUPPORT FALSE)
|
||||
CORE_CONTAINS_PLATFORM("SUPPORTED_IMAGES" ${CORE_TYPE} ${PLATFORM_IT} IMAGE_SUPPORT)
|
||||
if(NOT ${IMAGE_SUPPORT})
|
||||
|
||||
@@ -84,7 +84,7 @@ HWTEST2_F(BuiltInSharedTest, GivenStatelessBuiltinWhenGettingResourceNameThenAdd
|
||||
|
||||
auto resourceNames = getBuiltinResourceNames(EBuiltInOps::copyBufferToBufferStateless, BuiltinCode::ECodeType::binary, *pDevice);
|
||||
|
||||
std::string expectedResourceNameGeneric = "stateless_copy_buffer_to_buffer_stateless.builtin_kernel.bin";
|
||||
std::string expectedResourceNameGeneric = "stateless_copy_buffer_to_buffer.builtin_kernel.bin";
|
||||
std::string expectedResourceNameForRelease = deviceIpString + "_" + expectedResourceNameGeneric;
|
||||
|
||||
EXPECT_EQ(1u, resourceNames.size());
|
||||
@@ -103,7 +103,7 @@ HWTEST2_F(BuiltInSharedTest, GivenPlatformWithoutStatefulAddresingSupportWhenGet
|
||||
|
||||
{
|
||||
auto resourceNames = getBuiltinResourceNames(EBuiltInOps::copyBufferToBufferStateless, BuiltinCode::ECodeType::binary, *pDevice);
|
||||
std::string expectedResourceName = deviceIpString + "_stateless_copy_buffer_to_buffer_stateless.builtin_kernel.bin";
|
||||
std::string expectedResourceName = deviceIpString + "_stateless_copy_buffer_to_buffer.builtin_kernel.bin";
|
||||
EXPECT_EQ(1u, resourceNames.size());
|
||||
EXPECT_EQ(resourceNames[0], expectedResourceName);
|
||||
}
|
||||
@@ -130,20 +130,20 @@ TEST_F(BuiltInSharedTest, GivenValidBuiltinTypeAndExtensionWhenCreatingBuiltinRe
|
||||
const std::pair<EBuiltInOps::Type, const char *> testCases[] = {
|
||||
{EBuiltInOps::auxTranslation, "aux_translation.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBuffer, "copy_buffer_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBufferStateless, "copy_buffer_to_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBufferStatelessHeapless, "copy_buffer_to_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBufferStateless, "copy_buffer_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToBufferStatelessHeapless, "copy_buffer_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRect, "copy_buffer_rect.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRectStateless, "copy_buffer_rect_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRectStatelessHeapless, "copy_buffer_rect_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRectStateless, "copy_buffer_rect.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferRectStatelessHeapless, "copy_buffer_rect.builtin_kernel"},
|
||||
{EBuiltInOps::fillBuffer, "fill_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::fillBufferStateless, "fill_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::fillBufferStatelessHeapless, "fill_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::fillBufferStateless, "fill_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::fillBufferStatelessHeapless, "fill_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3d, "copy_buffer_to_image3d.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3dStateless, "copy_buffer_to_image3d_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3dHeapless, "copy_buffer_to_image3d_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3dStateless, "copy_buffer_to_image3d.builtin_kernel"},
|
||||
{EBuiltInOps::copyBufferToImage3dHeapless, "copy_buffer_to_image3d.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBuffer, "copy_image3d_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBufferStateless, "copy_image3d_to_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBufferHeapless, "copy_image3d_to_buffer_stateless.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBufferStateless, "copy_image3d_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyImage3dToBufferHeapless, "copy_image3d_to_buffer.builtin_kernel"},
|
||||
{EBuiltInOps::copyImageToImage1d, "copy_image_to_image1d.builtin_kernel"},
|
||||
{EBuiltInOps::copyImageToImage1dHeapless, "copy_image_to_image1d.builtin_kernel"},
|
||||
{EBuiltInOps::copyImageToImage2d, "copy_image_to_image2d.builtin_kernel"},
|
||||
@@ -195,9 +195,9 @@ TEST_F(BuiltInSharedTest, GivenHeaplessModeEnabledWhenGetBuiltinResourceNamesIsC
|
||||
};
|
||||
|
||||
TestParam params[] = {
|
||||
{"copy_buffer_to_buffer_stateless", EBuiltInOps::copyBufferToBufferStatelessHeapless},
|
||||
{"copy_buffer_rect_stateless", EBuiltInOps::copyBufferRectStatelessHeapless},
|
||||
{"fill_buffer_stateless", EBuiltInOps::fillBufferStatelessHeapless}};
|
||||
{"copy_buffer_to_buffer", EBuiltInOps::copyBufferToBufferStatelessHeapless},
|
||||
{"copy_buffer_rect", EBuiltInOps::copyBufferRectStatelessHeapless},
|
||||
{"fill_buffer", EBuiltInOps::fillBufferStatelessHeapless}};
|
||||
|
||||
for (auto &[builtInTypeAsString, builtInType] : params) {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user