diff --git a/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp b/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp index e70e04b5c0..a44a18270c 100644 --- a/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp +++ b/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -136,30 +136,3 @@ TEST_F(KernelDataTest, GIVENdataParameterObjectIdWHENdecodeTokensTHENoffsetLocat auto deviceSideEnqueueDesc = reinterpret_cast(pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[argNum].get()); EXPECT_EQ(offsetObjectId, deviceSideEnqueueDesc->objectId); } - -TEST_F(KernelDataTest, GIVENdataParameterChildSimdSizeWHENdecodeTokensTHENchildsIdsStoredInKernelInfoWithOffset) { - SPatchDataParameterBuffer patchList[3]; - uint32_t childrenKernelIds[3] = {7, 14, 21}; - uint32_t childrenSimdSizeOffsets[3] = {0x77, 0xAB, 0xCD}; - - for (int i = 0; i < 3; i++) { - patchList[i].Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; - patchList[i].Size = sizeof(SPatchDataParameterBuffer); - patchList[i].Type = DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE; - patchList[i].ArgumentNumber = childrenKernelIds[i]; - patchList[i].Offset = childrenSimdSizeOffsets[i]; - patchList[i].DataSize = sizeof(uint32_t); - patchList[i].SourceOffset = 0; - } - - pPatchList = patchList; - patchListSize = sizeof(patchList); - - buildAndDecode(); - - ASSERT_GE(pKernelInfo->childrenKernelsIdOffset.size(), size_t(3u)); - for (int i = 0; i < 3; i++) { - EXPECT_EQ(pKernelInfo->childrenKernelsIdOffset[i].first, childrenKernelIds[i]); - EXPECT_EQ(pKernelInfo->childrenKernelsIdOffset[i].second, childrenSimdSizeOffsets[i]); - } -} diff --git a/shared/source/device_binary_format/patchtokens_decoder.cpp b/shared/source/device_binary_format/patchtokens_decoder.cpp index 0dc821eb54..a91d00d194 100644 --- a/shared/source/device_binary_format/patchtokens_decoder.cpp +++ b/shared/source/device_binary_format/patchtokens_decoder.cpp @@ -284,9 +284,6 @@ inline void decodeKernelDataParameterToken(const SPatchDataParameterBuffer *toke case DATA_PARAMETER_PARENT_EVENT: crossthread.parentEvent = token; break; - case DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE: - crossthread.childBlockSimdSize.push_back(token); - break; case DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE: crossthread.preferredWorkgroupMultiple = token; break; @@ -303,6 +300,7 @@ inline void decodeKernelDataParameterToken(const SPatchDataParameterBuffer *toke case DATA_PARAMETER_EXECUTION_MASK: case DATA_PARAMETER_VME_IMAGE_TYPE: case DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE: + case DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE: // ignored intentionally break; } diff --git a/shared/source/device_binary_format/patchtokens_decoder.h b/shared/source/device_binary_format/patchtokens_decoder.h index fc52c2ad5e..9b1d1e7eba 100644 --- a/shared/source/device_binary_format/patchtokens_decoder.h +++ b/shared/source/device_binary_format/patchtokens_decoder.h @@ -156,7 +156,6 @@ struct KernelFromPatchtokens { const SPatchDataParameterBuffer *localMemoryStatelessWindowSize = nullptr; const SPatchDataParameterBuffer *localMemoryStatelessWindowStartAddress = nullptr; const SPatchDataParameterBuffer *preferredWorkgroupMultiple = nullptr; - StackVec childBlockSimdSize; const SPatchDataParameterBuffer *implicitArgsBufferOffset = nullptr; } crossThreadPayloadArgs; } tokens; diff --git a/shared/source/device_binary_format/patchtokens_dumper.cpp b/shared/source/device_binary_format/patchtokens_dumper.cpp index a26554700f..a4350db45a 100644 --- a/shared/source/device_binary_format/patchtokens_dumper.cpp +++ b/shared/source/device_binary_format/patchtokens_dumper.cpp @@ -764,9 +764,7 @@ std::string asString(const KernelFromPatchtokens &kern) { dumpOrNull(kern.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize, "", stream, indentLevel1); dumpOrNull(kern.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress, "", stream, indentLevel1); dumpOrNull(kern.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple, "", stream, indentLevel1); - dumpVecIfNotEmpty(kern.tokens.crossThreadPayloadArgs.childBlockSimdSize, "Child block simd size(s)", stream, indentLevel1); dumpOrNull(kern.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset, "", stream, indentLevel1); - if (kern.tokens.kernelArgs.size() != 0) { stream << "Kernel arguments [" << kern.tokens.kernelArgs.size() << "] :\n"; for (size_t i = 0; i < kern.tokens.kernelArgs.size(); ++i) { diff --git a/shared/source/program/kernel_info_from_patchtokens.cpp b/shared/source/program/kernel_info_from_patchtokens.cpp index 55415665fb..a53d5e6836 100644 --- a/shared/source/program/kernel_info_from_patchtokens.cpp +++ b/shared/source/program/kernel_info_from_patchtokens.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -38,10 +38,6 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U; - for (auto &childSimdSize : src.tokens.crossThreadPayloadArgs.childBlockSimdSize) { - dst.childrenKernelsIdOffset.push_back({childSimdSize->ArgumentNumber, childSimdSize->Offset}); - } - if (src.tokens.gtpinInfo) { dst.igcInfoForGtpin = reinterpret_cast(src.tokens.gtpinInfo + 1); } diff --git a/shared/test/unit_test/device_binary_format/patchtokens_decoder_tests.cpp b/shared/test/unit_test/device_binary_format/patchtokens_decoder_tests.cpp index 975a421f5d..efdb331d63 100644 --- a/shared/test/unit_test/device_binary_format/patchtokens_decoder_tests.cpp +++ b/shared/test/unit_test/device_binary_format/patchtokens_decoder_tests.cpp @@ -63,7 +63,6 @@ bool hasEmptyTokensInfo(const NEO::PatchTokenBinary::KernelFromPatchtokens &kern empty &= nullptr == toks.crossThreadPayloadArgs.localMemoryStatelessWindowSize; empty &= nullptr == toks.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress; empty &= nullptr == toks.crossThreadPayloadArgs.preferredWorkgroupMultiple; - empty &= toks.crossThreadPayloadArgs.childBlockSimdSize.empty(); return empty; } @@ -576,9 +575,6 @@ TEST(KernelDecoder, GivenKernelWithValidNonArgCrossThreadDataPatchtokensThenDeco auto localMemoryStatelessWindowStartAddrOff = pushBackDataParameterToken(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS, storage); auto parentEventOff = pushBackDataParameterToken(DATA_PARAMETER_PARENT_EVENT, storage); auto preferredWorkgroupMultipleOff = pushBackDataParameterToken(DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE, storage); - auto childBlockSimdSize0Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage); - auto childBlockSimdSize1Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage); - auto childBlockSimdSize2Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage); auto implictArgBufferOffset = pushBackDataParameterToken(DATA_PARAMETER_IMPL_ARG_BUFFER, storage); ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin()); @@ -618,10 +614,6 @@ TEST(KernelDecoder, GivenKernelWithValidNonArgCrossThreadDataPatchtokensThenDeco EXPECT_TRUE(tokenOffsetMatched(base, localMemoryStatelessWindowStartAddrOff, decodedKernel.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress)); EXPECT_TRUE(tokenOffsetMatched(base, parentEventOff, decodedKernel.tokens.crossThreadPayloadArgs.parentEvent)); EXPECT_TRUE(tokenOffsetMatched(base, preferredWorkgroupMultipleOff, decodedKernel.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple)); - ASSERT_EQ(3U, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize.size()); - EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize0Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[0])); - EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize1Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[1])); - EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize2Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[2])); EXPECT_TRUE(tokenOffsetMatched(base, implictArgBufferOffset, decodedKernel.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset)); } diff --git a/shared/test/unit_test/device_binary_format/patchtokens_dumper_tests.cpp b/shared/test/unit_test/device_binary_format/patchtokens_dumper_tests.cpp index 49ce68fc68..ea3d094dfb 100644 --- a/shared/test/unit_test/device_binary_format/patchtokens_dumper_tests.cpp +++ b/shared/test/unit_test/device_binary_format/patchtokens_dumper_tests.cpp @@ -905,8 +905,6 @@ TEST(KernelDumper, GivenKernelWithNonArgCrossThreadDataPatchtokensThenProperlyCr auto localMemoryStatelessWindowSize = initDataParameterBufferToken(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE); auto localMemoryStatelessWindowStartAddress = initDataParameterBufferToken(DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS); auto preferredWorkgroupMultiple = initDataParameterBufferToken(DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE); - SPatchDataParameterBuffer childBlockSimdSize[2] = {initDataParameterBufferToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE), - initDataParameterBufferToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, 2U)}; auto unknownToken0 = initDataParameterBufferToken(NUM_DATA_PARAMETER_TOKENS); auto unknownToken1 = initDataParameterBufferToken(NUM_DATA_PARAMETER_TOKENS); @@ -936,8 +934,6 @@ TEST(KernelDumper, GivenKernelWithNonArgCrossThreadDataPatchtokensThenProperlyCr kernel.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize = &localMemoryStatelessWindowSize; kernel.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress = &localMemoryStatelessWindowStartAddress; kernel.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple = &preferredWorkgroupMultiple; - kernel.tokens.crossThreadPayloadArgs.childBlockSimdSize.push_back(&childBlockSimdSize[0]); - kernel.tokens.crossThreadPayloadArgs.childBlockSimdSize.push_back(&childBlockSimdSize[1]); kernel.unhandledTokens.push_back(&unknownToken0); kernel.unhandledTokens.push_back(&unknownToken1); @@ -1334,35 +1330,6 @@ Kernel-scope tokens section size : )===" uint32_t LocationIndex2;// = 0 uint32_t IsEmulationArgument;// = 0 } - Child block simd size(s) [2] : - + [0]: - | struct SPatchDataParameterBuffer : - | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" - << tokenSize << R"===() - | { - | uint32_t Type;// = 38(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE) - | uint32_t ArgumentNumber;// = 0 - | uint32_t Offset;// = 0 - | uint32_t DataSize;// = 0 - | uint32_t SourceOffset;// = 0 - | uint32_t LocationIndex;// = 0 - | uint32_t LocationIndex2;// = 0 - | uint32_t IsEmulationArgument;// = 0 - | } - + [1]: - | struct SPatchDataParameterBuffer : - | SPatchItemHeader (Token=17(PATCH_TOKEN_DATA_PARAMETER_BUFFER), Size=)===" - << tokenSize << R"===() - | { - | uint32_t Type;// = 38(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE) - | uint32_t ArgumentNumber;// = 0 - | uint32_t Offset;// = 0 - | uint32_t DataSize;// = 0 - | uint32_t SourceOffset;// = 8 - | uint32_t LocationIndex;// = 0 - | uint32_t LocationIndex2;// = 0 - | uint32_t IsEmulationArgument;// = 0 - | } )==="; EXPECT_STREQ(expected.str().c_str(), generated.c_str()); } @@ -2094,7 +2061,9 @@ TEST(PatchTokenDumper, GivenAnyTokenThenDumpingIsHandled) { auto kernelDataParamToken = static_cast(kernelToken); *kernelDataParamToken = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_BUFFER_OFFSET); kernelDataParamToken->Size = maxTokenSize; - std::unordered_set dataParamTokensPasslist{6, 7, 17, 19, 36, 37, 39, 40, 41}; + + std::unordered_set dataParamTokensPasslist{6, 7, 17, 19, 36, 37, 38, 39, 40, 41}; + for (int i = 0; i < iOpenCL::NUM_DATA_PARAMETER_TOKENS; ++i) { if (dataParamTokensPasslist.count(i) != 0) { continue;