/*
 * Copyright (C) 2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/program/kernel_info_from_patchtokens.h"

#include "runtime/compiler_interface/patchtokens_decoder.h"
#include "runtime/program/kernel_info.h"

namespace NEO {

using namespace iOpenCL;

template <typename T>
inline void storeTokenIfNotNull(KernelInfo &kernelInfo, T *token) {
    if (token != nullptr) {
        kernelInfo.storePatchToken(token);
    }
}

template <typename T>
inline uint32_t getOffset(T *token) {
    if (token != nullptr) {
        return token->Offset;
    }
    return WorkloadInfo::undefinedOffset;
}

void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelInfoArg, const PatchTokenBinary::KernelArgFromPatchtokens &src) {
    dstKernelInfoArg.needPatch = true;
    dstKernelInfo.storeArgInfo(src.argInfo);

    // Dispatch the explicit object argument token to the matching typed storeKernelArgument call.
    if (src.objectArg != nullptr) {
        switch (src.objectArg->Token) {
        default:
            UNRECOVERABLE_IF(true);
        case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT:
            dstKernelInfo.storeKernelArgument(reinterpret_cast<const SPatchImageMemoryObjectKernelArgument *>(src.objectArg));
            break;
        case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT:
            dstKernelInfo.storeKernelArgument(reinterpret_cast<const SPatchSamplerKernelArgument *>(src.objectArg));
            break;
        case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT:
            dstKernelInfo.storeKernelArgument(reinterpret_cast<const SPatchGlobalMemoryObjectKernelArgument *>(src.objectArg));
            break;
        case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT:
            dstKernelInfo.storeKernelArgument(reinterpret_cast<const SPatchStatelessGlobalMemoryObjectKernelArgument *>(src.objectArg));
            break;
        case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT:
            dstKernelInfo.storeKernelArgument(reinterpret_cast<const SPatchStatelessConstantMemoryObjectKernelArgument *>(src.objectArg));
            break;
        case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT:
            dstKernelInfo.storeKernelArgument(reinterpret_cast<const SPatchStatelessDeviceQueueKernelArgument *>(src.objectArg));
            break;
        }
    }

    // Per-object-type metadata is translated into cross-thread payload offsets.
    switch (src.objectType) {
    default:
        UNRECOVERABLE_IF(PatchTokenBinary::ArgObjectType::None != src.objectType);
        break;
    case PatchTokenBinary::ArgObjectType::Buffer:
        dstKernelInfoArg.offsetBufferOffset = getOffset(src.metadata.buffer.bufferOffset);
        dstKernelInfoArg.pureStatefulBufferAccess = (src.metadata.buffer.pureStateful != nullptr);
        break;
    case PatchTokenBinary::ArgObjectType::Image:
        dstKernelInfoArg.offsetImgWidth = getOffset(src.metadata.image.width);
        dstKernelInfoArg.offsetImgHeight = getOffset(src.metadata.image.height);
        dstKernelInfoArg.offsetImgDepth = getOffset(src.metadata.image.depth);
        dstKernelInfoArg.offsetChannelDataType = getOffset(src.metadata.image.channelDataType);
        dstKernelInfoArg.offsetChannelOrder = getOffset(src.metadata.image.channelOrder);
        dstKernelInfoArg.offsetArraySize = getOffset(src.metadata.image.arraySize);
        dstKernelInfoArg.offsetNumSamples = getOffset(src.metadata.image.numSamples);
        dstKernelInfoArg.offsetNumMipLevels = getOffset(src.metadata.image.numMipLevels);
        dstKernelInfoArg.offsetFlatBaseOffset = getOffset(src.metadata.image.flatBaseOffset);
        dstKernelInfoArg.offsetFlatWidth = getOffset(src.metadata.image.flatWidth);
        dstKernelInfoArg.offsetFlatHeight = getOffset(src.metadata.image.flatHeight);
        dstKernelInfoArg.offsetFlatPitch = getOffset(src.metadata.image.flatPitch);
        break;
    case PatchTokenBinary::ArgObjectType::Sampler:
        dstKernelInfoArg.offsetSamplerSnapWa = getOffset(src.metadata.sampler.coordinateSnapWaRequired);
        dstKernelInfoArg.offsetSamplerAddressingMode = getOffset(src.metadata.sampler.addressMode);
        dstKernelInfoArg.offsetSamplerNormalizedCoords = getOffset(src.metadata.sampler.normalizedCoords);
        break;
    case PatchTokenBinary::ArgObjectType::Slm:
        dstKernelInfoArg.slmAlignment = src.metadata.slm.token->SourceOffset;
        break;
    }
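
    // Specialized metadata: VME (video motion estimation) arguments carry extra
    // per-argument cross-thread payload offsets on top of the generic object metadata above.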
    switch (src.objectTypeSpecialized) {
    default:
        UNRECOVERABLE_IF(PatchTokenBinary::ArgObjectTypeSpecialized::None != src.objectTypeSpecialized);
        break;
    case PatchTokenBinary::ArgObjectTypeSpecialized::Vme:
        dstKernelInfoArg.offsetVmeMbBlockType = getOffset(src.metadataSpecialized.vme.mbBlockType);
        dstKernelInfoArg.offsetVmeSubpixelMode = getOffset(src.metadataSpecialized.vme.subpixelMode);
        dstKernelInfoArg.offsetVmeSadAdjustMode = getOffset(src.metadataSpecialized.vme.sadAdjustMode);
        dstKernelInfoArg.offsetVmeSearchPathType = getOffset(src.metadataSpecialized.vme.searchPathType);
        break;
    }

    for (auto &byValArg : src.byValMap) {
        dstKernelInfo.storeKernelArgument(byValArg);
        if (byValArg->Type == DATA_PARAMETER_KERNEL_ARGUMENT) {
            dstKernelInfo.patchInfo.dataParameterBuffersKernelArgs.push_back(byValArg);
        }
    }

    dstKernelInfoArg.offsetObjectId = getOffset(src.objectId);
}

void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src) {
    dst.heapInfo.pKernelHeader = src.header;
    dst.name = std::string(src.name.begin(), src.name.end()).c_str();
    dst.heapInfo.pKernelHeap = src.isa.begin();
    dst.heapInfo.pGsh = src.heaps.generalState.begin();
    dst.heapInfo.pDsh = src.heaps.dynamicState.begin();
    dst.heapInfo.pSsh = src.heaps.surfaceState.begin();

    storeTokenIfNotNull(dst, src.tokens.executionEnvironment);
    dst.patchInfo.samplerStateArray = src.tokens.samplerStateArray;
    dst.patchInfo.bindingTableState = src.tokens.bindingTableState;
    dst.usesSsh = src.tokens.bindingTableState && (src.tokens.bindingTableState->Count > 0);
    dst.patchInfo.localsurface = src.tokens.allocateLocalSurface;
    dst.workloadInfo.slmStaticSize = src.tokens.allocateLocalSurface ? src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U;
    dst.patchInfo.mediavfestate = src.tokens.mediaVfeState[0];
    dst.patchInfo.mediaVfeStateSlot1 = src.tokens.mediaVfeState[1];
    dst.patchInfo.interfaceDescriptorDataLoad = src.tokens.mediaInterfaceDescriptorLoad;
    dst.patchInfo.interfaceDescriptorData = src.tokens.interfaceDescriptorData;
    dst.patchInfo.threadPayload = src.tokens.threadPayload;
    dst.patchInfo.dataParameterStream = src.tokens.dataParameterStream;

    dst.patchInfo.kernelArgumentInfo.reserve(src.tokens.kernelArgs.size());
    dst.kernelArgInfo.resize(src.tokens.kernelArgs.size());
    dst.argumentsToPatchNum = static_cast<uint32_t>(src.tokens.kernelArgs.size());
    for (size_t i = 0U; i < src.tokens.kernelArgs.size(); ++i) {
        auto &decodedKernelArg = src.tokens.kernelArgs[i];
        auto &kernelInfoArg = dst.kernelArgInfo[i];
        populateKernelInfoArg(dst, kernelInfoArg, decodedKernelArg);
    }

    storeTokenIfNotNull(dst, src.tokens.kernelAttributesInfo);
    storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrivateSurface);
    storeTokenIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization);
    storeTokenIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization);
    storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrintfSurface);
    storeTokenIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface);
    storeTokenIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface);
    storeTokenIfNotNull(dst, src.tokens.allocateSyncBuffer);
    for (auto &str : src.tokens.strings) {
        dst.storePatchToken(str);
    }

    dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr);
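
    // STATE_SIP: when the token is present, record the system (SIP) kernel offset; otherwise default to 0.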
    dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U;
    storeTokenIfNotNull(dst, src.tokens.allocateSystemThreadSurface);

    for (uint32_t i = 0; i < 3U; ++i) {
        dst.workloadInfo.localWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize[i]);
        dst.workloadInfo.localWorkSizeOffsets2[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize2[i]);
        dst.workloadInfo.globalWorkOffsetOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkOffset[i]);
        dst.workloadInfo.enqueuedLocalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[i]);
        dst.workloadInfo.globalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkSize[i]);
        dst.workloadInfo.numWorkGroupsOffset[i] = getOffset(src.tokens.crossThreadPayloadArgs.numWorkGroups[i]);
    }

    dst.workloadInfo.maxWorkGroupSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.maxWorkGroupSize);
    dst.workloadInfo.workDimOffset = getOffset(src.tokens.crossThreadPayloadArgs.workDimensions);
    dst.workloadInfo.simdSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.simdSize);
    dst.workloadInfo.parentEventOffset = getOffset(src.tokens.crossThreadPayloadArgs.parentEvent);
    dst.workloadInfo.preferredWkgMultipleOffset = getOffset(src.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple);
    dst.workloadInfo.privateMemoryStatelessSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize);
    dst.workloadInfo.localMemoryStatelessWindowSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize);
    dst.workloadInfo.localMemoryStatelessWindowStartAddressOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress);

    for (auto &childSimdSize : src.tokens.crossThreadPayloadArgs.childBlockSimdSize) {
        dst.childrenKernelsIdOffset.push_back({childSimdSize->ArgumentNumber, childSimdSize->Offset});
    }

    if (src.tokens.gtpinInfo) {
        // The GTPin payload follows immediately after the token header.
        dst.igcInfoForGtpin = reinterpret_cast<const gtpin::igc_info_t *>(src.tokens.gtpinInfo + 1);
    }

    dst.isValid = (false == NEO::PatchTokenBinary::hasInvalidChecksum(src));
}

} // namespace NEO
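
// Usage sketch (illustrative, kept as a comment): given a decoded
// PatchTokenBinary::KernelFromPatchtokens (e.g. obtained through the decoder declared in
// patchtokens_decoder.h), a caller populates a KernelInfo and then checks the
// checksum-derived validity flag; `decodedKernel` below is a hypothetical variable:
//
//   NEO::KernelInfo kernelInfo;
//   NEO::populateKernelInfo(kernelInfo, decodedKernel);
//   if (!kernelInfo.isValid) {
//       // reject the kernel binary - its checksum did not match
//   }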