diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.h b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.h index a848c63a55..1b772d76d6 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.h +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.h @@ -36,8 +36,6 @@ struct MutableAppendLaunchKernelWithParams { struct MutableAppendLaunchKernelEvents { CommandToPatch signalCmd; - size_t currentSignalEventDescriptorIndex = std::numeric_limits::max(); - bool waitEvents = false; bool l3FlushEventSyncCmd = false; bool l3FlushEventTimestampSyncCmds = false; @@ -113,7 +111,7 @@ struct MutableCommandListCoreFamily : public MutableCommandListImp, public Comma void storeKernelArgumentAndDispatchVariables(MutableAppendLaunchKernelWithParams &mutableParams, CmdListKernelLaunchParams &launchParams, Kernel *kernel, - MutationVariables *variableDescriptors, + KernelVariableDescriptor *kernelVariables, ze_mutable_command_exp_flags_t mutableFlags); void storeSignalEventVariable(MutableAppendLaunchKernelEvents &mutableEventParams, CmdListKernelLaunchParams &launchParams, @@ -126,7 +124,7 @@ struct MutableCommandListCoreFamily : public MutableCommandListImp, public Comma std::vector &variableLoadRegisterImmList); void captureRegularWaitEventCommands(CommandToPatchContainer::iterator &cmdsIterator, std::vector &variableSemaphoreWaitList); - void captureCounterBasedTimestampSignalEventCommands(MutableVariableDescriptor ¤tMutableSignalEvent, + void captureCounterBasedTimestampSignalEventCommands(SignalEventVariableDescriptor ¤tMutableSignalEvent, std::vector &variableSemaphoreWaitList, std::vector &variableStoreDataImmList); void captureStandaloneTimestampSignalEventCommands(std::vector &variableStoreRegisterMem); diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl index 935ed3593e..c25e93f6f1 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl @@ -178,10 +178,11 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern if ((currentAppend.mutationFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS) == ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS) { if (numWaitEvents > 0) { + currentAppend.variables.waitEvents.reserve(numWaitEvents); mutableEventParams.waitEvents = true; bool omitWaitEventResidency = false; for (uint32_t i = 0; i < numWaitEvents; i++) { - MutableVariableDescriptor mutableWaitEvent = {}; + WaitEventVariableDescriptor mutableWaitEventDesc = {}; Event *event = Event::fromHandle(phWaitEvents[i]); @@ -191,21 +192,21 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern variable->setAsWaitEvent(event); - mutableWaitEvent.var = variable; - mutableWaitEvent.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS; - mutableWaitEvent.waitEvents.event = event; - mutableWaitEvent.waitEvents.waitEventIndex = i; + mutableWaitEventDesc.event = event; + mutableWaitEventDesc.eventVariable = variable; + mutableWaitEventDesc.waitEventIndex = i; + if (CommandListImp::isInOrderExecutionEnabled() && event->isCounterBased()) { - mutableWaitEvent.waitEvents.waitEventPackets = event->getInOrderExecInfo()->getNumDevicePartitionsToWait(); + mutableWaitEventDesc.waitEventPackets = event->getInOrderExecInfo()->getNumDevicePartitionsToWait(); if (!isCbEventBoundToCmdList(event)) { omitWaitEventResidency = true; auto deviceCounterAlloc = event->getInOrderExecInfo()->getDeviceCounterAllocation(); addToResidencyContainer(getDeviceCounterAllocForResidency(deviceCounterAlloc)); } } else { - mutableWaitEvent.waitEvents.waitEventPackets = event->getPacketsToWait(); + mutableWaitEventDesc.waitEventPackets = event->getPacketsToWait(); } - currentAppend.variables.push_back(mutableWaitEvent); + currentAppend.variables.waitEvents.push_back(mutableWaitEventDesc); NEO::GraphicsAllocation *eventPoolAlloc = event->getAllocation(this->device); if (eventPoolAlloc) { @@ -228,23 +229,23 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern } if (this->nextAppendKernelMutable) { - if (mutableEventParams.currentSignalEventDescriptorIndex != std::numeric_limits::max()) { - MutableVariableDescriptor ¤tMutableSignalEvent = this->mutations[(nextCommandId - 1)].variables[mutableEventParams.currentSignalEventDescriptorIndex]; + if (this->mutations[(nextCommandId - 1)].variables.signalEvent.eventVariable != nullptr) { + auto &signalEventVariableDesc = this->mutations[(nextCommandId - 1)].variables.signalEvent; MutableComputeWalker *walker = nullptr; MutablePipeControl *signalPipeControl = nullptr; if (mutableEventParams.counterBasedEvent) { // both TS and L3 flush events need additional clean Store Data Imm -> signal cmd (CW or PC or StoreRegMem) -> sync SemWait if (mutableEventParams.counterBasedTimestampEvent || mutableEventParams.l3FlushEvent) { - auto &eventVariableSemaphoreWaitList = currentMutableSignalEvent.var->getSemWaitList(); - auto &eventVariableStoreDataImmList = currentMutableSignalEvent.var->getStoreDataImmList(); + auto &eventVariableSemaphoreWaitList = signalEventVariableDesc.eventVariable->getSemWaitList(); + auto &eventVariableStoreDataImmList = signalEventVariableDesc.eventVariable->getStoreDataImmList(); - captureCounterBasedTimestampSignalEventCommands(currentMutableSignalEvent, + captureCounterBasedTimestampSignalEventCommands(signalEventVariableDesc, eventVariableSemaphoreWaitList, eventVariableStoreDataImmList); walker = this->appendKernelMutableComputeWalker; } if (mutableEventParams.l3FlushEventTimestampSyncCmds) { // L3 TS is signaled by StoreRegMem - auto &eventVariableStoreRegMemList = currentMutableSignalEvent.var->getStoreRegMemList(); + auto &eventVariableStoreRegMemList = signalEventVariableDesc.eventVariable->getStoreRegMemList(); captureStandaloneTimestampSignalEventCommands(eventVariableStoreRegMemList); } else if (mutableEventParams.l3FlushEventSyncCmd) { // L3 Immediate is signaled by PC @@ -258,14 +259,14 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern } } else { if (mutableEventParams.eventInsideInOrder) { - auto &eventVariableSemaphoreWaitList = currentMutableSignalEvent.var->getSemWaitList(); - auto &eventVariableStoreDataImmList = currentMutableSignalEvent.var->getStoreDataImmList(); + auto &eventVariableSemaphoreWaitList = signalEventVariableDesc.eventVariable->getSemWaitList(); + auto &eventVariableStoreDataImmList = signalEventVariableDesc.eventVariable->getStoreDataImmList(); - captureCounterBasedTimestampSignalEventCommands(currentMutableSignalEvent, + captureCounterBasedTimestampSignalEventCommands(signalEventVariableDesc, eventVariableSemaphoreWaitList, eventVariableStoreDataImmList); } if (mutableEventParams.l3FlushEventTimestampSyncCmds) { - auto &eventVariableStoreRegMemList = currentMutableSignalEvent.var->getStoreRegMemList(); + auto &eventVariableStoreRegMemList = signalEventVariableDesc.eventVariable->getStoreRegMemList(); captureStandaloneTimestampSignalEventCommands(eventVariableStoreRegMemList); } else if (mutableEventParams.l3FlushEventSyncCmd) { auto signalPipeControlPtr = std::make_unique>(mutableEventParams.signalCmd.pDestination); @@ -275,33 +276,30 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern walker = this->appendKernelMutableComputeWalker; } } - currentMutableSignalEvent.var->setAsSignalEvent(currentMutableSignalEvent.signalEvent.event, - walker, - signalPipeControl); - NEO::GraphicsAllocation *eventAlloc = currentMutableSignalEvent.signalEvent.event->getAllocation(this->device); + signalEventVariableDesc.eventVariable->setAsSignalEvent(signalEventVariableDesc.event, + walker, + signalPipeControl); + NEO::GraphicsAllocation *eventAlloc = signalEventVariableDesc.event->getAllocation(this->device); if (eventAlloc) { addToResidencyContainer(eventAlloc); } } if (mutableEventParams.waitEvents) { - auto waitEventCmdToPatchIterator = this->appendCmdsToPatch.begin(); if (waitEventCmdToPatchIterator->type == CommandToPatch::CommandType::PrefetchKernelMemory) { waitEventCmdToPatchIterator++; } AppendMutation ¤tAppend = this->mutations[(nextCommandId - 1)]; - for (uint32_t i = 0; i < numWaitEvents; i++) { - MutableVariableDescriptor &mutableWaitEvent = currentAppend.variables[i]; - UNRECOVERABLE_IF(ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS != mutableWaitEvent.varType); - UNRECOVERABLE_IF(i != mutableWaitEvent.waitEvents.waitEventIndex); + WaitEventVariableDescriptor &mutableWaitEvent = currentAppend.variables.waitEvents[i]; + UNRECOVERABLE_IF(i != mutableWaitEvent.waitEventIndex); - auto &variableSemWaitCmdList = mutableWaitEvent.var->getSemWaitList(); - auto &variableLoadRegImmCmdList = mutableWaitEvent.var->getLoadRegImmList(); + auto &variableSemWaitCmdList = mutableWaitEvent.eventVariable->getSemWaitList(); + auto &variableLoadRegImmCmdList = mutableWaitEvent.eventVariable->getLoadRegImmList(); - for (uint32_t packet = 0; packet < mutableWaitEvent.waitEvents.waitEventPackets; packet++) { - if (CommandListImp::isInOrderExecutionEnabled() && mutableWaitEvent.waitEvents.event->isCounterBased() && (this->heaplessModeEnabled || !mutableWaitEvent.waitEvents.event->hasInOrderTimestampNode())) { + for (uint32_t packet = 0; packet < mutableWaitEvent.waitEventPackets; packet++) { + if (CommandListImp::isInOrderExecutionEnabled() && mutableWaitEvent.event->isCounterBased() && (this->heaplessModeEnabled || !mutableWaitEvent.event->hasInOrderTimestampNode())) { captureCounterBasedWaitEventCommands(waitEventCmdToPatchIterator, variableSemWaitCmdList, variableLoadRegImmCmdList); } else { captureRegularWaitEventCommands(waitEventCmdToPatchIterator, variableSemWaitCmdList); @@ -338,7 +336,7 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern } MutableAppendLaunchKernelWithParams mutableCmdlistAppendLaunchParams = {}; - MutationVariables *currentVariables = nullptr; + KernelVariableDescriptor *currentKernelVariables = nullptr; if (this->nextAppendKernelMutable) { AppendMutation ¤tAppend = this->mutations[(nextCommandId - 1)]; @@ -358,12 +356,12 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern mutableCmdlistAppendLaunchParams.localRegionSizeFromApi = launchParams.localRegionSize; mutableCmdlistAppendLaunchParams.isCooperativeFromApi = launchParams.isCooperative; - currentVariables = &mutableCmdlistAppendLaunchParams.currentMutableKernel->getKernelVariables(); + currentKernelVariables = &mutableCmdlistAppendLaunchParams.currentMutableKernel->getKernelVariables(); } else { - currentVariables = ¤tAppend.variables; + currentKernelVariables = ¤tAppend.variables.kernelVariables; } - storeKernelArgumentAndDispatchVariables(mutableCmdlistAppendLaunchParams, launchParams, kernel, currentVariables, currentAppend.mutationFlags); + storeKernelArgumentAndDispatchVariables(mutableCmdlistAppendLaunchParams, launchParams, kernel, currentKernelVariables, currentAppend.mutationFlags); if (mutableCmdlistAppendLaunchParams.kernelMutation) { launchParams.reserveExtraPayloadSpace += mutableCmdlistAppendLaunchParams.extraPayloadSpaceForKernelGroup; @@ -678,11 +676,11 @@ void MutableCommandListCoreFamily::captureRegularWaitEventCommand } template -void MutableCommandListCoreFamily::captureCounterBasedTimestampSignalEventCommands(MutableVariableDescriptor ¤tMutableSignalEvent, +void MutableCommandListCoreFamily::captureCounterBasedTimestampSignalEventCommands(SignalEventVariableDescriptor ¤tMutableSignalEvent, std::vector &variableSemaphoreWaitList, std::vector &variableStoreDataImmList) { auto partitionCount = CommandListCoreFamily::getPartitionCount(); - uint32_t syncWaitEventPackets = currentMutableSignalEvent.signalEvent.event->getPacketsToWait(); + uint32_t syncWaitEventPackets = currentMutableSignalEvent.event->getPacketsToWait(); uint32_t clearEventOps = syncWaitEventPackets / partitionCount; variableSemaphoreWaitList.reserve(syncWaitEventPackets); @@ -730,27 +728,21 @@ template void MutableCommandListCoreFamily::storeKernelArgumentAndDispatchVariables(MutableAppendLaunchKernelWithParams &mutableParams, CmdListKernelLaunchParams &launchParams, Kernel *kernel, - MutationVariables *variableDescriptors, + KernelVariableDescriptor *kernelVariables, ze_mutable_command_exp_flags_t mutableFlags) { if ((mutableFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT) == ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT) { InterfaceVariableDescriptor varDesc = {}; varDesc.isStageCommit = true; getVariable(&varDesc, &mutableParams.groupCountVariable); - MutableVariableDescriptor mutableGroupCount = {}; - mutableGroupCount.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT; - mutableGroupCount.var = mutableParams.groupCountVariable; - variableDescriptors->push_back(mutableGroupCount); + kernelVariables->groupCount = mutableParams.groupCountVariable; } if ((mutableFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE) == ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE) { InterfaceVariableDescriptor varDesc = {}; varDesc.isStageCommit = true; getVariable(&varDesc, &mutableParams.groupSizeVariable); - MutableVariableDescriptor mutableGroupSize = {}; - mutableGroupSize.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE; - mutableGroupSize.var = mutableParams.groupSizeVariable; - variableDescriptors->push_back(mutableGroupSize); + kernelVariables->groupSize = mutableParams.groupSizeVariable; this->enableReservePerThreadForLocalId = true; launchParams.reserveExtraPayloadSpace = this->maxPerThreadDataSize; @@ -759,16 +751,16 @@ void MutableCommandListCoreFamily::storeKernelArgumentAndDispatch InterfaceVariableDescriptor varDesc = {}; getVariable(&varDesc, &mutableParams.globalOffsetVariable); - MutableVariableDescriptor mutableGlobalOffset = {}; - mutableGlobalOffset.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; - mutableGlobalOffset.var = mutableParams.globalOffsetVariable; - variableDescriptors->push_back(mutableGlobalOffset); + kernelVariables->globalOffset = mutableParams.globalOffsetVariable; } if ((mutableFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS) == ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS) { // intercept kernel arguments auto &kernelArgs = kernel->getKernelDescriptor().payloadMappings.explicitArgs; + kernelVariables->kernelArguments.reserve(kernelArgs.size()); uint32_t argCount = 0; for (const auto &arg : kernelArgs) { + KernelArgumentVariableDescriptor mutableKernelArgumentDesc = {}; + mutableKernelArgumentDesc.argIndex = argCount; bool captureArgument = false; bool slmArgument = false; bool immediateArgument = arg.type == NEO::ArgDescriptor::argTValue; @@ -789,12 +781,9 @@ void MutableCommandListCoreFamily::storeKernelArgumentAndDispatch getVariable(&varDesc, &variable); variable->setAsKernelArg(kernel->toHandle(), argCount); - MutableVariableDescriptor mutableKernelArgument = {}; - mutableKernelArgument.var = variable; - mutableKernelArgument.kernelArguments.argIndex = argCount; - mutableKernelArgument.kernelArguments.argType = arg.type; - mutableKernelArgument.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS; - variableDescriptors->push_back(mutableKernelArgument); + mutableKernelArgumentDesc.kernelArgumentVariable = variable; + + kernelVariables->kernelArguments.push_back(mutableKernelArgumentDesc); mutableParams.kernelArgumentMutation = true; @@ -804,7 +793,11 @@ void MutableCommandListCoreFamily::storeKernelArgumentAndDispatch } mutableParams.lastSlmArgumentVariable = variable; } + } else { + // in case kernel argument is non-mutable (not captured), push it into kernel arguments vector anyway to preserve index order + kernelVariables->kernelArguments.push_back(mutableKernelArgumentDesc); } + argCount++; } } @@ -818,14 +811,10 @@ void MutableCommandListCoreFamily::storeSignalEventVariable(Mutab ze_mutable_command_exp_flags_t mutableFlags) { if ((mutableFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT) == ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT) { if (event != nullptr) { - MutableVariableDescriptor mutableSignalEvent = {}; - mutableSignalEvent.signalEvent.event = event; - mutableSignalEvent.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT; Variable *variable = nullptr; InterfaceVariableDescriptor varDesc = {}; getVariable(&varDesc, &variable); - mutableSignalEvent.var = variable; launchParams.omitAddingEventResidency = event->getAllocation(this->device) != nullptr; @@ -874,8 +863,8 @@ void MutableCommandListCoreFamily::storeSignalEventVariable(Mutab } } - variableDescriptors->emplace_back(mutableSignalEvent); - mutableEventParams.currentSignalEventDescriptorIndex = variableDescriptors->size() - 1; + variableDescriptors->signalEvent.event = event; + variableDescriptors->signalEvent.eventVariable = variable; } } } diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_imp.cpp b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_imp.cpp index 3dcbfd5038..f854392b98 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_imp.cpp +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_imp.cpp @@ -396,47 +396,43 @@ ze_result_t MutableCommandListImp::updateMutableCommandsExp(const ze_mutable_com ze_result_t result = ZE_RESULT_SUCCESS; const void *next = desc->pNext; while (next != nullptr) { - MutationVariables *currentVariables = nullptr; + KernelVariableDescriptor *currentVariables = nullptr; const ze_base_desc_t *extendedDesc = reinterpret_cast(next); if (extendedDesc->stype == ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC) { - const ze_mutable_kernel_argument_exp_desc_t *kernelArgumentDesc = reinterpret_cast(next); - AppendMutation &selectedAppend = this->mutations[(kernelArgumentDesc->commandId - 1)]; + const ze_mutable_kernel_argument_exp_desc_t *apiKernelArgumentDesc = reinterpret_cast(next); + AppendMutation &selectedAppend = this->mutations[(apiKernelArgumentDesc->commandId - 1)]; if ((selectedAppend.mutationFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS) == 0) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } currentVariables = getVariableDescriptorContainer(selectedAppend); - MutableVariableDescriptor *mutableKernelArgumentDesc = nullptr; - for (auto &mutableTypeDescriptor : *currentVariables) { - if (mutableTypeDescriptor.varType != ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS || - mutableTypeDescriptor.kernelArguments.argIndex != kernelArgumentDesc->argIndex) { - continue; - } - mutableKernelArgumentDesc = &mutableTypeDescriptor; - break; - } - if (mutableKernelArgumentDesc == nullptr) { + if (apiKernelArgumentDesc->argIndex + 1 > currentVariables->kernelArguments.size()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - if (mutableKernelArgumentDesc->var->getType() == VariableType::buffer) { - auto argValue = kernelArgumentDesc->pArgValue == nullptr ? nullptr : *reinterpret_cast(kernelArgumentDesc->pArgValue); - if (mutableKernelArgumentDesc->var->getDesc().argValue == argValue) { + KernelArgumentVariableDescriptor &kernelArgDesc = currentVariables->kernelArguments[apiKernelArgumentDesc->argIndex]; + UNRECOVERABLE_IF(kernelArgDesc.argIndex != apiKernelArgumentDesc->argIndex); + if (kernelArgDesc.kernelArgumentVariable == nullptr) { + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + if (kernelArgDesc.kernelArgumentVariable->getType() == VariableType::buffer) { + auto argValue = apiKernelArgumentDesc->pArgValue == nullptr ? nullptr : *reinterpret_cast(apiKernelArgumentDesc->pArgValue); + if (kernelArgDesc.kernelArgumentVariable->getDesc().argValue == argValue) { PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintMclData.get(), stderr, "MCL update kernel arg commandId: %" PRIu64 " argument idx: %u, buffer - same value: %p\n", - kernelArgumentDesc->commandId, kernelArgumentDesc->argIndex, argValue); + apiKernelArgumentDesc->commandId, apiKernelArgumentDesc->argIndex, argValue); next = extendedDesc->pNext; continue; } } - result = mutableKernelArgumentDesc->var->setValue(kernelArgumentDesc->argSize, 0, kernelArgumentDesc->pArgValue); + result = kernelArgDesc.kernelArgumentVariable->setValue(apiKernelArgumentDesc->argSize, 0, apiKernelArgumentDesc->pArgValue); if (result != ZE_RESULT_SUCCESS) { return result; } this->updatedCommandList = true; - if (mutableKernelArgumentDesc->var->getType() == VariableType::slmBuffer && mutableKernelArgumentDesc->var->isCooperativeVariable()) { - auto varDispatch = mutableKernelArgumentDesc->var->getInitialVariableDispatch(); + if (kernelArgDesc.kernelArgumentVariable->getType() == VariableType::slmBuffer && kernelArgDesc.kernelArgumentVariable->isCooperativeVariable()) { + auto varDispatch = kernelArgDesc.kernelArgumentVariable->getInitialVariableDispatch(); cooperativeKernelVariableDispatches.insert(varDispatch); } PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintMclData.get(), stderr, "MCL update kernel arg commandId: %" PRIu64 " argument idx: %u, size: %zu, val: %p\n", - kernelArgumentDesc->commandId, kernelArgumentDesc->argIndex, kernelArgumentDesc->argSize, mutableKernelArgumentDesc->var->getDesc().argValue); + apiKernelArgumentDesc->commandId, apiKernelArgumentDesc->argIndex, apiKernelArgumentDesc->argSize, kernelArgDesc.kernelArgumentVariable->getDesc().argValue); } if (extendedDesc->stype == ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC) { const ze_mutable_group_count_exp_desc_t *groupCountDesc = reinterpret_cast(next); @@ -445,24 +441,16 @@ ze_result_t MutableCommandListImp::updateMutableCommandsExp(const ze_mutable_com return ZE_RESULT_ERROR_INVALID_ARGUMENT; } currentVariables = getVariableDescriptorContainer(selectedAppend); - MutableVariableDescriptor *mutableGroupCountDesc = nullptr; - for (auto &mutableTypeDescriptor : *currentVariables) { - if (mutableTypeDescriptor.varType != ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT) { - continue; - } - mutableGroupCountDesc = &mutableTypeDescriptor; - break; - } - if (mutableGroupCountDesc == nullptr) { + if (currentVariables->groupCount == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - result = mutableGroupCountDesc->var->setValue(sizeof(ze_group_count_t), 0, groupCountDesc->pGroupCount); + result = currentVariables->groupCount->setValue(sizeof(ze_group_count_t), 0, groupCountDesc->pGroupCount); if (result != ZE_RESULT_SUCCESS) { return result; } this->updatedCommandList = true; - if (mutableGroupCountDesc->var->isCooperativeVariable()) { - auto varDispatch = mutableGroupCountDesc->var->getInitialVariableDispatch(); + if (currentVariables->groupCount->isCooperativeVariable()) { + auto varDispatch = currentVariables->groupCount->getInitialVariableDispatch(); cooperativeKernelVariableDispatches.insert(varDispatch); } PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintMclData.get(), stderr, "MCL update group count commandId: %" PRIu64 " x: %u y: %u z: %u\n", @@ -475,25 +463,17 @@ ze_result_t MutableCommandListImp::updateMutableCommandsExp(const ze_mutable_com return ZE_RESULT_ERROR_INVALID_ARGUMENT; } currentVariables = getVariableDescriptorContainer(selectedAppend); - MutableVariableDescriptor *mutableGroupSizeDesc = nullptr; - for (auto &mutableTypeDescriptor : *currentVariables) { - if (mutableTypeDescriptor.varType != ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE) { - continue; - } - mutableGroupSizeDesc = &mutableTypeDescriptor; - break; - } - if (mutableGroupSizeDesc == nullptr) { + if (currentVariables->groupSize == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } uint32_t groupSize[3] = {groupSizeDesc->groupSizeX, groupSizeDesc->groupSizeY, groupSizeDesc->groupSizeZ}; - result = mutableGroupSizeDesc->var->setValue(sizeof(groupSize), 0, groupSize); + result = currentVariables->groupSize->setValue(sizeof(groupSize), 0, groupSize); if (result != ZE_RESULT_SUCCESS) { return result; } this->updatedCommandList = true; - if (mutableGroupSizeDesc->var->isCooperativeVariable()) { - auto varDispatch = mutableGroupSizeDesc->var->getInitialVariableDispatch(); + if (currentVariables->groupSize->isCooperativeVariable()) { + auto varDispatch = currentVariables->groupSize->getInitialVariableDispatch(); cooperativeKernelVariableDispatches.insert(varDispatch); } PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintMclData.get(), stderr, "MCL update group size commandId: %" PRIu64 " x: %u y: %u z: %u\n", @@ -506,19 +486,11 @@ ze_result_t MutableCommandListImp::updateMutableCommandsExp(const ze_mutable_com return ZE_RESULT_ERROR_INVALID_ARGUMENT; } currentVariables = getVariableDescriptorContainer(selectedAppend); - MutableVariableDescriptor *mutableGlobalOffsetDesc = nullptr; - for (auto &mutableTypeDescriptor : *currentVariables) { - if (mutableTypeDescriptor.varType != ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET) { - continue; - } - mutableGlobalOffsetDesc = &mutableTypeDescriptor; - break; - } - if (mutableGlobalOffsetDesc == nullptr) { + if (currentVariables->globalOffset == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } uint32_t globalOffset[3] = {globalOffsetDesc->offsetX, globalOffsetDesc->offsetY, globalOffsetDesc->offsetZ}; - result = mutableGlobalOffsetDesc->var->setValue(sizeof(globalOffset), 0, globalOffset); + result = currentVariables->globalOffset->setValue(sizeof(globalOffset), 0, globalOffset); if (result != ZE_RESULT_SUCCESS) { return result; } @@ -546,25 +518,17 @@ ze_result_t MutableCommandListImp::updateMutableCommandSignalEventExp(uint64_t c if ((selectedAppend.mutationFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT) == 0) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - MutableVariableDescriptor *mutableSignalEventDesc = nullptr; - for (auto &mutableTypeDescriptor : selectedAppend.variables) { - if (mutableTypeDescriptor.varType != ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT) { - continue; - } - mutableSignalEventDesc = &mutableTypeDescriptor; - break; - } - if (mutableSignalEventDesc == nullptr) { + if (selectedAppend.variables.signalEvent.eventVariable == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } auto inputEvent = Event::fromHandle(signalEvent); - if (mutableSignalEventDesc->signalEvent.event == inputEvent) { + if (selectedAppend.variables.signalEvent.event == inputEvent) { return ZE_RESULT_SUCCESS; } - auto ret = mutableSignalEventDesc->var->setValue(0, 0, inputEvent); + auto ret = selectedAppend.variables.signalEvent.eventVariable->setValue(0, 0, inputEvent); if (ret == ZE_RESULT_SUCCESS) { - mutableSignalEventDesc->signalEvent.event = inputEvent; + selectedAppend.variables.signalEvent.event = inputEvent; this->updatedCommandList = true; } return ret; @@ -576,29 +540,21 @@ ze_result_t MutableCommandListImp::updateMutableCommandWaitEventsExp(uint64_t co if ((selectedAppend.mutationFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS) == 0) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - - MutableVariableDescriptor *mutableWaitEventDesc = nullptr; + if (numWaitEvents > selectedAppend.variables.waitEvents.size()) { + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } for (uint32_t eventNum = 0; eventNum < numWaitEvents; eventNum++) { - for (auto &mutableTypeDescriptor : selectedAppend.variables) { - if (mutableTypeDescriptor.varType != ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS || mutableTypeDescriptor.waitEvents.waitEventIndex != eventNum) { - continue; - } - mutableWaitEventDesc = &mutableTypeDescriptor; - break; - } - if (mutableWaitEventDesc == nullptr) { - return ZE_RESULT_ERROR_INVALID_ARGUMENT; - } - + WaitEventVariableDescriptor &mutableWaitEventDesc = selectedAppend.variables.waitEvents[eventNum]; + UNRECOVERABLE_IF(mutableWaitEventDesc.waitEventIndex != eventNum); auto waitEventHandle = toInternalType(phWaitEvents[eventNum]); auto inputEvent = Event::fromHandle(waitEventHandle); - if (mutableWaitEventDesc->waitEvents.event == inputEvent) { + if (mutableWaitEventDesc.event == inputEvent) { continue; } - auto retCode = mutableWaitEventDesc->var->setValue(0, 0, inputEvent); + auto retCode = mutableWaitEventDesc.eventVariable->setValue(0, 0, inputEvent); if (retCode == ZE_RESULT_SUCCESS) { - mutableWaitEventDesc->waitEvents.event = inputEvent; + mutableWaitEventDesc.event = inputEvent; this->updatedCommandList = true; } else { return retCode; @@ -643,27 +599,29 @@ ze_result_t MutableCommandListImp::updateMutableCommandKernelsExp(uint32_t numKe } // remove old kernel arguments (buffers) from mutable residency and reset variables { - for (auto &kernelVariableDescriptor : oldMutableKernel->getKernelVariables()) { - auto &varDescriptor = kernelVariableDescriptor.var->getDesc(); + auto &kernelVariableDescriptors = oldMutableKernel->getKernelVariables(); + for (auto &kernelArgVarDesc : kernelVariableDescriptors.kernelArguments) { + if (kernelArgVarDesc.kernelArgumentVariable == nullptr) { + continue; + } + auto &varDescriptor = kernelArgVarDesc.kernelArgumentVariable->getDesc(); if (varDescriptor.type == VariableType::buffer) { if (varDescriptor.bufferAlloc != nullptr) { removeFromResidencyContainer(varDescriptor.bufferAlloc); } - kernelVariableDescriptor.var->resetBufferVariable(); + kernelArgVarDesc.kernelArgumentVariable->resetBufferVariable(); } if (varDescriptor.type == VariableType::slmBuffer) { - kernelVariableDescriptor.var->resetSlmVariable(); - } - if (varDescriptor.type == VariableType::groupCount) { - kernelVariableDescriptor.var->resetGroupCountVariable(); - } - if (varDescriptor.type == VariableType::groupSize) { - kernelVariableDescriptor.var->resetGroupSizeVariable(); - } - if (varDescriptor.type == VariableType::globalOffset) { - kernelVariableDescriptor.var->resetGlobalOffsetVariable(); + kernelArgVarDesc.kernelArgumentVariable->resetSlmVariable(); } } + kernelVariableDescriptors.groupCount->resetGroupCountVariable(); + if (kernelVariableDescriptors.groupSize != nullptr) { + kernelVariableDescriptors.groupSize->resetGroupSizeVariable(); + } + if (kernelVariableDescriptors.globalOffset != nullptr) { + kernelVariableDescriptors.globalOffset->resetGlobalOffsetVariable(); + } } // copy const data from host view into heap payload @@ -703,9 +661,9 @@ ze_result_t MutableCommandListImp::updateMutableCommandKernelsExp(uint32_t numKe // save new host view inline data/post sync into command buffer newKernelComputeWalker->saveCpuBufferIntoGpuBuffer(false, true); - // update reminder variables (signal/wait events variables) with new compute walker to have correct reference for new post sync addresses - for (auto &mutableVariableDescriptor : selectedAppend.variables) { - mutableVariableDescriptor.var->updateMutableComputeWalker(newKernelComputeWalker); + // update reminder variables (signal event variable) with new compute walker to have correct reference for new post sync addresses + if (selectedAppend.variables.signalEvent.eventVariable != nullptr) { + selectedAppend.variables.signalEvent.eventVariable->updateMutableComputeWalker(newKernelComputeWalker); } // add new kernel to mutable residency diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_imp.h b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_imp.h index 39dcd17b73..b08c21a01d 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_imp.h +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_imp.h @@ -75,11 +75,6 @@ struct MclAllocations { }; struct AppendMutation { - AppendMutation() { - constexpr size_t estimatedKernelArgumentPerAppendCount = 40 + 2; // kernel args + 2 for group size and group count per kernel in kernel group - // reference to variables is used in append but NOT for kernel group (ISA mutation) and their descriptors - variables.reserve(estimatedKernelArgumentPerAppendCount); - } MutationVariables variables; MutableKernelGroup *kernelGroup = nullptr; ze_mutable_command_exp_flags_t mutationFlags = 0; @@ -168,11 +163,11 @@ struct MutableCommandListImp : public MutableCommandList { void createNativeBinary(ArrayRef module); KernelData *getKernelData(L0::Kernel *kernel); - MutationVariables *getVariableDescriptorContainer(AppendMutation &selectedAppend) { + KernelVariableDescriptor *getVariableDescriptorContainer(AppendMutation &selectedAppend) { if (selectedAppend.kernelGroup != nullptr) { return &selectedAppend.kernelGroup->getCurrentMutableKernel()->getKernelVariables(); } else { - return &selectedAppend.variables; + return &selectedAppend.variables.kernelVariables; } } diff --git a/level_zero/core/source/mutable_cmdlist/mutable_kernel.cpp b/level_zero/core/source/mutable_cmdlist/mutable_kernel.cpp index 873edcf243..595bde0522 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_kernel.cpp +++ b/level_zero/core/source/mutable_cmdlist/mutable_kernel.cpp @@ -23,18 +23,11 @@ namespace L0::MCL { MutableKernel::MutableKernel(ze_kernel_handle_t kernelHandle, uint32_t inlineDataSize, uint32_t maxPerThreadDataSize) : inlineDataSize(inlineDataSize), maxPerThreadDataSize(maxPerThreadDataSize) { - // group count/size - constexpr size_t estimatedDispatchVariablesCount = 2; - // kernel arguments - constexpr size_t estimatedKernelArgumentPerAppendCount = 40; - // kernel args and extra group count/size - constexpr size_t estimatedVariablesPerAppend = estimatedKernelArgumentPerAppendCount + estimatedDispatchVariablesCount; + this->kernel = L0::Kernel::fromHandle(kernelHandle); + this->kernelVariables.kernelArguments.reserve(this->kernel->getKernelDescriptor().payloadMappings.explicitArgs.size()); // space for internal allocations like ISA, private, const, global buffers, etc. constexpr size_t estimatedInternalResidencyCount = 10; - // reference to variables is used in append but for a given kernel for kernel group (ISA mutation) and their descriptors - this->kernelVariables.reserve(estimatedVariablesPerAppend); this->kernelResidencySnapshotContainer.reserve(estimatedInternalResidencyCount); - this->kernel = L0::Kernel::fromHandle(kernelHandle); } uint32_t MutableKernel::getKernelScratchSize(uint32_t slotId) const { diff --git a/level_zero/core/source/mutable_cmdlist/mutable_kernel.h b/level_zero/core/source/mutable_cmdlist/mutable_kernel.h index 8bc95f5a8c..d65e437224 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_kernel.h +++ b/level_zero/core/source/mutable_cmdlist/mutable_kernel.h @@ -29,7 +29,7 @@ class MutableKernel { MutableKernel(ze_kernel_handle_t kernelHandle, uint32_t inlineDataSize, uint32_t maxPerThreadDataSize); - MutationVariables &getKernelVariables() { + KernelVariableDescriptor &getKernelVariables() { return kernelVariables; } @@ -82,7 +82,7 @@ class MutableKernel { bool checkKernelCompatible(); protected: - MutationVariables kernelVariables; + KernelVariableDescriptor kernelVariables; NEO::ResidencyContainer kernelResidencySnapshotContainer; std::unique_ptr hostViewIndirectData; diff --git a/level_zero/core/source/mutable_cmdlist/mutable_variable_descriptor.h b/level_zero/core/source/mutable_cmdlist/mutable_variable_descriptor.h index f9158480a8..4ab4d8cddd 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_variable_descriptor.h +++ b/level_zero/core/source/mutable_cmdlist/mutable_variable_descriptor.h @@ -19,30 +19,33 @@ namespace L0::MCL { struct Variable; struct KernelArgumentVariableDescriptor { - uint32_t argType = 0; + Variable *kernelArgumentVariable = nullptr; uint32_t argIndex = 0; }; struct SignalEventVariableDescriptor { + Variable *eventVariable = nullptr; Event *event = nullptr; }; struct WaitEventVariableDescriptor { + Variable *eventVariable = nullptr; Event *event = nullptr; uint32_t waitEventIndex = 0; uint32_t waitEventPackets = 0; }; -struct MutableVariableDescriptor { - Variable *var = nullptr; - union { - KernelArgumentVariableDescriptor kernelArguments; - SignalEventVariableDescriptor signalEvent; - WaitEventVariableDescriptor waitEvents; - }; - ze_mutable_command_exp_flag_t varType; +struct KernelVariableDescriptor { + std::vector kernelArguments; + Variable *groupCount = nullptr; + Variable *groupSize = nullptr; + Variable *globalOffset = nullptr; }; -using MutationVariables = std::vector; +struct MutationVariables { + KernelVariableDescriptor kernelVariables; + SignalEventVariableDescriptor signalEvent; + std::vector waitEvents; +}; } // namespace L0::MCL diff --git a/level_zero/core/test/unit_tests/sources/mutable_cmdlist/fixtures/mutable_cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/sources/mutable_cmdlist/fixtures/mutable_cmdlist_fixture.cpp index a2cb840d4e..e60e1a3f41 100644 --- a/level_zero/core/test/unit_tests/sources/mutable_cmdlist/fixtures/mutable_cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/sources/mutable_cmdlist/fixtures/mutable_cmdlist_fixture.cpp @@ -328,20 +328,51 @@ void MutableCommandListFixtureInit::prepareKernelArg(uint16_t argIndex, L0::MCL: std::vector MutableCommandListFixtureInit::getVariableList(uint64_t commandId, L0::MCL::VariableType varType, L0::Kernel *kernelOption) { auto &selectedAppend = mutableCommandList->mutations[(commandId - 1)]; std::vector selectedVariables; - L0::MCL::MutationVariables *appendVariableDescriptors = nullptr; - if (kernelOption != nullptr) { - for (auto &mutableKernel : selectedAppend.kernelGroup->getKernelsInGroup()) { - if (mutableKernel->getKernel() == kernelOption) { - appendVariableDescriptors = &mutableKernel->getKernelVariables(); + L0::MCL::KernelVariableDescriptor *kernelVariableDescriptors = nullptr; + if (varType == L0::MCL::VariableType::buffer || + varType == L0::MCL::VariableType::value || + varType == L0::MCL::VariableType::slmBuffer || + varType == L0::MCL::VariableType::globalOffset || + varType == L0::MCL::VariableType::groupCount || + varType == L0::MCL::VariableType::groupSize) { + if (kernelOption != nullptr) { + for (auto &mutableKernel : selectedAppend.kernelGroup->getKernelsInGroup()) { + if (mutableKernel->getKernel() == kernelOption) { + kernelVariableDescriptors = &mutableKernel->getKernelVariables(); + } + } + } else { + kernelVariableDescriptors = &selectedAppend.variables.kernelVariables; + } + if (kernelVariableDescriptors != nullptr) { + if (varType == L0::MCL::VariableType::buffer || + varType == L0::MCL::VariableType::value || + varType == L0::MCL::VariableType::slmBuffer) { + for (auto &varDesc : kernelVariableDescriptors->kernelArguments) { + if (varDesc.kernelArgumentVariable != nullptr && + varType == varDesc.kernelArgumentVariable->getDesc().type) { + selectedVariables.push_back(varDesc.kernelArgumentVariable); + } + } + } + if (varType == L0::MCL::VariableType::globalOffset && kernelVariableDescriptors->globalOffset != nullptr) { + selectedVariables.push_back(kernelVariableDescriptors->globalOffset); + } + if (varType == L0::MCL::VariableType::groupCount && kernelVariableDescriptors->groupCount != nullptr) { + selectedVariables.push_back(kernelVariableDescriptors->groupCount); + } + if (varType == L0::MCL::VariableType::groupSize && kernelVariableDescriptors->groupSize != nullptr) { + selectedVariables.push_back(kernelVariableDescriptors->groupSize); } } - } else { - appendVariableDescriptors = &selectedAppend.variables; } - if (appendVariableDescriptors != nullptr) { - for (auto &varDesc : *appendVariableDescriptors) { - if (varDesc.var->getType() == varType) { - selectedVariables.push_back(varDesc.var); + if (varType == L0::MCL::VariableType::signalEvent && selectedAppend.variables.signalEvent.eventVariable != nullptr) { + selectedVariables.push_back(selectedAppend.variables.signalEvent.eventVariable); + } + if (varType == L0::MCL::VariableType::waitEvent) { + for (auto &varDesc : selectedAppend.variables.waitEvents) { + if (varDesc.eventVariable != nullptr) { + selectedVariables.push_back(varDesc.eventVariable); } } } diff --git a/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_kernels_tests.cpp b/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_kernels_tests.cpp index 75499eaaea..d0e51575fa 100644 --- a/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_kernels_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_kernels_tests.cpp @@ -361,6 +361,41 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EXPECT_EQ(usm2Allocation, kernel2BufferVariable->getDesc().bufferAlloc); } +HWCMDTEST_F(IGFX_XE_HP_CORE, + MutableCommandListKernelTest, + givenTwoKernelsWithBufferAndImageArgumentsWhenMutatingKernelsThenImageArgumentIsNotReset) { + + // set kernel arg 0, 1 => buffer, image + resizeKernelArg(2); + prepareKernelArg(0, L0::MCL::VariableType::buffer, kernelAllMask); + + NEO::ArgDescriptor kernelArgImage = {NEO::ArgDescriptor::argTImage}; + mockKernelImmData->kernelDescriptor->payloadMappings.explicitArgs[1] = kernelArgImage; + mockKernelImmData2->kernelDescriptor->payloadMappings.explicitArgs[1] = kernelArgImage; + + mutableCommandIdDesc.flags = kernelIsaMutationFlags; + + auto result = mutableCommandList->getNextCommandId(&mutableCommandIdDesc, 2, kernelMutationGroup, &commandId); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + result = mutableCommandList->appendLaunchKernel(kernelHandle, this->testGroupCount, nullptr, 0, nullptr, this->testLaunchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + result = mutableCommandList->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto &mutation = mutableCommandList->mutations[commandId - 1]; + ASSERT_NE(nullptr, mutation.kernelGroup); + for (auto &mutableKernel : mutation.kernelGroup->getKernelsInGroup()) { + ASSERT_EQ(2u, mutableKernel->getKernelVariables().kernelArguments.size()); + // image at index 1 + EXPECT_EQ(nullptr, mutableKernel->getKernelVariables().kernelArguments[1].kernelArgumentVariable); + } + + result = mutableCommandList->updateMutableCommandKernelsExp(1, &commandId, &kernel2Handle); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + HWCMDTEST_F(IGFX_XE_HP_CORE, MutableCommandListKernelTest, givenTwoKernelsOneWithBiggerPayloadSizeWhenFirstAppendedWithSmallerAndSecondMutatedThenBiggerPayloadSizeConsumedAsReserveAtAppendTime) { diff --git a/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_tests.cpp b/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_tests.cpp index d35d7ab773..0f0c8fb19c 100644 --- a/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/mutable_cmdlist/tests/mutable_cmdlist_tests.cpp @@ -140,32 +140,28 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, mutableCommandList->toggleCommandListUpdated(); EXPECT_TRUE(mutableCommandList->updatedCommandList); - auto &bufferVarMutDesc = mutation.variables[0]; - EXPECT_EQ(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS, bufferVarMutDesc.varType); - auto &bufferInternalDesc = bufferVarMutDesc.var->getDesc(); + auto &bufferVarMutDesc = mutation.variables.kernelVariables.kernelArguments[0]; + auto &bufferInternalDesc = bufferVarMutDesc.kernelArgumentVariable->getDesc(); EXPECT_EQ(L0::MCL::VariableType::buffer, bufferInternalDesc.type); - auto gpuVaPatchFullAddress = reinterpret_cast(bufferVarMutDesc.var->getBufferUsages().statelessWithoutOffset[0]); + auto gpuVaPatchFullAddress = reinterpret_cast(bufferVarMutDesc.kernelArgumentVariable->getBufferUsages().statelessWithoutOffset[0]); memcpy(&usmPatchAddressValue, gpuVaPatchFullAddress, sizeof(uint64_t)); EXPECT_EQ(reinterpret_cast(usm1), usmPatchAddressValue); - auto &valueVarMutDesc = mutation.variables[1]; - EXPECT_EQ(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS, valueVarMutDesc.varType); - auto &valueInternalDesc = valueVarMutDesc.var->getDesc(); + auto &valueVarMutDesc = mutation.variables.kernelVariables.kernelArguments[1]; + auto &valueInternalDesc = valueVarMutDesc.kernelArgumentVariable->getDesc(); EXPECT_EQ(L0::MCL::VariableType::value, valueInternalDesc.type); - auto immediatePatchFullAddress = reinterpret_cast(valueVarMutDesc.var->getValueUsages().statelessWithoutOffset[0]); + auto immediatePatchFullAddress = reinterpret_cast(valueVarMutDesc.kernelArgumentVariable->getValueUsages().statelessWithoutOffset[0]); memcpy(&valueVariablePatchValue, immediatePatchFullAddress, sizeof(uint32_t)); EXPECT_EQ(value1, valueVariablePatchValue); - auto &slmVarMutDesc = mutation.variables[2]; - EXPECT_EQ(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS, slmVarMutDesc.varType); - auto &slmInternalDesc = slmVarMutDesc.var->getDesc(); + auto &slmVarMutDesc = mutation.variables.kernelVariables.kernelArguments[2]; + auto &slmInternalDesc = slmVarMutDesc.kernelArgumentVariable->getDesc(); EXPECT_EQ(L0::MCL::VariableType::slmBuffer, slmInternalDesc.type); - auto &slm2VarMutDesc = mutation.variables[3]; - EXPECT_EQ(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS, slm2VarMutDesc.varType); - auto &slm2InternalDesc = slm2VarMutDesc.var->getDesc(); + auto &slm2VarMutDesc = mutation.variables.kernelVariables.kernelArguments[3]; + auto &slm2InternalDesc = slm2VarMutDesc.kernelArgumentVariable->getDesc(); EXPECT_EQ(L0::MCL::VariableType::slmBuffer, slm2InternalDesc.type); - auto slmPatchFullAddress = reinterpret_cast(slm2VarMutDesc.var->getBufferUsages().statelessWithoutOffset[0]); + auto slmPatchFullAddress = reinterpret_cast(slm2VarMutDesc.kernelArgumentVariable->getBufferUsages().statelessWithoutOffset[0]); memcpy(&slmVariablePatchValue, slmPatchFullAddress, sizeof(uint32_t)); EXPECT_EQ(static_cast(slm1arg1), slmVariablePatchValue); @@ -262,10 +258,26 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EXPECT_EQ(ZE_RESULT_SUCCESS, result); // only buffer created - ASSERT_EQ(1u, mutation.variables.size()); + ASSERT_EQ(2u, mutation.variables.kernelVariables.kernelArguments.size()); // at index 1 - EXPECT_EQ(1u, mutation.variables[0].kernelArguments.argIndex); + EXPECT_EQ(nullptr, mutation.variables.kernelVariables.kernelArguments[0].kernelArgumentVariable); + EXPECT_NE(nullptr, mutation.variables.kernelVariables.kernelArguments[1].kernelArgumentVariable); + + void *buffer = reinterpret_cast(0x12345678); + + ze_mutable_kernel_argument_exp_desc_t kernelBufferArg = {ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC}; + mutableCommandsDesc.pNext = &kernelBufferArg; + + kernelBufferArg.argIndex = 0; + kernelBufferArg.argSize = sizeof(void *); + kernelBufferArg.commandId = commandId; + kernelBufferArg.pArgValue = &buffer; + + // cannot mutate when variable is not created + result = mutableCommandList->updateMutableCommandsExp(&mutableCommandsDesc); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } + HWCMDTEST_F(IGFX_XE_HP_CORE, MutableCommandListTest, givenMutableCommandListWhenAppendingKernelWithOnlySlmArgumentThenSlmVariableCreated) { @@ -284,9 +296,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->appendLaunchKernel(kernel->toHandle(), this->testGroupCount, nullptr, 0, nullptr, this->testLaunchParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - ASSERT_EQ(1u, mutation.variables.size()); - EXPECT_EQ(0u, mutation.variables[0].kernelArguments.argIndex); - EXPECT_EQ(L0::MCL::VariableType::slmBuffer, mutation.variables[0].var->getType()); + ASSERT_EQ(1u, mutation.variables.kernelVariables.kernelArguments.size()); + EXPECT_EQ(0u, mutation.variables.kernelVariables.kernelArguments[0].argIndex); + EXPECT_EQ(L0::MCL::VariableType::slmBuffer, mutation.variables.kernelVariables.kernelArguments[0].kernelArgumentVariable->getType()); } HWCMDTEST_F(IGFX_XE_HP_CORE, @@ -336,19 +348,17 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto &buffer1VarMutDesc = mutation.variables[0]; - EXPECT_EQ(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS, buffer1VarMutDesc.varType); - auto &buffer1InternalDesc = buffer1VarMutDesc.var->getDesc(); + auto &buffer1VarMutDesc = mutation.variables.kernelVariables.kernelArguments[0]; + auto &buffer1InternalDesc = buffer1VarMutDesc.kernelArgumentVariable->getDesc(); EXPECT_EQ(L0::MCL::VariableType::buffer, buffer1InternalDesc.type); - auto gpuVa1PatchFullAddress = reinterpret_cast(buffer1VarMutDesc.var->getBufferUsages().statelessWithoutOffset[0]); + auto gpuVa1PatchFullAddress = reinterpret_cast(buffer1VarMutDesc.kernelArgumentVariable->getBufferUsages().statelessWithoutOffset[0]); memcpy(&usmPatchAddressValue, gpuVa1PatchFullAddress, sizeof(uint64_t)); EXPECT_EQ(reinterpret_cast(usm1), usmPatchAddressValue); - auto &buffer2VarMutDesc = mutation.variables[1]; - EXPECT_EQ(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS, buffer2VarMutDesc.varType); - auto &buffer2InternalDesc = buffer2VarMutDesc.var->getDesc(); + auto &buffer2VarMutDesc = mutation.variables.kernelVariables.kernelArguments[1]; + auto &buffer2InternalDesc = buffer2VarMutDesc.kernelArgumentVariable->getDesc(); EXPECT_EQ(L0::MCL::VariableType::buffer, buffer2InternalDesc.type); - auto gpuVa2PatchFullAddress = reinterpret_cast(buffer2VarMutDesc.var->getBufferUsages().statelessWithoutOffset[0]); + auto gpuVa2PatchFullAddress = reinterpret_cast(buffer2VarMutDesc.kernelArgumentVariable->getBufferUsages().statelessWithoutOffset[0]); memcpy(&usmPatchAddressValue, gpuVa2PatchFullAddress, sizeof(uint64_t)); EXPECT_EQ(reinterpret_cast(nullSurface), usmPatchAddressValue); @@ -438,21 +448,19 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto &bufferVarMutDesc = mutation.variables[0]; - EXPECT_EQ(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS, bufferVarMutDesc.varType); - auto &bufferInternalDesc = bufferVarMutDesc.var->getDesc(); + auto &bufferVarMutDesc = mutation.variables.kernelVariables.kernelArguments[0]; + auto &bufferInternalDesc = bufferVarMutDesc.kernelArgumentVariable->getDesc(); EXPECT_EQ(L0::MCL::VariableType::buffer, bufferInternalDesc.type); - ASSERT_NE(0u, bufferVarMutDesc.var->getBufferUsages().commandBufferWithoutOffset.size()); - auto gpuVaPatchFullAddress = reinterpret_cast(bufferVarMutDesc.var->getBufferUsages().commandBufferWithoutOffset[0]); + ASSERT_NE(0u, bufferVarMutDesc.kernelArgumentVariable->getBufferUsages().commandBufferWithoutOffset.size()); + auto gpuVaPatchFullAddress = reinterpret_cast(bufferVarMutDesc.kernelArgumentVariable->getBufferUsages().commandBufferWithoutOffset[0]); memcpy(&usmPatchAddressValue, gpuVaPatchFullAddress, sizeof(uint64_t)); EXPECT_EQ(reinterpret_cast(usm1), usmPatchAddressValue); - auto &immediateVarMutDesc = mutation.variables[1]; - EXPECT_EQ(ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS, immediateVarMutDesc.varType); - auto &immediateInternalDesc = immediateVarMutDesc.var->getDesc(); + auto &immediateVarMutDesc = mutation.variables.kernelVariables.kernelArguments[1]; + auto &immediateInternalDesc = immediateVarMutDesc.kernelArgumentVariable->getDesc(); EXPECT_EQ(L0::MCL::VariableType::value, immediateInternalDesc.type); - ASSERT_NE(0u, immediateVarMutDesc.var->getValueUsages().commandBufferWithoutOffset.size()); - auto immediatePatchFullAddress = reinterpret_cast(immediateVarMutDesc.var->getValueUsages().commandBufferWithoutOffset[0]); + ASSERT_NE(0u, immediateVarMutDesc.kernelArgumentVariable->getValueUsages().commandBufferWithoutOffset.size()); + auto immediatePatchFullAddress = reinterpret_cast(immediateVarMutDesc.kernelArgumentVariable->getValueUsages().commandBufferWithoutOffset[0]); memcpy(&valueVariablePatchValue, immediatePatchFullAddress, sizeof(uint32_t)); EXPECT_EQ(value1, valueVariablePatchValue); @@ -517,7 +525,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(0u, mutation.variables.size()); + ASSERT_EQ(1u, mutation.variables.kernelVariables.kernelArguments.size()); + EXPECT_EQ(nullptr, mutation.variables.kernelVariables.kernelArguments[0].kernelArgumentVariable); } HWCMDTEST_F(IGFX_XE_HP_CORE, @@ -669,7 +678,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, resizeKernelArg(1); prepareKernelArg(0, L0::MCL::VariableType::buffer, kernelAllMask); - mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS; + mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS; auto result = mutableCommandList->getNextCommandId(&mutableCommandIdDesc, 0, nullptr, &commandId); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -682,12 +691,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - for (auto &varDesc : mutation.variables) { - if (varDesc.varType == static_cast(ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT)) { - varDesc.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE; - } - } + mutation.mutationFlags |= ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT; ze_mutable_group_count_exp_desc_t groupCountDesc = {ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC}; @@ -799,7 +803,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, resizeKernelArg(1); prepareKernelArg(0, L0::MCL::VariableType::buffer, kernelAllMask); - mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS; + mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS; auto result = mutableCommandList->getNextCommandId(&mutableCommandIdDesc, 0, nullptr, &commandId); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -813,11 +817,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - for (auto &varDesc : mutation.variables) { - if (varDesc.varType == static_cast(ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE)) { - varDesc.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT; - } - } + mutation.mutationFlags |= ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE; ze_mutable_group_size_exp_desc_t groupSizeDesc = {ZE_STRUCTURE_TYPE_MUTABLE_GROUP_SIZE_EXP_DESC}; @@ -919,7 +919,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, resizeKernelArg(1); prepareKernelArg(0, L0::MCL::VariableType::buffer, kernelAllMask); - mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET | ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS; + mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS; auto result = mutableCommandList->getNextCommandId(&mutableCommandIdDesc, 0, nullptr, &commandId); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -933,11 +933,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - for (auto &varDesc : mutation.variables) { - if (varDesc.varType == static_cast(ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET)) { - varDesc.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT; - } - } + mutation.mutationFlags |= ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; ze_mutable_global_offset_exp_desc_t globalOffsetDesc = {ZE_STRUCTURE_TYPE_MUTABLE_GLOBAL_OFFSET_EXP_DESC}; @@ -1201,7 +1197,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, auto originalEvent = createTestEvent(false, false, false, false); auto mutatedEvent = createTestEvent(false, false, false, false); - mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT; + mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT; auto result = mutableCommandList->getNextCommandId(&mutableCommandIdDesc, 0, nullptr, &commandId); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -1215,11 +1211,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - for (auto &varDesc : mutation.variables) { - if (varDesc.varType == static_cast(ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT)) { - varDesc.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT; - } - } + mutation.mutationFlags |= ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT; result = mutableCommandList->updateMutableCommandSignalEventExp(commandId, mutatedEvent->toHandle()); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); @@ -1687,7 +1679,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, auto mutatedEvent = createTestEvent(false, false, false, false); auto mutatedHandle = mutatedEvent->toHandle(); - mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS; + mutableCommandIdDesc.flags = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT; auto result = mutableCommandList->getNextCommandId(&mutableCommandIdDesc, 0, nullptr, &commandId); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -1701,11 +1693,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, result = mutableCommandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - for (auto &varDesc : mutation.variables) { - if (varDesc.varType == static_cast(ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS)) { - varDesc.varType = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT; - } - } + mutation.mutationFlags |= ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS; result = mutableCommandList->updateMutableCommandWaitEventsExp(commandId, 1, &mutatedHandle); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);