From a869921c2a470ab64fdd0b4c1e8eb251bf21bd28 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Fri, 9 Jul 2021 12:14:05 +0000 Subject: [PATCH] Remove RMW from gfx allocations Signed-off-by: Zbigniew Zdanowicz --- .../core/source/cmdlist/cmdlist_hw_base.inl | 6 ++- .../source/cmdlist/cmdlist_hw_xehp_plus.inl | 6 ++- .../cmdqueue/cmdqueue_xe_hp_core_plus.inl | 2 +- level_zero/core/source/kernel/kernel_hw.h | 5 +- .../hardware_interface_xehp_plus.inl | 2 +- .../aub_command_stream_receiver_hw_base.inl | 9 ++-- .../device_queue/device_queue_hw_base.inl | 14 +++--- .../device_queue/device_queue_hw_bdw_plus.inl | 46 ++++++++++--------- .../device_queue_hw_profiling.inl | 18 ++++---- opencl/source/gen8/device_queue_gen8.cpp | 13 +++--- opencl/source/gen8/gpgpu_walker_gen8.cpp | 7 +-- opencl/source/gen9/gpgpu_walker_gen9.cpp | 8 ++-- .../enqueue_kernel_two_ooq_tests.cpp | 2 +- .../experimental_command_buffer.inl | 13 +++--- .../helpers/flat_batch_buffer_helper_hw.inl | 17 +++---- shared/source/helpers/hw_helper_base.inl | 4 +- .../encoders/test_encode_dispatch_kernel.cpp | 24 ++++------ 17 files changed, 104 insertions(+), 92 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index e6214c2576..6ebedb378a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -126,13 +126,15 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z if (neoDevice->getDebugger()) { auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); - auto surfaceState = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); auto debugSurface = device->getDebugSurface(); auto mocs = device->getMOCS(false, false); - NEO::EncodeSurfaceState::encodeBuffer(surfaceState, debugSurface->getGpuAddress(), + auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; + NEO::EncodeSurfaceState::encodeBuffer(&surfaceState, debugSurface->getGpuAddress(), debugSurface->getUnderlyingBufferSize(), mocs, false, false, false, neoDevice->getNumAvailableDevices(), debugSurface, neoDevice->getGmmHelper(), kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u); + *reinterpret_cast(surfaceStateSpace) = surfaceState; } appendSignalEventPostWalker(hEvent); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl index 38ddfa0234..f6fd4f56c5 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl @@ -191,14 +191,16 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z if (neoDevice->getDebugger()) { auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); - auto surfaceState = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); auto debugSurface = device->getDebugSurface(); auto mocs = device->getMOCS(false, false); - NEO::EncodeSurfaceState::encodeBuffer(surfaceState, debugSurface->getGpuAddress(), + auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; + NEO::EncodeSurfaceState::encodeBuffer(&surfaceState, debugSurface->getGpuAddress(), debugSurface->getUnderlyingBufferSize(), mocs, false, false, false, neoDevice->getNumAvailableDevices(), debugSurface, neoDevice->getGmmHelper(), kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, 1u); + *reinterpret_cast(surfaceStateSpace) = surfaceState; } // Attach Function residency to our CommandList residency { diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_plus.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_plus.inl index 852793004e..826f90d4b6 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_plus.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_plus.inl @@ -54,7 +54,7 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool auto &hwInfo = neoDevice->getHardwareInfo(); auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { - auto pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); + pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); *pSbaCmd = sbaCmd; } diff --git a/level_zero/core/source/kernel/kernel_hw.h b/level_zero/core/source/kernel/kernel_hw.h index 1e26a8d2a4..ee2ffa280e 100644 --- a/level_zero/core/source/kernel/kernel_hw.h +++ b/level_zero/core/source/kernel/kernel_hw.h @@ -48,10 +48,12 @@ struct KernelHw : public KernelImp { offset = 0; } void *surfaceStateAddress = nullptr; + auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; if (NEO::isValidOffset(argInfo.bindless)) { surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless); } else { surfaceStateAddress = ptrOffset(surfaceStateHeapData.get(), argInfo.bindful); + surfaceState = *reinterpret_cast(surfaceStateAddress); } uint64_t bufferAddressForSsh = baseAddress; auto alignment = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment(); @@ -66,10 +68,11 @@ struct KernelHw : public KernelImp { auto mocs = this->module->getDevice()->getMOCS(l3Enabled, false); NEO::Device *neoDevice = module->getDevice()->getNEODevice(); - NEO::EncodeSurfaceState::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs, + NEO::EncodeSurfaceState::encodeBuffer(&surfaceState, bufferAddressForSsh, bufferSizeForSsh, mocs, false, false, false, neoDevice->getNumAvailableDevices(), alloc, neoDevice->getGmmHelper(), kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u); + *reinterpret_cast(surfaceStateAddress) = surfaceState; } std::unique_ptr clone() const override { diff --git a/opencl/source/command_queue/hardware_interface_xehp_plus.inl b/opencl/source/command_queue/hardware_interface_xehp_plus.inl index 87a253b047..dd8052e370 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_plus.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_plus.inl @@ -141,7 +141,7 @@ inline void HardwareInterface::programWalker( auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); timestampPacket->setPacketsUsed(partitionCount); } else { - auto computeWalkerOnStream = reinterpret_cast(commandStream.getSpace(sizeof(typename GfxFamily::COMPUTE_WALKER))); + auto computeWalkerOnStream = commandStream.getSpaceForCmd(); *computeWalkerOnStream = walkerCmd; } } diff --git a/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl b/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl index 058b339c88..b415643dfd 100644 --- a/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl +++ b/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl @@ -826,11 +826,12 @@ void AUBCommandStreamReceiverHw::addGUCStartMessage(uint64_t batchBuf uint32_t *header = static_cast(linearStream.getSpace(sizeof(uint32_t))); *header = getGUCWorkQueueItemHeader(); - MI_BATCH_BUFFER_START *miBatchBufferStart = linearStream.getSpaceForCmd(); + MI_BATCH_BUFFER_START *miBatchBufferStartSpace = linearStream.getSpaceForCmd(); DEBUG_BREAK_IF(bufferSize != linearStream.getUsed()); - *miBatchBufferStart = GfxFamily::cmdInitBatchBufferStart; - miBatchBufferStart->setBatchBufferStartAddressGraphicsaddress472(AUB::ptrToPPGTT(buffer.get())); - miBatchBufferStart->setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); + auto miBatchBufferStart = GfxFamily::cmdInitBatchBufferStart; + miBatchBufferStart.setBatchBufferStartAddressGraphicsaddress472(AUB::ptrToPPGTT(buffer.get())); + miBatchBufferStart.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); + *miBatchBufferStartSpace = miBatchBufferStart; auto physBufferAddres = ppgtt->map(reinterpret_cast(buffer.get()), bufferSize, this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()), diff --git a/opencl/source/device_queue/device_queue_hw_base.inl b/opencl/source/device_queue/device_queue_hw_base.inl index 8c0d9ac567..af51cfd995 100644 --- a/opencl/source/device_queue/device_queue_hw_base.inl +++ b/opencl/source/device_queue/device_queue_hw_base.inl @@ -94,12 +94,14 @@ void DeviceQueueHw::resetDeviceQueue() { template void DeviceQueueHw::initPipeControl(PIPE_CONTROL *pc) { - *pc = GfxFamily::cmdInitPipeControl; - pc->setStateCacheInvalidationEnable(0x1); - pc->setDcFlushEnable(true); - pc->setPipeControlFlushEnable(true); - pc->setTextureCacheInvalidationEnable(true); - pc->setCommandStreamerStallEnable(true); + auto cmd = GfxFamily::cmdInitPipeControl; + cmd.setStateCacheInvalidationEnable(0x1); + cmd.setDcFlushEnable(true); + cmd.setPipeControlFlushEnable(true); + cmd.setTextureCacheInvalidationEnable(true); + cmd.setCommandStreamerStallEnable(true); + + *pc = cmd; } template diff --git a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl index 56c53a1278..39ad88c5ae 100644 --- a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl +++ b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl @@ -59,13 +59,13 @@ void DeviceQueueHw::buildSlbDummyCommands() { addMiAtomicCmdWa((uint64_t)&igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder); - auto mediaIdLoad = slbCS.getSpaceForCmd(); - *mediaIdLoad = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; - mediaIdLoad->setInterfaceDescriptorTotalLength(2048); + auto mediaIdLoadSpace = slbCS.getSpaceForCmd(); + auto mediaIdLoad = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; + mediaIdLoad.setInterfaceDescriptorTotalLength(2048); auto dataStartAddress = colorCalcStateSize; - - mediaIdLoad->setInterfaceDescriptorDataStartAddress(dataStartAddress + sizeof(INTERFACE_DESCRIPTOR_DATA) * schedulerIDIndex); + mediaIdLoad.setInterfaceDescriptorDataStartAddress(dataStartAddress + sizeof(INTERFACE_DESCRIPTOR_DATA) * schedulerIDIndex); + *mediaIdLoadSpace = mediaIdLoad; addLriCmdWa(true); @@ -80,14 +80,15 @@ void DeviceQueueHw::buildSlbDummyCommands() { addPipeControlCmdWa(true); } - auto gpgpuWalker = slbCS.getSpaceForCmd(); - *gpgpuWalker = GfxFamily::cmdInitGpgpuWalker; - gpgpuWalker->setSimdSize(GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16); - gpgpuWalker->setThreadGroupIdXDimension(1); - gpgpuWalker->setThreadGroupIdYDimension(1); - gpgpuWalker->setThreadGroupIdZDimension(1); - gpgpuWalker->setRightExecutionMask(0xFFFFFFFF); - gpgpuWalker->setBottomExecutionMask(0xFFFFFFFF); + auto gpgpuWalkerSpace = slbCS.getSpaceForCmd(); + auto gpgpuWalker = GfxFamily::cmdInitGpgpuWalker; + gpgpuWalker.setSimdSize(GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16); + gpgpuWalker.setThreadGroupIdXDimension(1); + gpgpuWalker.setThreadGroupIdYDimension(1); + gpgpuWalker.setThreadGroupIdZDimension(1); + gpgpuWalker.setRightExecutionMask(0xFFFFFFFF); + gpgpuWalker.setBottomExecutionMask(0xFFFFFFFF); + *gpgpuWalkerSpace = gpgpuWalker; mediaStateFlush = slbCS.getSpaceForCmd(); *mediaStateFlush = GfxFamily::cmdInitMediaStateFlush; @@ -109,10 +110,11 @@ void DeviceQueueHw::buildSlbDummyCommands() { auto bbStartOffset = (commandsSize * 128) - slbCS.getUsed(); slbCS.getSpace(bbStartOffset); - auto bbStart = slbCS.getSpaceForCmd(); - *bbStart = GfxFamily::cmdInitBatchBufferStart; + auto bbStartSpace = slbCS.getSpaceForCmd(); + auto bbStart = GfxFamily::cmdInitBatchBufferStart; auto slbPtr = reinterpret_cast(slbBuffer->getUnderlyingBuffer()); - bbStart->setBatchBufferStartAddressGraphicsaddress472(slbPtr); + bbStart.setBatchBufferStartAddressGraphicsaddress472(slbPtr); + *bbStartSpace = bbStart; igilCmdQueue->m_controls.m_CleanupSectionSize = 0; igilQueue->m_controls.m_CleanupSectionAddress = 0; @@ -124,12 +126,12 @@ void DeviceQueueHw::addMediaStateClearCmds() { addPipeControlCmdWa(); - auto pipeControl = slbCS.getSpaceForCmd(); - *pipeControl = GfxFamily::cmdInitPipeControl; - pipeControl->setGenericMediaStateClear(true); - pipeControl->setCommandStreamerStallEnable(true); - - addDcFlushToPipeControlWa(pipeControl); + auto pipeControlSpace = slbCS.getSpaceForCmd(); + auto pipeControl = GfxFamily::cmdInitPipeControl; + pipeControl.setGenericMediaStateClear(true); + pipeControl.setCommandStreamerStallEnable(true); + addDcFlushToPipeControlWa(&pipeControl); + *pipeControlSpace = pipeControl; auto pVfeState = PreambleHelper::getSpaceForVfeState(&slbCS, device->getHardwareInfo(), EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; diff --git a/opencl/source/device_queue/device_queue_hw_profiling.inl b/opencl/source/device_queue/device_queue_hw_profiling.inl index 72d151c146..e6d4f8fb2b 100644 --- a/opencl/source/device_queue/device_queue_hw_profiling.inl +++ b/opencl/source/device_queue/device_queue_hw_profiling.inl @@ -13,15 +13,17 @@ namespace NEO { template void DeviceQueueHw::addProfilingEndCmds(uint64_t timestampAddress) { - auto pPipeControlCmd = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL)); - *pPipeControlCmd = GfxFamily::cmdInitPipeControl; - pPipeControlCmd->setCommandStreamerStallEnable(true); + auto pipeControlSpace = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL)); + auto pipeControlCmd = GfxFamily::cmdInitPipeControl; + pipeControlCmd.setCommandStreamerStallEnable(true); + *pipeControlSpace = pipeControlCmd; //low part - auto pMICmdLow = (MI_STORE_REGISTER_MEM *)slbCS.getSpace(sizeof(MI_STORE_REGISTER_MEM)); - *pMICmdLow = GfxFamily::cmdInitStoreRegisterMem; - GpgpuWalkerHelper::adjustMiStoreRegMemMode(pMICmdLow); - pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); - pMICmdLow->setMemoryAddress(timestampAddress); + auto mICmdLowSpace = (MI_STORE_REGISTER_MEM *)slbCS.getSpace(sizeof(MI_STORE_REGISTER_MEM)); + auto mICmdLow = GfxFamily::cmdInitStoreRegisterMem; + GpgpuWalkerHelper::adjustMiStoreRegMemMode(&mICmdLow); + mICmdLow.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); + mICmdLow.setMemoryAddress(timestampAddress); + *mICmdLowSpace = mICmdLow; } } // namespace NEO diff --git a/opencl/source/gen8/device_queue_gen8.cpp b/opencl/source/gen8/device_queue_gen8.cpp index b224d84eec..23ad58257f 100644 --- a/opencl/source/gen8/device_queue_gen8.cpp +++ b/opencl/source/gen8/device_queue_gen8.cpp @@ -62,12 +62,13 @@ void DeviceQueueHw::addPipeControlCmdWa(bool isNoopCmd) {} template <> void DeviceQueueHw::addProfilingEndCmds(uint64_t timestampAddress) { - auto pPipeControlCmd = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL)); - *pPipeControlCmd = Family::cmdInitPipeControl; - pPipeControlCmd->setCommandStreamerStallEnable(true); - pPipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP); - pPipeControlCmd->setAddressHigh(timestampAddress >> 32); - pPipeControlCmd->setAddress(timestampAddress & (0xffffffff)); + auto pipeControlSpace = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL)); + auto pipeControlCmd = Family::cmdInitPipeControl; + pipeControlCmd.setCommandStreamerStallEnable(true); + pipeControlCmd.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP); + pipeControlCmd.setAddressHigh(timestampAddress >> 32); + pipeControlCmd.setAddress(timestampAddress & (0xffffffff)); + *pipeControlSpace = pipeControlCmd; } template <> diff --git a/opencl/source/gen8/gpgpu_walker_gen8.cpp b/opencl/source/gen8/gpgpu_walker_gen8.cpp index 7e1c7966aa..4c218c7249 100644 --- a/opencl/source/gen8/gpgpu_walker_gen8.cpp +++ b/opencl/source/gen8/gpgpu_walker_gen8.cpp @@ -23,9 +23,10 @@ void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStr if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL; - auto pCmd = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); - *pCmd = BDWFamily::cmdInitPipeControl; - pCmd->setCommandStreamerStallEnable(true); + auto pipeControlSpace = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); + auto pipeControl = BDWFamily::cmdInitPipeControl; + pipeControl.setCommandStreamerStallEnable(true); + *pipeControlSpace = pipeControl; // Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS); } diff --git a/opencl/source/gen9/gpgpu_walker_gen9.cpp b/opencl/source/gen9/gpgpu_walker_gen9.cpp index 6447bcdfc4..a19b966b3d 100644 --- a/opencl/source/gen9/gpgpu_walker_gen9.cpp +++ b/opencl/source/gen9/gpgpu_walker_gen9.cpp @@ -23,9 +23,11 @@ void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStr if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL; - auto pCmd = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); - *pCmd = SKLFamily::cmdInitPipeControl; - pCmd->setCommandStreamerStallEnable(true); + auto pipeControlSpace = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); + auto pipeControl = SKLFamily::cmdInitPipeControl; + pipeControl.setCommandStreamerStallEnable(true); + *pipeControlSpace = pipeControl; + // Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS); } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_two_ooq_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_two_ooq_tests.cpp index be11ab077d..b228e04afa 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_two_ooq_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_two_ooq_tests.cpp @@ -141,7 +141,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TwoOOQsTwoDependentWalkers, GivenTwoCommandQueuesWhe auto numCommands = commandsList.size(); EXPECT_EQ(1u, numCommands); - auto expectedCmd = MEDIA_VFE_STATE::sInit(); + auto expectedCmd = FamilyType::cmdInitMediaVfeState; if (numCommands > 1) { uint32_t commandIndex = 0; diff --git a/shared/source/command_stream/experimental_command_buffer.inl b/shared/source/command_stream/experimental_command_buffer.inl index a291a36a02..0ee3aeb3c7 100644 --- a/shared/source/command_stream/experimental_command_buffer.inl +++ b/shared/source/command_stream/experimental_command_buffer.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -98,11 +98,12 @@ void ExperimentalCommandBuffer::addExperimentalCommands() { *semaphoreData = 1; uint64_t gpuAddr = experimentalAllocation->getGpuAddress() + experimentalAllocationOffset; - auto semaphoreCmd = currentStream->getSpaceForCmd(); - *semaphoreCmd = GfxFamily::cmdInitMiSemaphoreWait; - semaphoreCmd->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD); - semaphoreCmd->setSemaphoreDataDword(*semaphoreData); - semaphoreCmd->setSemaphoreGraphicsAddress(gpuAddr); + auto semaphoreCmdSpace = currentStream->getSpaceForCmd(); + auto semaphoreCmd = GfxFamily::cmdInitMiSemaphoreWait; + semaphoreCmd.setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD); + semaphoreCmd.setSemaphoreDataDword(*semaphoreData); + semaphoreCmd.setSemaphoreGraphicsAddress(gpuAddr); + *semaphoreCmdSpace = semaphoreCmd; } template diff --git a/shared/source/helpers/flat_batch_buffer_helper_hw.inl b/shared/source/helpers/flat_batch_buffer_helper_hw.inl index 8b828f006f..b4260b351b 100644 --- a/shared/source/helpers/flat_batch_buffer_helper_hw.inl +++ b/shared/source/helpers/flat_batch_buffer_helper_hw.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -163,13 +163,14 @@ char *FlatBatchBufferHelperHw::getIndirectPatchCommands(size_t &indir for (auto &patchInfoData : patchInfoCopy) { if (patchInfoData.requiresIndirectPatching()) { bool is32BitAddress = patchInfoData.patchAddressSize == sizeof(uint32_t); - auto storeDataImmediate = indirectPatchCommandStream.getSpaceForCmd(); - *storeDataImmediate = GfxFamily::cmdInitStoreDataImm; - storeDataImmediate->setAddress(patchInfoData.targetAllocation + patchInfoData.targetAllocationOffset); - storeDataImmediate->setStoreQword(!is32BitAddress); - storeDataImmediate->setDwordLength(is32BitAddress ? MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD : MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD); - storeDataImmediate->setDataDword0(static_cast((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) & 0x0000FFFFFFFFULL)); - storeDataImmediate->setDataDword1(static_cast((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) >> 32)); + auto storeDataImmediateSpace = indirectPatchCommandStream.getSpaceForCmd(); + auto storeDataImmediate = GfxFamily::cmdInitStoreDataImm; + storeDataImmediate.setAddress(patchInfoData.targetAllocation + patchInfoData.targetAllocationOffset); + storeDataImmediate.setStoreQword(!is32BitAddress); + storeDataImmediate.setDwordLength(is32BitAddress ? MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD : MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD); + storeDataImmediate.setDataDword0(static_cast((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) & 0x0000FFFFFFFFULL)); + storeDataImmediate.setDataDword1(static_cast((patchInfoData.sourceAllocation + patchInfoData.sourceAllocationOffset) >> 32)); + *storeDataImmediateSpace = storeDataImmediate; PatchInfoData patchInfoForAddress(patchInfoData.targetAllocation, patchInfoData.targetAllocationOffset, patchInfoData.targetType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - 2 * sizeof(uint64_t), PatchInfoAllocationType::Default); PatchInfoData patchInfoForValue(patchInfoData.sourceAllocation, patchInfoData.sourceAllocationOffset, patchInfoData.sourceType, 0u, stiCommandOffset + sizeof(MI_STORE_DATA_IMM) - sizeof(uint64_t), PatchInfoAllocationType::Default); diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index 166f6f5a74..61157b377c 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -160,9 +160,9 @@ void HwHelperHw::setRenderSurfaceStateForBuffer(const RootDeviceEnvironm EncodeSurfaceState::setCoherencyType(&state, RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT); state.setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } - *surfaceState = state; + setL1CachePolicy(useL1Cache, &state, rootDeviceEnvironment.getHardwareInfo()); - setL1CachePolicy(useL1Cache, surfaceState, rootDeviceEnvironment.getHardwareInfo()); + *surfaceState = state; } template diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index fd1c735bb3..bd4c2bb3eb 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -319,8 +319,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenOneBindingTableEntryWh using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t numBindingTable = 1; - BINDING_TABLE_STATE bindingTableState; - bindingTableState.sInit(); + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; auto ssh = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); size_t sizeUsed = 0x20; @@ -353,8 +352,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumBindingTableZeroWhen using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t numBindingTable = 0; - BINDING_TABLE_STATE bindingTableState; - bindingTableState.sInit(); + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; auto ssh = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); size_t sizeUsed = 0x20; @@ -785,8 +783,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenS using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using WALKER = typename FamilyType::WALKER_TYPE; uint32_t numBindingTable = 1; - BINDING_TABLE_STATE bindingTableState; - bindingTableState.sInit(); + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; uint32_t dims[] = {1, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -816,8 +813,7 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindlessKernelWhenDispatchingKernelThen using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using WALKER = typename FamilyType::WALKER_TYPE; uint32_t numBindingTable = 1; - BINDING_TABLE_STATE bindingTableState; - bindingTableState.sInit(); + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; uint32_t dims[] = {1, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -846,8 +842,7 @@ HWTEST_F(EncodeDispatchKernelTest, givenNonBindlessOrStatelessArgWhenDispatching using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor; uint32_t numBindingTable = 1; - BINDING_TABLE_STATE bindingTableState; - bindingTableState.sInit(); + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; auto ssh = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); auto ioh = cmdContainer->getIndirectHeap(HeapType::INDIRECT_OBJECT); @@ -1135,8 +1130,7 @@ HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessKernelWhenBindlessModeE commandContainer->setDirtyStateForAllHeaps(false); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(), pDevice->getNumAvailableDevices() > 1, pDevice->getRootDeviceIndex()); uint32_t numBindingTable = 1; - BINDING_TABLE_STATE bindingTableState; - bindingTableState.sInit(); + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; uint32_t dims[] = {1, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -1170,8 +1164,7 @@ HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindfulKernelWhenBindlessModeEn commandContainer->setDirtyStateForAllHeaps(false); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(), pDevice->getNumAvailableDevices() > 1, pDevice->getRootDeviceIndex()); uint32_t numBindingTable = 1; - BINDING_TABLE_STATE bindingTableState; - bindingTableState.sInit(); + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; uint32_t dims[] = {1, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); @@ -1205,8 +1198,7 @@ HWTEST_F(BindlessCommandEncodeStatesTesttt, givenBindlessModeEnabledWhenDispatch commandContainer->setDirtyStateForAllHeaps(false); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(), pDevice->getNumAvailableDevices() > 1, pDevice->getRootDeviceIndex()); uint32_t numBindingTable = 1; - BINDING_TABLE_STATE bindingTableState; - bindingTableState.sInit(); + BINDING_TABLE_STATE bindingTableState = FamilyType::cmdInitBindingTableState; uint32_t dims[] = {1, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder());