/*
 * Copyright (C) 2018-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/aub/aub_center.h"
#include "shared/source/aub/aub_helper.h"
#include "shared/source/aub_mem_dump/aub_alloc_dump.h"
#include "shared/source/aub_mem_dump/aub_alloc_dump.inl"
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
#include "shared/source/command_stream/aub_command_stream_receiver.h"
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/command_stream/submission_status.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/os_interface/product_helper.h"

#include <cstring>

namespace NEO {

// NOTE(review): the template parameter lists, template arguments and cast target types in this file
// were reconstructed (they were missing from the text this was restored from). Confirm them against
// tbx_command_stream_receiver_hw.h before merging.

// Constructs the receiver: creates the physical address allocator and the PPGTT/GGTT page tables,
// initializes the root device's AubCenter and takes its AubManager, resolves the AUB device id
// (the OverrideAubDeviceId debug flag wins unless it is -1) and routes allocation downloads to
// downloadAllocationTbx().
template <typename GfxFamily>
TbxCommandStreamReceiverHw<GfxFamily>::TbxCommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment,
                                                                  uint32_t rootDeviceIndex,
                                                                  const DeviceBitfield deviceBitfield)
    : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield) {

    forceSkipResourceCleanupRequired = true;

    physicalAddressAllocator.reset(this->createPhysicalAddressAllocator(&this->peekHwInfo()));
    executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->initAubCenter(this->localMemoryEnabled, "", this->getType());
    auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get();
    UNRECOVERABLE_IF(nullptr == aubCenter);

    aubManager = aubCenter->getAubManager();

    ppgtt = std::make_unique<std::conditional<is64bit, PML4, PDPE>::type>(physicalAddressAllocator.get());
    ggtt = std::make_unique<PDPE>(physicalAddressAllocator.get());

    auto debugDeviceId = DebugManager.flags.OverrideAubDeviceId.get();
    this->aubDeviceId = debugDeviceId == -1
                            ? this->peekHwInfo().capabilityTable.aubDeviceId
                            : static_cast<uint32_t>(debugDeviceId);
    this->stream = &tbxStream;
    this->downloadAllocationImpl = [this](GraphicsAllocation &graphicsAllocation) {
        this->downloadAllocationTbx(graphicsAllocation);
    };
}

// Detaches the download callback (it captures `this`), closes the TBX stream if it was initialized
// and releases the engine resources tracked through gttRemap.
template <typename GfxFamily>
TbxCommandStreamReceiverHw<GfxFamily>::~TbxCommandStreamReceiverHw() {
    this->downloadAllocationImpl = nullptr;

    if (streamInitialized) {
        tbxStream.close();
    }

    this->freeEngineInfo(gttRemap);
}

// Prepares the engine for submissions. When a hardware context controller exists the work is
// delegated to it. Otherwise, unless the LRCA already exists, the MMIO registers are programmed and
// the global HW status page, LRCA and ring buffer are allocated, mapped into the GGTT and written to
// the TBX stream.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::initializeEngine() {
    isEngineInitialized = true;

    if (hardwareContextController) {
        hardwareContextController->initialize();
        return;
    }

    auto csTraits = this->getCsTraits(osContext->getEngineType());

    if (engineInfo.pLRCA) {
        return;
    }

    this->initGlobalMMIO();
    this->initEngineMMIO();
    this->initAdditionalMMIO();

    // Global HW Status Page
    {
        const size_t sizeHWSP = 0x1000;
        const size_t alignHWSP = 0x1000;
        engineInfo.pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP);
        engineInfo.ggttHWSP = gttRemap.map(engineInfo.pGlobalHWStatusPage, sizeHWSP);

        auto physHWSP = ggtt->map(engineInfo.ggttHWSP, sizeHWSP, this->getGTTBits(), this->getMemoryBankForGtt());

        // Write our GHWSP
        AubGTTData data = {0};
        this->getGTTData(reinterpret_cast<void *>(physHWSP), data);
        AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttHWSP, sizeHWSP, physHWSP, data);
        tbxStream.writeMMIO(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2080), engineInfo.ggttHWSP);
    }

    // Allocate the LRCA
    const size_t sizeLRCA = csTraits.sizeLRCA;
    const size_t alignLRCA = csTraits.alignLRCA;
    auto pLRCABase = alignedMalloc(sizeLRCA, alignLRCA);
    engineInfo.pLRCA = pLRCABase;

    // Initialize the LRCA to a known state
    csTraits.initialize(pLRCABase);

    // Reserve the RCS ring buffer
    engineInfo.sizeRingBuffer = 0x4 * 0x1000;
    {
        const size_t alignRCS = 0x1000;
        engineInfo.pRingBuffer = alignedMalloc(engineInfo.sizeRingBuffer, alignRCS);
        engineInfo.ggttRingBuffer = gttRemap.map(engineInfo.pRingBuffer, engineInfo.sizeRingBuffer);

        auto physRCS = ggtt->map(engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, this->getGTTBits(), this->getMemoryBankForGtt());

        AubGTTData data = {0};
        this->getGTTData(reinterpret_cast<void *>(physRCS), data);
        AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRCS, data);
    }

    // Initialize the ring MMIO registers
    {
        uint32_t ringHead = 0x000;
        uint32_t ringTail = 0x000;
        auto ringBase = engineInfo.ggttRingBuffer;
        auto ringCtrl = static_cast<uint32_t>((engineInfo.sizeRingBuffer - 0x1000) | 1);
        csTraits.setRingHead(pLRCABase, ringHead);
        csTraits.setRingTail(pLRCABase, ringTail);
        csTraits.setRingBase(pLRCABase, ringBase);
        csTraits.setRingCtrl(pLRCABase, ringCtrl);
    }

    // Write our LRCA
    {
        engineInfo.ggttLRCA = gttRemap.map(engineInfo.pLRCA, sizeLRCA);
        auto lrcAddressPhys = ggtt->map(engineInfo.ggttLRCA, sizeLRCA, this->getGTTBits(), this->getMemoryBankForGtt());

        AubGTTData data = {0};
        this->getGTTData(reinterpret_cast<void *>(lrcAddressPhys), data);
        AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys, data);
        AUB::addMemoryWrite(
            tbxStream,
            lrcAddressPhys,
            pLRCABase,
            sizeLRCA,
            this->getAddressSpace(csTraits.aubHintLRCA),
            csTraits.aubHintLRCA);
    }

    DEBUG_BREAK_IF(!engineInfo.pLRCA);
}

// Factory. With withAubDump set, creates the receiver wrapped in CommandStreamReceiverWithAUBDump,
// sets up the sub-capture manager when sub-capture mode is above Off and opens the AUB manager file
// if one exists and is not open yet. Without it, creates a plain TBX receiver. When the resulting
// receiver has no AUB manager, its stream is opened and initialized here. The caller receives a raw
// pointer to the newly allocated receiver.
template <typename GfxFamily>
CommandStreamReceiver *TbxCommandStreamReceiverHw<GfxFamily>::create(const std::string &baseName,
                                                                     bool withAubDump,
                                                                     ExecutionEnvironment &executionEnvironment,
                                                                     uint32_t rootDeviceIndex,
                                                                     const DeviceBitfield deviceBitfield) {
    TbxCommandStreamReceiverHw<GfxFamily> *csr;
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];
    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
    const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
    if (withAubDump) {
        auto localMemoryEnabled = gfxCoreHelper.getEnableLocalMemory(hwInfo);
        auto fullName = AUBCommandStreamReceiver::createFullFilePath(hwInfo, baseName, rootDeviceIndex);
        // "unk" is the value treated as "no capture file name override"
        if (DebugManager.flags.AUBDumpCaptureFileName.get() != "unk") {
            fullName.assign(DebugManager.flags.AUBDumpCaptureFileName.get());
        }
        rootDeviceEnvironment.initAubCenter(localMemoryEnabled, fullName, CommandStreamReceiverType::CSR_TBX_WITH_AUB);

        csr = new CommandStreamReceiverWithAUBDump<TbxCommandStreamReceiverHw<GfxFamily>>(baseName, executionEnvironment, rootDeviceIndex, deviceBitfield);

        auto aubCenter = rootDeviceEnvironment.aubCenter.get();
        UNRECOVERABLE_IF(nullptr == aubCenter);

        auto subCaptureCommon = aubCenter->getSubCaptureCommon();
        UNRECOVERABLE_IF(nullptr == subCaptureCommon);

        if (subCaptureCommon->subCaptureMode > AubSubCaptureManager::SubCaptureMode::Off) {
            csr->subCaptureManager = std::make_unique<AubSubCaptureManager>(fullName, *subCaptureCommon, ApiSpecificConfig::getRegistryPath());
        }

        if (csr->aubManager) {
            if (!csr->aubManager->isOpen()) {
                csr->aubManager->open(csr->subCaptureManager ? csr->subCaptureManager->getSubCaptureFileName("") : fullName);
                UNRECOVERABLE_IF(!csr->aubManager->isOpen());
            }
        }
    } else {
        csr = new TbxCommandStreamReceiverHw<GfxFamily>(executionEnvironment, rootDeviceIndex, deviceBitfield);
    }

    if (!csr->aubManager) {
        // Open our stream
        csr->stream->open(nullptr);

        // Add the file header.
        bool streamInitialized = csr->stream->init(productHelper.getAubStreamSteppingFromHwRevId(hwInfo), csr->aubDeviceId);
        csr->streamInitialized = streamInitialized;
    }
    return csr;
}

// Submits one batch buffer: makes sure the engine is initialized, adds the command buffer to the
// residency list, writes all resident allocations and then hands the buffer to
// submitBatchBufferTbx(). With sub-capture enabled the AUB manager is paused/unpaused around the
// submission and completion is polled before sub-capture is disabled again.
// Always returns SubmissionStatus::SUCCESS.
template <typename GfxFamily>
SubmissionStatus TbxCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
    if (subCaptureManager && aubManager) {
        aubManager->pause(false);
    }

    initializeEngine();

    // Write our batch buffer
    auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);
    auto batchBufferGpuAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
    auto currentOffset = batchBuffer.usedSize;
    DEBUG_BREAK_IF(currentOffset < batchBuffer.startOffset);
    auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset;
    auto overrideRingHead = false;

    auto submissionTaskCount = this->taskCount + 1;
    allocationsForResidency.push_back(batchBuffer.commandBufferAllocation);
    batchBuffer.commandBufferAllocation->updateResidencyTaskCount(submissionTaskCount, this->osContext->getContextId());
    batchBuffer.commandBufferAllocation->updateTaskCount(submissionTaskCount, osContext->getContextId());

    // Write allocations for residency
    processResidency(allocationsForResidency, 0u);

    if (subCaptureManager && aubManager) {
        auto status = subCaptureManager->getSubCaptureStatus();
        if (!status.wasActiveInPreviousEnqueue && status.isActive) {
            overrideRingHead = true;
        }
        if (!status.wasActiveInPreviousEnqueue && !status.isActive) {
            aubManager->pause(true);
        }
    }

    submitBatchBufferTbx(
        batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer,
        this->getMemoryBank(batchBuffer.commandBufferAllocation),
        this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation),
        overrideRingHead);

    if (subCaptureManager) {
        pollForCompletion();
        subCaptureManager->disableSubCapture();
    }

    return SubmissionStatus::SUCCESS;
}

// Writes the batch buffer to the TBX stream and kicks off its execution. With a hardware context
// controller the (non-empty) buffer is submitted through it. Otherwise the buffer is mapped in the
// PPGTT and written, a batch-buffer-start is appended to the ring buffer (wrapping to the ring start
// when the commands would not fit), the new ring tail is stored in the LRCA and the context
// descriptor is submitted.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::submitBatchBufferTbx(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) {
    if (hardwareContextController) {
        if (batchBufferSize) {
            hardwareContextController->submit(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, MemoryConstants::pageSize64k, overrideRingHead);
        }
        return;
    }

    auto csTraits = this->getCsTraits(osContext->getEngineType());

    {
        auto physBatchBuffer = ppgtt->map(static_cast<uintptr_t>(batchBufferGpuAddress), batchBufferSize, entryBits, memoryBank);

        AubHelperHw<GfxFamily> aubHelperHw(this->localMemoryEnabled);
        AUB::reserveAddressPPGTT(tbxStream, static_cast<uintptr_t>(batchBufferGpuAddress), batchBufferSize, physBatchBuffer,
                                 entryBits, aubHelperHw);

        AUB::addMemoryWrite(
            tbxStream,
            physBatchBuffer,
            batchBuffer,
            batchBufferSize,
            this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary),
            AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary);
    }

    // Add a batch buffer start to the RCS
    auto previousTail = engineInfo.tailRingBuffer;
    {
        using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
        using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
        using MI_NOOP = typename GfxFamily::MI_NOOP;

        auto pTail = ptrOffset(engineInfo.pRingBuffer, engineInfo.tailRingBuffer);
        auto ggttTail = ptrOffset(engineInfo.ggttRingBuffer, engineInfo.tailRingBuffer);

        auto sizeNeeded =
            sizeof(MI_BATCH_BUFFER_START) +
            sizeof(MI_NOOP) +
            sizeof(MI_LOAD_REGISTER_IMM);
        if (engineInfo.tailRingBuffer + sizeNeeded >= engineInfo.sizeRingBuffer) {
            // Pad the remaining ring with NOOPs
            auto sizeToWrap = engineInfo.sizeRingBuffer - engineInfo.tailRingBuffer;
            memset(pTail, 0, sizeToWrap);
            // write remaining ring
            auto physDumpStart = ggtt->map(ggttTail, sizeToWrap, this->getGTTBits(), this->getMemoryBankForGtt());
            AUB::addMemoryWrite(
                tbxStream,
                physDumpStart,
                pTail,
                sizeToWrap,
                this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer),
                AubMemDump::DataTypeHintValues::TraceCommandBuffer);
            previousTail = 0;
            engineInfo.tailRingBuffer = 0;
            pTail = engineInfo.pRingBuffer;
        } else if (engineInfo.tailRingBuffer == 0) {
            // Add a LRI if this is our first submission
            auto lri = GfxFamily::cmdInitLoadRegisterImm;
            lri.setRegisterOffset(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2244));
            lri.setDataDword(0x00010000);
            *static_cast<MI_LOAD_REGISTER_IMM *>(pTail) = lri;
            pTail = static_cast<MI_LOAD_REGISTER_IMM *>(pTail) + 1;
        }

        // Add our BBS
        auto bbs = GfxFamily::cmdInitBatchBufferStart;
        bbs.setBatchBufferStartAddress(static_cast<uint64_t>(batchBufferGpuAddress));
        bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
        *static_cast<MI_BATCH_BUFFER_START *>(pTail) = bbs;
        pTail = static_cast<MI_BATCH_BUFFER_START *>(pTail) + 1;

        // Add a NOOP as our tail needs to be aligned to a QWORD
        *static_cast<MI_NOOP *>(pTail) = GfxFamily::cmdInitNoop;
        pTail = static_cast<MI_NOOP *>(pTail) + 1;

        // Compute our new ring tail.
        engineInfo.tailRingBuffer = static_cast<uint32_t>(ptrDiff(pTail, engineInfo.pRingBuffer));

        // Only dump the new commands
        auto ggttDumpStart = ptrOffset(engineInfo.ggttRingBuffer, previousTail);
        auto dumpStart = ptrOffset(engineInfo.pRingBuffer, previousTail);
        auto dumpLength = engineInfo.tailRingBuffer - previousTail;

        // write RCS
        auto physDumpStart = ggtt->map(ggttDumpStart, dumpLength, this->getGTTBits(), this->getMemoryBankForGtt());
        AUB::addMemoryWrite(
            tbxStream,
            physDumpStart,
            dumpStart,
            dumpLength,
            this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer),
            AubMemDump::DataTypeHintValues::TraceCommandBuffer);

        // update the RCS mmio tail in the LRCA
        auto physLRCA = ggtt->map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer), this->getGTTBits(), this->getMemoryBankForGtt());
        AUB::addMemoryWrite(
            tbxStream,
            physLRCA + 0x101c,
            &engineInfo.tailRingBuffer,
            sizeof(engineInfo.tailRingBuffer),
            this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype));

        DEBUG_BREAK_IF(engineInfo.tailRingBuffer >= engineInfo.sizeRingBuffer);
    }

    // Submit our execlist by submitting to the execlist submit ports
    {
        typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};

        contextDescriptor.sData.valid = true;
        contextDescriptor.sData.forcePageDirRestore = false;
        contextDescriptor.sData.forceRestore = false;
        contextDescriptor.sData.legacy = true;
        contextDescriptor.sData.faultSupport = 0;
        contextDescriptor.sData.privilegeAccessOrPPGTT = true;
        contextDescriptor.sData.aDor64bitSupport = AUB::Traits::addressingBits > 32;

        auto ggttLRCA = engineInfo.ggttLRCA;
        contextDescriptor.sData.logicalRingCtxAddress = ggttLRCA / 4096;
        contextDescriptor.sData.contextID = 0;

        this->submitLRCA(contextDescriptor);
    }
}

// Waits for the submitted work: delegated to the hardware context controller when present,
// otherwise issues a register poll on the TBX stream using the mask/value and comparison mode
// supplied by the two getters below.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::pollForCompletion() {
    if (hardwareContextController) {
        hardwareContextController->pollForCompletion();
        return;
    }

    using CmdServicesMemTraceRegisterPoll = typename AubMemDump::CmdServicesMemTraceRegisterPoll;

    auto mmioBase = this->getCsTraits(osContext->getEngineType()).mmioBase;
    bool pollNotEqual = getpollNotEqualValueForPollForCompletion();
    uint32_t mask = getMaskAndValueForPollForCompletion();
    uint32_t value = mask;
    tbxStream.registerPoll(
        AubMemDump::computeRegisterOffset(mmioBase, 0x2234), // EXECLIST_STATUS
        mask,
        value,
        pollNotEqual,
        CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort);
}

// Writes `size` bytes from cpuAddress to the TBX stream for the PPGTT range starting at gpuAddress,
// one page-walk chunk at a time. The engine must already be initialized.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) {
    UNRECOVERABLE_IF(!isEngineInitialized);

    AubHelperHw<GfxFamily> aubHelperHw(this->localMemoryEnabled);

    PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) {
        AUB::reserveAddressGGTTAndWriteMmeory(tbxStream, static_cast<uintptr_t>(gpuAddress), cpuAddress, physAddress, size, offset, entryBits,
                                              aubHelperHw);
    };

    ppgtt->pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, entryBits, walker, memoryBank);
}

// Writes an allocation (or, with isChunkCopy, the chunk at gpuVaChunkOffset of chunkSize bytes) to
// TBX, initializing the engine first if needed. Returns false when the allocation is not TBX
// writable or its memory parameters cannot be obtained; otherwise true. Allocation types reported
// as one-time AUB writable are marked non-writable afterwards.
template <typename GfxFamily>
bool TbxCommandStreamReceiverHw<GfxFamily>::writeMemory(GraphicsAllocation &gfxAllocation, bool isChunkCopy, uint64_t gpuVaChunkOffset, size_t chunkSize) {
    if (!this->isTbxWritable(gfxAllocation)) {
        return false;
    }

    if (!isEngineInitialized) {
        initializeEngine();
    }

    uint64_t gpuAddress;
    void *cpuAddress;
    size_t size;
    if (!this->getParametersForMemory(gfxAllocation, gpuAddress, cpuAddress, size)) {
        return false;
    }

    if (aubManager) {
        this->writeMemoryWithAubManager(gfxAllocation, isChunkCopy, gpuVaChunkOffset, chunkSize);
    } else {
        if (isChunkCopy) {
            gpuAddress += gpuVaChunkOffset;
            cpuAddress = ptrOffset(cpuAddress, static_cast<uintptr_t>(gpuVaChunkOffset));
            size = chunkSize;
        }
        writeMemory(gpuAddress, cpuAddress, size, this->getMemoryBank(&gfxAllocation), this->getPPGTTAdditionalBits(&gfxAllocation));
    }

    if (AubHelper::isOneTimeAubWritableAllocationType(gfxAllocation.getAllocationType())) {
        this->setTbxWritable(false, gfxAllocation);
    }

    return true;
}

// Forwards an MMIO write to the hardware context controller; does nothing when there is none.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::writeMMIO(uint32_t offset, uint32_t value) {
    if (hardwareContextController) {
        hardwareContextController->writeMMIO(offset, value);
    }
}

// With a hardware context controller: reads `length` bytes at gfxAddress and returns whether the
// comparison with srcAddress matches the expectation (equal for CompareEqual, different for any
// other compareOperation). Without one, defers to BaseClass::expectMemory().
template <typename GfxFamily>
bool TbxCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress, const void *srcAddress,
                                                         size_t length, uint32_t compareOperation) {
    if (hardwareContextController) {
        auto readMemory = std::make_unique<char[]>(length);
        // note: memory bank should not matter assuming that we call expect on the memory that was previously allocated
        hardwareContextController->readMemory(reinterpret_cast<uint64_t>(gfxAddress), readMemory.get(), length, this->getMemoryBankForGtt(), MemoryConstants::pageSize64k);
        auto isMemoryEqual = (memcmp(readMemory.get(), srcAddress, length) == 0);
        auto isEqualMemoryExpected = (compareOperation == AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual);
        return (isMemoryEqual == isEqualMemoryExpected);
    }

    return BaseClass::expectMemory(gfxAddress, srcAddress, length, compareOperation);
}

// Flushes batched submissions, flushes a tag update when taskCountToWait has not been flushed yet,
// then waits for the tag and downloads the pending allocations.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait) {
    this->flushBatchedSubmissions();

    if (this->latestFlushedTaskCount < taskCountToWait) {
        this->flushTagUpdate();
    }

    // Qualified call so that this class's own poll-and-download implementation is always the one
    // that runs here.
    TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations();
}

// Downloads pending allocations before running the base-class wait.
template <typename GfxFamily>
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
    flushSubmissionsAndDownloadAllocations(taskCountToWait);
    return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
}

// Downloads pending allocations before running the base-class wait.
template <typename GfxFamily>
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) {
    flushSubmissionsAndDownloadAllocations(taskCountToWait);
    return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait);
}

// Queues every allocation that is about to be evicted for download, then runs the base-class
// eviction.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::processEviction() {
    this->allocationsForDownload.insert(this->getEvictionAllocations().begin(), this->getEvictionAllocations().end());
    BaseClass::processEviction();
}

// Writes every allocation in the residency list to TBX and updates its residency task count.
// When dumpTbxNonWritable is set, allocations are first re-marked as TBX writable; the flag is
// cleared afterwards. handleId is not used. Always returns SubmissionStatus::SUCCESS.
template <typename GfxFamily>
SubmissionStatus TbxCommandStreamReceiverHw<GfxFamily>::processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) {
    for (auto &gfxAllocation : allocationsForResidency) {
        if (dumpTbxNonWritable) {
            this->setTbxWritable(true, *gfxAllocation);
        }
        if (!writeMemory(*gfxAllocation)) {
            // A failed write is only expected for empty or non-writable allocations
            DEBUG_BREAK_IF(!((gfxAllocation->getUnderlyingBufferSize() == 0) ||
                             !this->isTbxWritable(*gfxAllocation)));
        }
        gfxAllocation->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
    }

    dumpTbxNonWritable = false;
    return SubmissionStatus::SUCCESS;
}

// Reads an allocation's contents back from TBX into its CPU buffer, either through the hardware
// context controller or by walking the PPGTT and reading each chunk from the TBX stream.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocationTbx(GraphicsAllocation &gfxAllocation) {
    uint64_t gpuAddress = 0;
    void *cpuAddress = nullptr;
    size_t size = 0;

    // The result is intentionally not checked here: the outputs stay zero-initialized on failure.
    this->getParametersForMemory(gfxAllocation, gpuAddress, cpuAddress, size);

    if (hardwareContextController) {
        hardwareContextController->readMemory(gpuAddress, cpuAddress, size,
                                              this->getMemoryBank(&gfxAllocation), gfxAllocation.getUsedPageSize());
        return;
    }

    if (size) {
        PageWalker walker = [&](uint64_t physAddress, size_t chunkSize, size_t offset, uint64_t entryBits) {
            // Bounds check against the allocation size; the chunk size parameter has its own name
            // so that it cannot shadow the allocation size.
            DEBUG_BREAK_IF(offset > size);
            tbxStream.readMemory(physAddress, ptrOffset(cpuAddress, offset), chunkSize);
        };
        ppgtt->pageWalk(static_cast<uintptr_t>(gpuAddress), size, 0, 0, walker, this->getMemoryBank(&gfxAllocation));
    }
}

// For each active partition, keeps downloading the tag allocation until its tag reaches
// latestFlushedTaskCount; then, holding unique ownership of the receiver, downloads and clears all
// allocations queued for download.
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::downloadAllocations() {
    volatile TagAddressType *pollAddress = this->getTagAddress();
    for (uint32_t i = 0; i < this->activePartitions; i++) {
        while (*pollAddress < this->latestFlushedTaskCount) {
            this->downloadAllocation(*this->getTagAllocation());
        }
        pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset);
    }
    auto lockCSR = this->obtainUniqueOwnership();
    for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
        this->downloadAllocation(*graphicsAllocation);
    }
    this->allocationsForDownload.clear();
}

// Value used both as mask and as expected value by pollForCompletion().
template <typename GfxFamily>
uint32_t TbxCommandStreamReceiverHw<GfxFamily>::getMaskAndValueForPollForCompletion() const {
    return 0x100;
}

// Comparison mode passed to the register poll in pollForCompletion().
template <typename GfxFamily>
bool TbxCommandStreamReceiverHw<GfxFamily>::getpollNotEqualValueForPollForCompletion() const {
    return false;
}

// Asks the sub-capture manager whether capture is active for kernelName. Returns {false, false}
// when there is no sub-capture manager. When capture has just become active, the next residency
// pass is told to re-dump allocations that were marked non-writable.
template <typename GfxFamily>
AubSubCaptureStatus TbxCommandStreamReceiverHw<GfxFamily>::checkAndActivateAubSubCapture(const std::string &kernelName) {
    if (!subCaptureManager) {
        return {false, false};
    }

    auto status = subCaptureManager->checkAndActivateSubCapture(kernelName);
    if (status.isActive && !status.wasActiveInPreviousEnqueue) {
        dumpTbxNonWritable = true;
    }
    return status;
}

// Dumps an allocation's surface through the hardware context controller. Nothing is dumped when
// there is no controller, when the allocation's bcsDumpOnly setting does not match this engine
// being a BCS engine, or - with one of the AUBDumpAllocsOnEnqueue*Only flags set - when the
// allocation is not dumpable (in which case a dumpable allocation is marked non-dumpable first).
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::dumpAllocation(GraphicsAllocation &gfxAllocation) {
    if (!hardwareContextController) {
        return;
    }

    bool isBcsCsr = EngineHelpers::isBcs(this->osContext->getEngineType());

    if (isBcsCsr != gfxAllocation.getAubInfo().bcsDumpOnly) {
        return;
    }

    if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get() || DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) {
        if (!gfxAllocation.isAllocDumpable()) {
            return;
        }
        gfxAllocation.setAllocDumpable(false, isBcsCsr);
    }

    auto dumpFormat = AubAllocDump::getDumpFormat(gfxAllocation);
    auto surfaceInfo = std::unique_ptr<aub_stream::SurfaceInfo>(AubAllocDump::getDumpSurfaceInfo<GfxFamily>(gfxAllocation, *this->peekGmmHelper(), dumpFormat));
    if (surfaceInfo) {
        hardwareContextController->pollForCompletion();
        hardwareContextController->dumpSurface(*surfaceInfo);
    }
}
} // namespace NEO