Use BCS to copy ISA if required - part 2

Change-Id: I8a06433640cb3ce6de8f1e1a367501572a6f77de
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2020-09-21 16:44:46 +02:00
committed by sys_ocldev
parent 6ba7a3f6c1
commit 9a29a75600
2 changed files with 79 additions and 2 deletions

View File

@@ -8,7 +8,7 @@
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/kernel_helpers.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/string.h"
@@ -105,8 +105,17 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, NEO::MemoryMan
auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(
{device->getRootDeviceIndex(), kernelIsaSize, NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, device->getDeviceBitfield()});
UNRECOVERABLE_IF(allocation == nullptr);
auto &hwInfo = device->getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (kernelInfo->heapInfo.pKernelHeap != nullptr) {
memoryManager.copyMemoryToAllocation(allocation, kernelInfo->heapInfo.pKernelHeap, kernelIsaSize);
if (allocation->isAllocatedInLocalMemoryPool() && hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo)) {
auto status = NEO::BlitHelperFunctions::blitMemoryToAllocation(*device, allocation, 0, kernelInfo->heapInfo.pKernelHeap, {kernelIsaSize, 1, 1});
UNRECOVERABLE_IF(status != NEO::BlitOperationResult::Success);
} else {
memoryManager.copyMemoryToAllocation(allocation, kernelInfo->heapInfo.pKernelHeap, kernelIsaSize);
}
}
isaGraphicsAllocation.reset(allocation);

View File

@@ -455,5 +455,73 @@ HWTEST_F(KernelLocalIdsTest, WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRu
EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime());
}
// Fixture for the kernel ISA transfer tests. On top of ModuleFixture it forces
// capabilityTable.blitterOperationsSupported to true and, when that flag was not
// already set, rebinds the device's engine 0 to a freshly created BCS OsContext.
struct KernelIsaTests : Test<ModuleFixture> {
    void SetUp() override {
        Test<ModuleFixture>::SetUp();

        auto *neoDevice = device->getNEODevice();
        auto &capabilityTable = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;

        // Remember the original setting before it is overwritten below.
        const bool blitterAlreadySupported = capabilityTable.blitterOperationsSupported;
        capabilityTable.blitterOperationsSupported = true;
        if (blitterAlreadySupported) {
            // NOTE(review): presumably a BCS engine already exists in this case - confirm.
            return;
        }

        auto &firstEngine = neoDevice->getEngine(0);
        bcsOsContext.reset(OsContext::create(nullptr, 1, neoDevice->getDeviceBitfield(), aub_stream::ENGINE_BCS,
                                             PreemptionMode::Disabled, false, false, false));
        firstEngine.osContext = bcsOsContext.get();
        firstEngine.commandStreamReceiver->setupContext(*bcsOsContext);
    }

    // BCS context created by SetUp; stays empty when SetUp did not have to create one.
    std::unique_ptr<OsContext> bcsOsContext;
};
// With CPU access to local memory disallowed and KERNEL_ISA requested outside of
// system memory, KernelImmutableData::initialize() is expected to bump the BCS
// command stream receiver's task count by exactly one when the ISA allocation
// ends up in the local memory pool, and to leave it untouched otherwise.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));

    // Build the mask from a 64-bit one: the type of a shift expression follows its
    // left operand, so a plain `1` would be shifted as int no matter how the shift
    // count is cast.
    const int64_t kernelIsaPlacementMask =
        int64_t{1} << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1);
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(kernelIsaPlacementMask);

    // Minimal one-byte kernel heap so that initialize() has something to copy.
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;

    KernelImmutableData kernelImmutableData(device);

    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver;
    auto initialTaskCount = bcsCsr->peekTaskCount();

    kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr);

    // The forced placement is only a request; accept either outcome and check the
    // BCS task count accordingly.
    if (kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
        EXPECT_EQ(initialTaskCount + 1, bcsCsr->peekTaskCount());
    } else {
        EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
    }

    // NOTE(review): this test never assigns kernelInfo.kernelAllocation itself -
    // confirm that this free is still required.
    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
// With CPU access to local memory allowed, KernelImmutableData::initialize() is
// expected to leave the BCS command stream receiver's task count unchanged, even
// though KERNEL_ISA is requested outside of system memory.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessAllowed));

    // Build the mask from a 64-bit one: the type of a shift expression follows its
    // left operand, so a plain `1` would be shifted as int no matter how the shift
    // count is cast.
    const int64_t kernelIsaPlacementMask =
        int64_t{1} << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1);
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(kernelIsaPlacementMask);

    // Minimal one-byte kernel heap so that initialize() has something to copy.
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;

    KernelImmutableData kernelImmutableData(device);

    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver;
    auto initialTaskCount = bcsCsr->peekTaskCount();

    kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr);

    EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());

    // NOTE(review): this test never assigns kernelInfo.kernelAllocation itself -
    // confirm that this free is still required.
    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}
} // namespace ult
} // namespace L0