Optimize virtual calls #2.

Optimize frequently used virtual calls.
The compiler cannot inline them, which causes overhead.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2021-12-16 12:29:09 +00:00
committed by Compute-Runtime-Automation
parent c30f65fe40
commit 62faecf6d5
13 changed files with 35 additions and 42 deletions

View File

@@ -43,7 +43,13 @@ struct DebugSession;
enum class ModuleType;
struct Device : _ze_device_handle_t {
virtual uint32_t getRootDeviceIndex() = 0;
// Non-virtual accessor defined in the class body: forwards to
// neoDevice->getRootDeviceIndex() and returns its result.
// neoDevice is dereferenced without a null check.
uint32_t getRootDeviceIndex() const {
return neoDevice->getRootDeviceIndex();
}
// Non-virtual accessor defined in the class body: returns the stored
// neoDevice pointer as-is (no ownership transfer is visible here; the
// result may be nullptr if the member has not been set).
NEO::Device *getNEODevice() const {
return this->neoDevice;
}
virtual ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) = 0;
virtual ze_result_t createCommandList(const ze_command_list_desc_t *desc,
ze_command_list_handle_t *commandList) = 0;
@@ -118,7 +124,6 @@ struct Device : _ze_device_handle_t {
virtual NEO::PreemptionMode getDevicePreemptionMode() const = 0;
virtual const NEO::DeviceInfo &getDeviceInfo() const = 0;
virtual NEO::Device *getNEODevice() = 0;
NEO::SourceLevelDebugger *getSourceLevelDebugger() { return getNEODevice()->getSourceLevelDebugger(); }
DebuggerL0 *getL0Debugger() {
auto debugger = getNEODevice()->getDebugger();
@@ -142,6 +147,7 @@ struct Device : _ze_device_handle_t {
virtual void storeReusableAllocation(NEO::GraphicsAllocation &alloc) = 0;
protected:
NEO::Device *neoDevice = nullptr;
bool implicitScalingCapable = false;
};

View File

@@ -52,10 +52,6 @@ bool releaseFP64Override();
namespace L0 {
// Out-of-line definition that forwards to neoDevice->getRootDeviceIndex().
// NOTE(review): the hunk header (-52,10 +52,6) shrinks this region by four
// lines, so this definition appears to be on the removed side of the diff —
// confirm against the raw patch.
uint32_t DeviceImp::getRootDeviceIndex() {
return neoDevice->getRootDeviceIndex();
}
// Returns the driverHandle pointer stored on this DeviceImp, unchanged.
DriverHandle *DeviceImp::getDriverHandle() {
return this->driverHandle;
}
@@ -942,10 +938,6 @@ const NEO::DeviceInfo &DeviceImp::getDeviceInfo() const {
return neoDevice->getDeviceInfo();
}
// Out-of-line definition that returns the neoDevice pointer unchanged.
// NOTE(review): the hunk header (-942,10 +938,6) shrinks this region by four
// lines, so this definition appears to be on the removed side of the diff —
// confirm against the raw patch.
NEO::Device *DeviceImp::getNEODevice() {
return neoDevice;
}
NEO::GraphicsAllocation *DeviceImp::allocateManagedMemoryFromHostPtr(void *buffer, size_t size, struct CommandList *commandList) {
char *baseAddress = reinterpret_cast<char *>(buffer);
NEO::GraphicsAllocation *allocation = nullptr;

View File

@@ -29,7 +29,6 @@ namespace L0 {
struct SysmanDevice;
struct DeviceImp : public Device {
uint32_t getRootDeviceIndex() override;
ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) override;
ze_result_t createCommandList(const ze_command_list_desc_t *desc,
ze_command_list_handle_t *commandList) override;
@@ -83,7 +82,6 @@ struct DeviceImp : public Device {
NEO::PreemptionMode getDevicePreemptionMode() const override;
const NEO::DeviceInfo &getDeviceInfo() const override;
NEO::Device *getNEODevice() override;
void activateMetricGroups() override;
void processAdditionalKernelProperties(NEO::HwHelper &hwHelper, ze_device_module_properties_t *pKernelProperties);
NEO::GraphicsAllocation *getDebugSurface() const override { return debugSurface; }
@@ -102,7 +100,6 @@ struct DeviceImp : public Device {
bool toPhysicalSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &deviceIndex);
bool toApiSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t deviceIndex);
NEO::Device *neoDevice = nullptr;
bool isSubdevice = false;
void *execEnvironment = nullptr;
std::unique_ptr<BuiltinFunctionsLib> builtins = nullptr;

View File

@@ -26,7 +26,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->hostAddress = reinterpret_cast<void *>(baseHostAddr + (desc->index * eventPool->getEventSize()));
event->signalScope = desc->signal;
event->waitScope = desc->wait;
event->csr = static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver;
event->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
EventPoolImp *EventPoolImp = static_cast<struct EventPoolImp *>(eventPool);
// do not reset even if it has been imported, since event pool
@@ -309,7 +309,7 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
auto packetId = i;
if (deviceImp->isSubdevice) {
packetId = static_cast<NEO::SubDevice *>(deviceImp->neoDevice)->getSubDeviceIndex();
packetId = static_cast<NEO::SubDevice *>(deviceImp->getNEODevice())->getSubDeviceIndex();
}
globalStartTs = kernelEventCompletionData[timestampPacket].getGlobalStartValue(packetId);

View File

@@ -210,8 +210,7 @@ HWTEST2_F(CommandQueueGroupMultiDevice,
returnValue));
L0::CommandQueueImp *cmdQueue = reinterpret_cast<CommandQueueImp *>(commandList0->cmdQImmediate);
L0::DeviceImp *deviceImp = reinterpret_cast<L0::DeviceImp *>(device);
auto &nearestSubDevice = *deviceImp->neoDevice->getNearestGenericSubDevice(0);
auto &nearestSubDevice = *device->getNEODevice()->getNearestGenericSubDevice(0);
const auto rcsIndex = nearestSubDevice.getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::RenderCompute);
auto expectedCSR = nearestSubDevice.getRegularEngineGroups()[rcsIndex].engines[queueGroupIndex].commandStreamReceiver;
EXPECT_EQ(cmdQueue->getCsr(), expectedCSR);

View File

@@ -20,6 +20,8 @@ template <>
// Test-side specialization deriving from ::L0::Device. The using-declarations
// re-export members of the base class at this struct's (public) access level
// so test code can reach them directly.
struct WhiteBox<::L0::Device> : public ::L0::Device {
using Base = L0::Device;
using Base::implicitScalingCapable;
// Expose the base-class accessor and the underlying NEO device pointer.
using L0::Device::getNEODevice;
using L0::Device::neoDevice;
};
using Device = WhiteBox<::L0::Device>;
@@ -28,7 +30,6 @@ template <>
struct Mock<Device> : public Device {
Mock() = default;
ADDMETHOD_NOBASE(getRootDeviceIndex, uint32_t, 0u, ());
ADDMETHOD_NOBASE(canAccessPeer, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hPeerDevice, ze_bool_t *value));
ADDMETHOD_NOBASE(createCommandList, ze_result_t, ZE_RESULT_SUCCESS, (const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList));
ADDMETHOD_NOBASE(createCommandListImmediate, ze_result_t, ZE_RESULT_SUCCESS, (const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList));
@@ -66,7 +67,6 @@ struct Mock<Device> : public Device {
ADDMETHOD_NOBASE_VOIDRETURN(setDriverHandle, (L0::DriverHandle *));
ADDMETHOD_CONST_NOBASE(getDevicePreemptionMode, NEO::PreemptionMode, NEO::PreemptionMode::Initial, ());
ADDMETHOD_CONST_NOBASE_REFRETURN(getDeviceInfo, const NEO::DeviceInfo &, ());
ADDMETHOD_NOBASE(getNEODevice, NEO::Device *, nullptr, ());
ADDMETHOD_NOBASE_VOIDRETURN(activateMetricGroups, ());
ADDMETHOD_CONST_NOBASE(getDebugSurface, NEO::GraphicsAllocation *, nullptr, ());
ADDMETHOD_NOBASE(allocateManagedMemoryFromHostPtr, NEO::GraphicsAllocation *, nullptr, (void *buffer, size_t size, struct L0::CommandList *commandList));

View File

@@ -873,7 +873,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendSignalEvent
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendSignalEvent(event);
@@ -913,7 +913,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendBarrierThen
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendBarrier(nullptr, 1, &event);
@@ -955,7 +955,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendResetEventT
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendEventReset(event);
@@ -995,7 +995,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEven
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendSignalEvent(event);
@@ -1035,7 +1035,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThe
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendBarrier(event, 0, nullptr);
@@ -1078,7 +1078,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndCopyEngineAndApp
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendBarrier(event, 0, nullptr);
@@ -1120,7 +1120,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventReset
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendEventReset(event);

View File

@@ -1119,7 +1119,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEven
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendSignalEvent(event);
@@ -1159,7 +1159,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThe
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendBarrier(event, 0, nullptr);
@@ -1201,7 +1201,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventReset
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
commandList->appendEventReset(event);

View File

@@ -546,7 +546,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
returnValue = commandList->appendWaitOnEvents(1, &event);
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
@@ -609,7 +609,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
returnValue = commandList->appendWaitOnEvents(1, &event);
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);

View File

@@ -2181,10 +2181,10 @@ TEST_F(DeviceTest, givenNoL0DebuggerWhenGettingL0DebuggerThenNullptrReturned) {
TEST_F(DeviceTest, givenValidDeviceWhenCallingReleaseResourcesThenResourcesReleased) {
auto deviceImp = static_cast<DeviceImp *>(device);
EXPECT_FALSE(deviceImp->resourcesReleased);
EXPECT_FALSE(nullptr == deviceImp->neoDevice);
EXPECT_FALSE(nullptr == deviceImp->getNEODevice());
deviceImp->releaseResources();
EXPECT_TRUE(deviceImp->resourcesReleased);
EXPECT_TRUE(nullptr == deviceImp->neoDevice);
EXPECT_TRUE(nullptr == deviceImp->getNEODevice());
EXPECT_TRUE(nullptr == deviceImp->pageFaultCommandList);
EXPECT_TRUE(nullptr == deviceImp->getDebugSurface());
deviceImp->releaseResources();

View File

@@ -213,7 +213,7 @@ TEST_F(EventPoolCreate, givenAnEventIsCreatedFromThisEventPoolThenEventContainsD
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
}
TEST_F(EventPoolCreate, GivenNoDeviceThenEventPoolIsCreated) {
@@ -506,7 +506,7 @@ TEST_F(EventCreate, givenAnEventCreatedThenTheEventHasTheDeviceCommandStreamRece
std::unique_ptr<L0::Event> event(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
ASSERT_NE(nullptr, event);
ASSERT_NE(nullptr, event.get()->csr);
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event.get()->csr);
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event.get()->csr);
}
TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndOffsetAreSet) {

View File

@@ -1418,14 +1418,12 @@ using KernelImpPatchBindlessTest = Test<ModuleFixture>;
TEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenPatchBindlessOffsetCalledThenOffsetPatchedCorrectly) {
Mock<Kernel> kernel;
WhiteBox<::L0::DeviceImp> mockDevice;
mockDevice.neoDevice = neoDevice;
neoDevice->incRefInternal();
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
neoDevice->getNumGenericSubDevices() > 1,
neoDevice->getRootDeviceIndex(),
neoDevice->getDeviceBitfield());
Mock<Module> mockModule(&mockDevice, nullptr);
Mock<Module> mockModule(device, nullptr);
kernel.module = &mockModule;
NEO::MockGraphicsAllocation alloc;
uint32_t bindless = 0x40;
@@ -1440,6 +1438,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenPatchBindlessOffsetCalledTh
EXPECT_EQ(ssPtr, expectedSsInHeap.ssPtr);
EXPECT_TRUE(memcmp(const_cast<uint8_t *>(patchLocation), &patchValue, sizeof(patchValue)) == 0);
EXPECT_TRUE(std::find(kernel.getResidencyContainer().begin(), kernel.getResidencyContainer().end(), expectedSsInHeap.heapAllocation) != kernel.getResidencyContainer().end());
neoDevice->decRefInternal();
}
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessThenSurfaceStateUpdated, MatchAny) {

View File

@@ -216,7 +216,7 @@ void MetricsLibrary::getSubDeviceClientOptions(
subDeviceIndex.SubDeviceIndex.Index = 0;
subDeviceCount.Type = ClientOptionsType::SubDeviceCount;
subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.neoDevice->getRootDevice()->getNumSubDevices(), 1u);
subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u);
workloadPartition.Type = ClientOptionsType::WorkloadPartition;
workloadPartition.WorkloadPartition.Enabled = false;
@@ -228,10 +228,10 @@ void MetricsLibrary::getSubDeviceClientOptions(
subDevice.SubDevice.Enabled = true;
subDeviceIndex.Type = ClientOptionsType::SubDeviceIndex;
subDeviceIndex.SubDeviceIndex.Index = static_cast<NEO::SubDevice *>(deviceImp.neoDevice)->getSubDeviceIndex();
subDeviceIndex.SubDeviceIndex.Index = static_cast<NEO::SubDevice *>(deviceImp.getNEODevice())->getSubDeviceIndex();
subDeviceCount.Type = ClientOptionsType::SubDeviceCount;
subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.neoDevice->getRootDevice()->getNumSubDevices(), 1u);
subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u);
workloadPartition.Type = ClientOptionsType::WorkloadPartition;
workloadPartition.WorkloadPartition.Enabled = isWorkloadPartitionEnabled;
@@ -254,7 +254,7 @@ bool MetricsLibrary::createContext() {
});
const auto &deviceImp = *static_cast<DeviceImp *>(&device);
const auto &commandStreamReceiver = *deviceImp.neoDevice->getDefaultEngine().commandStreamReceiver;
const auto &commandStreamReceiver = *deviceImp.getNEODevice()->getDefaultEngine().commandStreamReceiver;
const auto engineType = commandStreamReceiver.getOsContext().getEngineType();
const bool isComputeUsed = NEO::EngineHelpers::isCcs(engineType);