Improve queries with number of SubDevices

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2021-08-31 16:49:46 +00:00
committed by Compute-Runtime-Automation
parent 69ae9dc9c2
commit 0345d9f707
44 changed files with 229 additions and 173 deletions

View File

@@ -176,7 +176,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, debugSurface->getGpuAddress(),
debugSurface->getUnderlyingBufferSize(), mocs,
false, false, false, neoDevice->getNumAvailableDevices(),
false, false, false, neoDevice->getNumGenericSubDevices(),
debugSurface, neoDevice->getGmmHelper(), kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}

View File

@@ -26,34 +26,34 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration) {
NEO::DispatchFlags dispatchFlags(
{}, //csrDependencies
nullptr, //barrierTimestampPacketNodes
{}, //pipelineSelectArgs
nullptr, //flushStampReference
NEO::QueueThrottle::MEDIUM, //throttle
this->getCommandListPreemptionMode(), //preemptionMode
this->commandContainer.lastSentNumGrfRequired, //numGrfRequired
NEO::L3CachingSettings::l3CacheOn, //l3CacheSettings
this->getThreadArbitrationPolicy(), //threadArbitrationPolicy
NEO::AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo
NEO::KernelExecutionType::NotApplicable, //kernelExecutionType
NEO::MemoryCompressionState::NotApplicable, //memoryCompressionState
NEO::QueueSliceCount::defaultSliceCount, //sliceCount
this->isSyncModeQueue, //blocking
this->isSyncModeQueue, //dcFlush
this->getCommandListSLMEnable(), //useSLM
this->isSyncModeQueue, //guardCommandBufferWithPipeControl
false, //GSBA32BitRequired
false, //requiresCoherency
false, //lowPriority
true, //implicitFlush
this->csr->isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false, //epilogueRequired
false, //usePerDssBackedBuffer
false, //useSingleSubdevice
false, //useGlobalAtomics
this->device->getNEODevice()->getNumAvailableDevices() > 1, //areMultipleSubDevicesInContext
false //memoryMigrationRequired
{}, //csrDependencies
nullptr, //barrierTimestampPacketNodes
{}, //pipelineSelectArgs
nullptr, //flushStampReference
NEO::QueueThrottle::MEDIUM, //throttle
this->getCommandListPreemptionMode(), //preemptionMode
this->commandContainer.lastSentNumGrfRequired, //numGrfRequired
NEO::L3CachingSettings::l3CacheOn, //l3CacheSettings
this->getThreadArbitrationPolicy(), //threadArbitrationPolicy
NEO::AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo
NEO::KernelExecutionType::NotApplicable, //kernelExecutionType
NEO::MemoryCompressionState::NotApplicable, //memoryCompressionState
NEO::QueueSliceCount::defaultSliceCount, //sliceCount
this->isSyncModeQueue, //blocking
this->isSyncModeQueue, //dcFlush
this->getCommandListSLMEnable(), //useSLM
this->isSyncModeQueue, //guardCommandBufferWithPipeControl
false, //GSBA32BitRequired
false, //requiresCoherency
false, //lowPriority
true, //implicitFlush
this->csr->isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false, //epilogueRequired
false, //usePerDssBackedBuffer
false, //useSingleSubdevice
false, //useGlobalAtomics
this->device->getNEODevice()->getNumGenericSubDevices() > 1, //areMultipleSubDevicesInContext
false //memoryMigrationRequired
);
this->commandContainer.removeDuplicatesFromResidencyContainer();

View File

@@ -242,7 +242,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, debugSurface->getGpuAddress(),
debugSurface->getUnderlyingBufferSize(), mocs,
false, false, false, neoDevice->getNumAvailableDevices(),
false, false, false, neoDevice->getNumGenericSubDevices(),
debugSurface, neoDevice->getGmmHelper(),
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, 1u);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;

View File

@@ -686,28 +686,23 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, uint3
device->setDebugSurface(debugSurface);
}
if (device->neoDevice->getNumAvailableDevices() == 1) {
device->numSubDevices = 0;
} else {
for (uint32_t i = 0; i < device->neoDevice->getNumAvailableDevices(); i++) {
if (!((1UL << i) & currentDeviceMask)) {
continue;
}
ze_device_handle_t subDevice = Device::create(driverHandle,
device->neoDevice->getSubDevice(i),
0,
true, returnValue);
if (subDevice == nullptr) {
return nullptr;
}
static_cast<DeviceImp *>(subDevice)->isSubdevice = true;
static_cast<DeviceImp *>(subDevice)->setDebugSurface(debugSurface);
device->subDevices.push_back(static_cast<Device *>(subDevice));
for (uint32_t i = 0; i < device->neoDevice->getNumSubDevices(); i++) {
if (!((1UL << i) & currentDeviceMask)) {
continue;
}
device->numSubDevices = static_cast<uint32_t>(device->subDevices.size());
ze_device_handle_t subDevice = Device::create(driverHandle,
device->neoDevice->getSubDevice(i),
0,
true, returnValue);
if (subDevice == nullptr) {
return nullptr;
}
static_cast<DeviceImp *>(subDevice)->isSubdevice = true;
static_cast<DeviceImp *>(subDevice)->setDebugSurface(debugSurface);
device->subDevices.push_back(static_cast<Device *>(subDevice));
}
device->numSubDevices = static_cast<uint32_t>(device->subDevices.size());
if (neoDevice->getCompilerInterface()) {
auto &hwInfo = neoDevice->getHardwareInfo();

View File

@@ -19,7 +19,7 @@ NEO::Device *DeviceImp::getActiveDevice() const {
}
// Reports whether the underlying NEO device exposes more than one sub-device,
// by comparing a count queried from neoDevice against 1.
// NOTE(review): two return statements appear back to back below; as written only
// the first (getNumAvailableDevices) is reachable. This looks like the removed and
// added lines of a change listing shown together, with getNumGenericSubDevices
// presumably being the intended query - confirm before relying on either.
bool DeviceImp::isMultiDeviceCapable() const {
return neoDevice->getNumAvailableDevices() > 1u;
return neoDevice->getNumGenericSubDevices() > 1u;
}
void DeviceImp::processAdditionalKernelProperties(NEO::HwHelper &hwHelper, ze_device_module_properties_t *pKernelProperties) {

View File

@@ -69,7 +69,7 @@ struct KernelHw : public KernelImp {
NEO::Device *neoDevice = module->getDevice()->getNEODevice();
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, bufferAddressForSsh, bufferSizeForSsh, mocs,
false, false, false, neoDevice->getNumAvailableDevices(),
false, false, false, neoDevice->getNumGenericSubDevices(),
alloc, neoDevice->getGmmHelper(),
kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress) = surfaceState;

View File

@@ -93,7 +93,7 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
void *addressToPatch = reinterpret_cast<void *>(allocation.getUnderlyingBuffer());
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0,
&allocation, 0, 0, useGlobalAtomics, device.getNumAvailableDevices() > 1);
&allocation, 0, 0, useGlobalAtomics, device.getNumGenericSubDevices() > 1);
}
}