mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
feature: global bindless sampler offsets
- use global heap base for DSH when bindlessHeapsHelper enabled Related-To: NEO-10505 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
25bed07989
commit
d38ac4806b
@@ -41,10 +41,10 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
|
||||
uint64_t bindlessSurfStateBase = 0ull;
|
||||
|
||||
if (neoDevice->getBindlessHeapsHelper()) {
|
||||
if (neoDevice->getBindlessHeapsHelper()->isGlobalDshSupported()) {
|
||||
useGlobalSshAndDsh = true;
|
||||
globalHeapsBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
} else {
|
||||
useGlobalSshAndDsh = true;
|
||||
globalHeapsBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
|
||||
if (!neoDevice->getBindlessHeapsHelper()->isGlobalDshSupported()) {
|
||||
bindlessSurfStateBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,10 +43,10 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
|
||||
uint64_t bindlessSurfStateBase = 0;
|
||||
|
||||
if (device->getNEODevice()->getBindlessHeapsHelper()) {
|
||||
if (device->getNEODevice()->getBindlessHeapsHelper()->isGlobalDshSupported()) {
|
||||
useGlobalSshAndDsh = true;
|
||||
globalHeapsBase = device->getNEODevice()->getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
} else {
|
||||
useGlobalSshAndDsh = true;
|
||||
globalHeapsBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
|
||||
if (!device->getNEODevice()->getBindlessHeapsHelper()->isGlobalDshSupported()) {
|
||||
bindlessSurfStateBase = device->getNEODevice()->getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,6 +83,13 @@ __kernel void image_read_sampler(__global float4 *dst, image2d_t img, sampler_t
|
||||
size_t dstOffset = get_global_id(1) * get_image_width(img) + get_global_id(0);
|
||||
dst[dstOffset] = read_imagef(img, sampler, coord);
|
||||
}
|
||||
|
||||
__kernel void image_read_sampler_oob(__global float4 *dst, image2d_t img, sampler_t sampler) {
|
||||
int2 coord = {get_global_id(0) + 1, get_global_id(1) + 1};
|
||||
size_t dstOffset = get_global_id(1) * get_image_width(img) + get_global_id(0);
|
||||
dst[dstOffset] = read_imagef(img, sampler, coord);
|
||||
printf(" gid[ %d, %d] %.2f , %.2f , %.2f , %.2f \n", get_global_id(0), get_global_id(1), dst[dstOffset].x, dst[dstOffset].y, dst[dstOffset].z, dst[dstOffset].w );
|
||||
}
|
||||
)===";
|
||||
|
||||
const char *source5 = R"===(
|
||||
@@ -172,6 +179,7 @@ static std::string kernelName = "kernel_copy";
|
||||
static std::string kernelName2 = "kernel_fill";
|
||||
static std::string kernelName3 = "image_copy";
|
||||
static std::string kernelName4 = "image_read_sampler";
|
||||
static std::string kernelName4a = "image_read_sampler_oob";
|
||||
|
||||
enum class ExecutionMode : uint32_t {
|
||||
commandQueue,
|
||||
@@ -342,6 +350,135 @@ bool testBindlessBufferCopy(ze_context_handle_t context, ze_device_handle_t devi
|
||||
return outputValidated;
|
||||
}
|
||||
|
||||
void runBindlessBindful(const ze_kernel_handle_t &firstKernel, const ze_kernel_handle_t &secondKernel,
|
||||
ze_context_handle_t &context, ze_device_handle_t &device, ExecutionMode mode, bool &outputValidationSuccessful) {
|
||||
|
||||
LevelZeroBlackBoxTests::CommandHandler commandHandler;
|
||||
bool isImmediateCmdList = (mode == ExecutionMode::immSyncCmdList);
|
||||
|
||||
SUCCESS_OR_TERMINATE(commandHandler.create(context, device, isImmediateCmdList));
|
||||
|
||||
constexpr size_t allocSize = 32;
|
||||
ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
|
||||
hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED;
|
||||
|
||||
void *buffer1 = nullptr;
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, &buffer1));
|
||||
|
||||
void *buffer2 = nullptr;
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, &buffer2));
|
||||
|
||||
void *buffer3 = nullptr;
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, &buffer3));
|
||||
|
||||
// Initialize memory
|
||||
constexpr uint8_t val1 = 55;
|
||||
constexpr uint8_t val2 = 22;
|
||||
constexpr uint8_t val3 = 77;
|
||||
|
||||
memset(buffer1, 0, allocSize);
|
||||
memset(buffer2, 0, allocSize);
|
||||
memset(buffer3, 0, allocSize);
|
||||
|
||||
ze_group_count_t dispatchTraits;
|
||||
dispatchTraits.groupCountX = 1u;
|
||||
dispatchTraits.groupCountY = 1u;
|
||||
dispatchTraits.groupCountZ = 1u;
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(firstKernel, 0, sizeof(buffer1), &buffer1));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(firstKernel, 1, sizeof(char), &val1));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(firstKernel, 32U, 1U, 1U));
|
||||
SUCCESS_OR_TERMINATE(commandHandler.appendKernel(firstKernel, dispatchTraits));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(commandHandler.cmdList, nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(secondKernel, 0, sizeof(buffer2), &buffer2));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(secondKernel, 1, sizeof(val2), &val2));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(secondKernel, 32U, 1U, 1U));
|
||||
SUCCESS_OR_TERMINATE(commandHandler.appendKernel(secondKernel, dispatchTraits));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(commandHandler.cmdList, nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(firstKernel, 0, sizeof(buffer1), &buffer3));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(firstKernel, 1, sizeof(char), &val3));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(firstKernel, 32U, 1U, 1U));
|
||||
SUCCESS_OR_TERMINATE(commandHandler.appendKernel(firstKernel, dispatchTraits));
|
||||
|
||||
SUCCESS_OR_TERMINATE(commandHandler.execute());
|
||||
SUCCESS_OR_TERMINATE(commandHandler.synchronize());
|
||||
|
||||
outputValidationSuccessful = true;
|
||||
uint8_t *buffer1Uint = static_cast<uint8_t *>(buffer1);
|
||||
uint8_t *buffer2Uint = static_cast<uint8_t *>(buffer2);
|
||||
uint8_t *buffer3Uint = static_cast<uint8_t *>(buffer3);
|
||||
for (size_t i = 0; i < allocSize; i++) {
|
||||
if (buffer1Uint[i] != val1) {
|
||||
std::cout << "buffer1Uint[" << i << "] = " << std::dec << static_cast<unsigned int>(buffer1Uint[i]) << " not equal to "
|
||||
<< "val1 = " << std::dec << static_cast<unsigned int>(val1) << "\n";
|
||||
outputValidationSuccessful = false;
|
||||
break;
|
||||
}
|
||||
if (buffer2Uint[i] != val2) {
|
||||
std::cout << "buffer2Uint[" << i << "] = " << std::dec << static_cast<unsigned int>(buffer2Uint[i]) << " not equal to "
|
||||
<< "val2 = " << std::dec << static_cast<unsigned int>(val2) << "\n";
|
||||
outputValidationSuccessful = false;
|
||||
break;
|
||||
}
|
||||
if (buffer3Uint[i] != val3) {
|
||||
std::cout << "buffer3Uint[" << i << "] = " << std::dec << static_cast<unsigned int>(buffer3Uint[i]) << " not equal to "
|
||||
<< "val3 = " << std::dec << static_cast<unsigned int>(val3) << "\n";
|
||||
outputValidationSuccessful = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!outputValidationSuccessful) {
|
||||
std::cout << " TEST FAILED\n"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, buffer1));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, buffer2));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, buffer3));
|
||||
}
|
||||
|
||||
bool testBindlessBindfulKernel(ze_context_handle_t context, ze_device_handle_t device, const std::string &deviceId, const std::string &revisionId) {
|
||||
bool outputValidated = false;
|
||||
|
||||
ze_module_handle_t module = nullptr;
|
||||
ze_module_handle_t module2 = nullptr;
|
||||
createModule(source2, AddressingMode::bindless, context, device, deviceId, revisionId, module, "");
|
||||
createModule(source2, AddressingMode::defaultMode, context, device, deviceId, revisionId, module2, "");
|
||||
|
||||
ExecutionMode executionModes[] = {ExecutionMode::commandQueue, ExecutionMode::immSyncCmdList};
|
||||
ze_kernel_handle_t bindlessKernel = nullptr;
|
||||
ze_kernel_handle_t bindfulKernel = nullptr;
|
||||
createKernel(module, bindlessKernel, kernelName2.c_str());
|
||||
createKernel(module2, bindfulKernel, kernelName2.c_str());
|
||||
|
||||
std::pair<ze_kernel_handle_t, ze_kernel_handle_t> kernelOrder[2] = {{bindlessKernel, bindfulKernel},
|
||||
{bindfulKernel, bindlessKernel}};
|
||||
|
||||
for (auto kernel : kernelOrder) {
|
||||
|
||||
for (auto mode : executionModes) {
|
||||
|
||||
runBindlessBindful(kernel.first, kernel.second, context, device, mode, outputValidated);
|
||||
|
||||
if (!outputValidated) {
|
||||
std::cout << "testBindlessBindfulKernel with mode " << static_cast<uint32_t>(mode) << " failed.\n"
|
||||
<< std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeKernelDestroy(bindlessKernel));
|
||||
SUCCESS_OR_TERMINATE(zeKernelDestroy(bindfulKernel));
|
||||
SUCCESS_OR_TERMINATE(zeModuleDestroy(module));
|
||||
SUCCESS_OR_TERMINATE(zeModuleDestroy(module2));
|
||||
|
||||
return outputValidated;
|
||||
}
|
||||
|
||||
bool testBindlessImages(ze_context_handle_t context, ze_device_handle_t device, const std::string &deviceId, const std::string &revisionId,
|
||||
int imageCount, AddressingMode mode) {
|
||||
bool outputValidated = false;
|
||||
@@ -560,6 +697,132 @@ bool testBindlessImageSampled(ze_context_handle_t context, ze_device_handle_t de
|
||||
SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel));
|
||||
SUCCESS_OR_TERMINATE(zeModuleDestroy(module));
|
||||
|
||||
if (!outputValidated) {
|
||||
std::cout << "\nTest FAILED" << std::endl;
|
||||
} else {
|
||||
std::cout << "\nTest PASSED" << std::endl;
|
||||
}
|
||||
|
||||
return outputValidated;
|
||||
}
|
||||
|
||||
bool testBindlessImageSampledBorderColor(ze_context_handle_t context, ze_device_handle_t device, const std::string &deviceId,
|
||||
const std::string &revisionId, AddressingMode mode) {
|
||||
bool outputValidated = true;
|
||||
|
||||
ze_module_handle_t module = nullptr;
|
||||
ze_kernel_handle_t kernel = nullptr;
|
||||
|
||||
createModule(source4, mode, context, device, deviceId, revisionId, module, "");
|
||||
createKernel(module, kernel, kernelName4a.c_str());
|
||||
|
||||
LevelZeroBlackBoxTests::CommandHandler commandHandler;
|
||||
SUCCESS_OR_TERMINATE(commandHandler.create(context, device, true));
|
||||
|
||||
ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
|
||||
hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
|
||||
|
||||
ze_sampler_desc_t samplerDesc = {ZE_STRUCTURE_TYPE_SAMPLER_DESC,
|
||||
nullptr,
|
||||
ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
ZE_SAMPLER_FILTER_MODE_NEAREST,
|
||||
false};
|
||||
ze_sampler_handle_t sampler;
|
||||
SUCCESS_OR_TERMINATE(zeSamplerCreate(context, device, &samplerDesc, &sampler));
|
||||
|
||||
ze_image_desc_t srcImgDesc = {ZE_STRUCTURE_TYPE_IMAGE_DESC,
|
||||
nullptr,
|
||||
0,
|
||||
ZE_IMAGE_TYPE_2D,
|
||||
{ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_FLOAT,
|
||||
ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G,
|
||||
ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G},
|
||||
2,
|
||||
2,
|
||||
1,
|
||||
0,
|
||||
0};
|
||||
|
||||
constexpr size_t bytesPerPixel = sizeof(float) * 4;
|
||||
size_t bytesPerRow = srcImgDesc.width * bytesPerPixel;
|
||||
size_t allocSize = bytesPerRow * srcImgDesc.height;
|
||||
|
||||
// Create and initialize host memory
|
||||
void *dstBuffer;
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, &dstBuffer));
|
||||
for (uint32_t i = 0; i < srcImgDesc.height; ++i) {
|
||||
float *dstRow = reinterpret_cast<float *>(dstBuffer) + srcImgDesc.width * 4 * i;
|
||||
for (size_t j = 0; j < srcImgDesc.width; ++j) {
|
||||
for (size_t k = 0; k < 4; ++k) {
|
||||
dstRow[j * 4 + k] = 4.f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ze_image_handle_t srcImg;
|
||||
ze_group_count_t dispatchTraits;
|
||||
dispatchTraits.groupCountX = 1u;
|
||||
dispatchTraits.groupCountY = 2u;
|
||||
dispatchTraits.groupCountZ = 1u;
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeImageCreate(context, device, &srcImgDesc, &srcImg));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(dstBuffer), &dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(srcImg), &srcImg));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(sampler), &sampler));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, 2U, 1U, 1U));
|
||||
|
||||
ze_image_region_t srcRegion = {0, 0, 0, (uint32_t)srcImgDesc.width, (uint32_t)srcImgDesc.height, (uint32_t)srcImgDesc.depth};
|
||||
|
||||
std::vector<float> data(srcImgDesc.width * srcImgDesc.height * 4);
|
||||
memcpy(data.data(), dstBuffer, allocSize);
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyFromMemory(commandHandler.cmdList, srcImg, data.data(),
|
||||
&srcRegion, nullptr, 0, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(commandHandler.cmdList, nullptr, 0, nullptr));
|
||||
SUCCESS_OR_TERMINATE(commandHandler.appendKernel(kernel, dispatchTraits));
|
||||
SUCCESS_OR_TERMINATE(commandHandler.execute());
|
||||
SUCCESS_OR_TERMINATE(commandHandler.synchronize());
|
||||
|
||||
// Validate
|
||||
float *dst = reinterpret_cast<float *>(dstBuffer);
|
||||
std::vector<float> reference = {0.f, 0.f, 0.f, 0.f};
|
||||
|
||||
for (size_t i = 0; (i < srcImgDesc.height); ++i) {
|
||||
for (size_t j = 0; j < srcImgDesc.width; ++j) {
|
||||
if (i == 0 && j == 0) {
|
||||
for (size_t k = 0; k < 4; k++) {
|
||||
if (dst[k] != 4.0f) {
|
||||
std::cerr << "error: dstBuffer[" << i << "][" << j << "] = " << dst[k] << " is not equal to " << 4.0f << "\n";
|
||||
outputValidated = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (size_t k = 0; k < 4; k++) {
|
||||
if (dst[k] != reference[k]) {
|
||||
std::cerr << "error: dstBuffer[" << i << "][" << j << "] = " << dst[k] << " is not equal to " << reference[k] << "\n";
|
||||
outputValidated = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeSamplerDestroy(sampler));
|
||||
SUCCESS_OR_TERMINATE(zeImageDestroy(srcImg));
|
||||
SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel));
|
||||
SUCCESS_OR_TERMINATE(zeModuleDestroy(module));
|
||||
|
||||
if (!outputValidated) {
|
||||
std::cout << "\nTest FAILED" << std::endl;
|
||||
} else {
|
||||
std::cout << "\nTest PASSED" << std::endl;
|
||||
}
|
||||
|
||||
return outputValidated;
|
||||
}
|
||||
|
||||
@@ -1005,7 +1268,7 @@ int main(int argc, char *argv[]) {
|
||||
ze_device_uuid_t uuid = deviceProperties.uuid;
|
||||
std::string revisionId = std::to_string(reinterpret_cast<uint16_t *>(uuid.id)[2]);
|
||||
|
||||
int numTests = 5;
|
||||
int numTests = 7;
|
||||
int testCase = -1;
|
||||
testCase = LevelZeroBlackBoxTests::getParamValue(argc, argv, "", "--test-case", -1);
|
||||
if (testCase < -1 || testCase >= numTests) {
|
||||
@@ -1078,7 +1341,7 @@ int main(int argc, char *argv[]) {
|
||||
std::cout << "Skipped. testBindlessImageQuery not supported\n";
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
case 4: {
|
||||
std::cout << "\ntest case: testZeExperimentalBindlessImages\n"
|
||||
<< std::endl;
|
||||
|
||||
@@ -1123,7 +1386,29 @@ int main(int argc, char *argv[]) {
|
||||
std::cout << "--bindless-images " << std::endl;
|
||||
}
|
||||
outputValidated &= testZeExperimentalBindlessImages(context, device, ss.str(), revisionId, mode);
|
||||
} break;
|
||||
case 5:
|
||||
std::cout << "\ntest case: testBindlessImageSampledBorderColor\n"
|
||||
<< std::endl;
|
||||
|
||||
if (!isIntegratedGPU && is2dImageSupported) {
|
||||
if (bindlessImages) {
|
||||
std::cout << "--bindless-images " << std::endl;
|
||||
}
|
||||
outputValidated &= testBindlessImageSampledBorderColor(context, device, ss.str(), revisionId, mode);
|
||||
} else {
|
||||
std::cout << "Skipped. testBindlessImageSampledBorderColor not supported\n";
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
std::cout << "\ntest case: testBindlessBindfulKernel\n"
|
||||
<< std::endl;
|
||||
|
||||
if (!isIntegratedGPU) {
|
||||
outputValidated &= testBindlessBindfulKernel(context, device, ss.str(), revisionId);
|
||||
} else {
|
||||
std::cout << "Skipped. testBindlessBindfulKernel not supported\n";
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -44,11 +44,13 @@ std::string getRunPath(char *argv0) {
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[], char **envp) {
|
||||
char *argv2[] = {NULL, NULL};
|
||||
char *argv2[] = {NULL, NULL, NULL};
|
||||
auto path = getRunPath(argv[0]);
|
||||
path += fSeparator;
|
||||
path += "zello_bindless_kernel";
|
||||
argv2[0] = const_cast<char *>(path.c_str());
|
||||
const char verbose[] = "--verbose";
|
||||
argv2[1] = const_cast<char *>(verbose);
|
||||
std::vector<const char *> allEnv;
|
||||
|
||||
for (auto env = envp; *env != nullptr; env++) {
|
||||
|
||||
@@ -1492,18 +1492,9 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
|
||||
|
||||
auto sshHeap = container.getIndirectHeap(NEO::HeapType::surfaceState);
|
||||
uint64_t ssBaseAddress = sshHeap->getHeapGpuBase();
|
||||
uint64_t dsBaseAddress = -1;
|
||||
uint32_t dsBaseSize = 0;
|
||||
uint32_t dsFirstBaseSize = 0;
|
||||
|
||||
auto dshHeap = container.getIndirectHeap(NEO::HeapType::dynamicState);
|
||||
if (dshHeap) {
|
||||
dsBaseAddress = dshHeap->getHeapGpuBase();
|
||||
dsFirstBaseSize = dshHeap->getHeapSizeInPages();
|
||||
}
|
||||
|
||||
uint64_t ssFirstBaseAddress = ssBaseAddress;
|
||||
uint64_t dsFirstBaseAddress = dsBaseAddress;
|
||||
|
||||
sshHeap->getSpace(sshHeap->getAvailableSpace());
|
||||
container.getHeapWithRequiredSizeAndAlignment(NEO::HeapType::surfaceState, sshHeap->getMaxAvailableSpace(), 0);
|
||||
@@ -1519,10 +1510,6 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
ssBaseAddress = sshHeap->getGpuBase();
|
||||
if (dshHeap) {
|
||||
dsBaseAddress = dshHeap->getGpuBase();
|
||||
dsBaseSize = dshHeap->getHeapSizeInPages();
|
||||
}
|
||||
|
||||
cmdList.clear();
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
@@ -1537,13 +1524,8 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
|
||||
if (this->dshRequired) {
|
||||
EXPECT_TRUE(sbaCmd->getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_TRUE(sbaCmd->getDynamicStateBufferSizeModifyEnable());
|
||||
EXPECT_EQ(dsBaseAddress, sbaCmd->getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(dsBaseSize, sbaCmd->getDynamicStateBufferSize());
|
||||
} else {
|
||||
EXPECT_FALSE(sbaCmd->getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_FALSE(sbaCmd->getDynamicStateBufferSizeModifyEnable());
|
||||
EXPECT_EQ(0u, sbaCmd->getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(0u, sbaCmd->getDynamicStateBufferSize());
|
||||
EXPECT_EQ(globalBindlessBase, sbaCmd->getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, sbaCmd->getDynamicStateBufferSize());
|
||||
}
|
||||
|
||||
EXPECT_TRUE(sbaCmd->getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
@@ -1574,13 +1556,8 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
|
||||
if (this->dshRequired) {
|
||||
EXPECT_TRUE(sbaCmd->getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_TRUE(sbaCmd->getDynamicStateBufferSizeModifyEnable());
|
||||
EXPECT_EQ(dsFirstBaseAddress, sbaCmd->getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(dsFirstBaseSize, sbaCmd->getDynamicStateBufferSize());
|
||||
} else {
|
||||
EXPECT_FALSE(sbaCmd->getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_FALSE(sbaCmd->getDynamicStateBufferSizeModifyEnable());
|
||||
EXPECT_EQ(0u, sbaCmd->getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(0u, sbaCmd->getDynamicStateBufferSize());
|
||||
EXPECT_EQ(globalBindlessBase, sbaCmd->getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, sbaCmd->getDynamicStateBufferSize());
|
||||
}
|
||||
|
||||
EXPECT_TRUE(sbaCmd->getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
@@ -1656,7 +1633,7 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
|
||||
EXPECT_TRUE(sbaCmd->getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_TRUE(sbaCmd->getDynamicStateBufferSizeModifyEnable());
|
||||
EXPECT_EQ(globalBindlessBase, sbaCmd->getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(MemoryConstants::pageSize64k, sbaCmd->getDynamicStateBufferSize());
|
||||
EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, sbaCmd->getDynamicStateBufferSize());
|
||||
|
||||
EXPECT_TRUE(sbaCmd->getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(globalBindlessBase, sbaCmd->getBindlessSurfaceStateBaseAddress());
|
||||
|
||||
@@ -221,7 +221,7 @@ HWTEST2_F(CommandQueueProgramSBATest,
|
||||
EXPECT_EQ(surfaceStateCount, cmdSba->getBindlessSurfaceStateSize());
|
||||
|
||||
EXPECT_EQ(globalHeapsBase, cmdSba->getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(MemoryConstants::pageSize64k, cmdSba->getDynamicStateBufferSize());
|
||||
EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, cmdSba->getDynamicStateBufferSize());
|
||||
EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable());
|
||||
EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable());
|
||||
|
||||
|
||||
@@ -58,6 +58,10 @@ uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
|
||||
uint32_t borderColorOffsetInDsh = 0;
|
||||
if (!bindlessHeapHelper || (!bindlessHeapHelper->isGlobalDshSupported())) {
|
||||
borderColorOffsetInDsh = static_cast<uint32_t>(dsh->getUsed());
|
||||
// add offset of graphics allocation base address relative to heap base address
|
||||
if (bindlessHeapHelper) {
|
||||
borderColorOffsetInDsh += static_cast<uint32_t>(ptrDiff(dsh->getGpuBase(), bindlessHeapHelper->getGlobalHeapsBase()));
|
||||
}
|
||||
auto borderColor = dsh->getSpace(borderColorSize);
|
||||
|
||||
memcpy_s(borderColor, borderColorSize, ptrOffset(fnDynamicStateHeap, borderColorOffset),
|
||||
|
||||
@@ -462,6 +462,8 @@ void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &
|
||||
|
||||
if (device.getBindlessHeapsHelper()) {
|
||||
bindlessSurfStateBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
globalHeapsBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
useGlobalSshAndDsh = true;
|
||||
}
|
||||
|
||||
StateBaseAddressHelperArgs<Family> stateBaseAddressHelperArgs = {
|
||||
|
||||
@@ -213,6 +213,11 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor,
|
||||
args.dispatchInterface->getDynamicStateHeapData(),
|
||||
args.device->getBindlessHeapsHelper(), rootDeviceEnvironment);
|
||||
|
||||
if (args.device->getBindlessHeapsHelper() && !args.device->getBindlessHeapsHelper()->isGlobalDshSupported()) {
|
||||
// add offset of graphics allocation base address relative to heap base address
|
||||
samplerStateOffset += static_cast<uint32_t>(ptrDiff(dsHeap->getGpuBase(), args.device->getBindlessHeapsHelper()->getGlobalHeapsBase()));
|
||||
}
|
||||
}
|
||||
|
||||
idd.setSamplerStatePointer(samplerStateOffset);
|
||||
@@ -307,7 +312,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
args.partitionCount > 1, // multiOsContextCapable
|
||||
args.isRcs, // isRcs
|
||||
container.doubleSbaWaRef(), // doubleSbaWa
|
||||
heaplessModeEnabled, // heaplessModeEnabled
|
||||
heaplessModeEnabled // heaplessModeEnabled
|
||||
};
|
||||
EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
|
||||
container.setDirtyStateForAllHeaps(false);
|
||||
@@ -670,6 +675,8 @@ void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &
|
||||
|
||||
if (device.getBindlessHeapsHelper()) {
|
||||
bindlessSurfStateBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
globalHeapsBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
useGlobalSshAndDsh = true;
|
||||
}
|
||||
|
||||
StateBaseAddressHelperArgs<Family> stateBaseAddressHelperArgs = {
|
||||
|
||||
@@ -1701,9 +1701,13 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressCommon(
|
||||
auto stateBaseAddressCmdOffset = csrCommandStream.getUsed();
|
||||
auto instructionHeapBaseAddress = getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, getMemoryManager()->isLocalMemoryUsedForIsa(rootDeviceIndex));
|
||||
auto bindlessSurfStateBase = 0ull;
|
||||
auto globalHeapsBase = 0ull;
|
||||
bool useGlobalSshAndDsh = false;
|
||||
|
||||
if (device.getBindlessHeapsHelper()) {
|
||||
bindlessSurfStateBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
globalHeapsBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
useGlobalSshAndDsh = true;
|
||||
}
|
||||
|
||||
STATE_BASE_ADDRESS stateBaseAddressCmd;
|
||||
@@ -1711,7 +1715,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressCommon(
|
||||
generalStateBaseAddress, // generalStateBaseAddress
|
||||
indirectObjectStateBaseAddress, // indirectObjectHeapBaseAddress
|
||||
instructionHeapBaseAddress, // instructionHeapBaseAddress
|
||||
0, // globalHeapsBaseAddress
|
||||
globalHeapsBase, // globalHeapsBaseAddress
|
||||
0, // surfaceStateBaseAddress
|
||||
bindlessSurfStateBase, // bindlessSurfaceStateBaseAddress
|
||||
&stateBaseAddressCmd, // stateBaseAddressCmd
|
||||
@@ -1726,7 +1730,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressCommon(
|
||||
this->lastMemoryCompressionState, // memoryCompressionState
|
||||
true, // setInstructionStateBaseAddress
|
||||
setGeneralStateBaseAddress, // setGeneralStateBaseAddress
|
||||
false, // useGlobalHeapsBaseAddress
|
||||
useGlobalSshAndDsh, // useGlobalHeapsBaseAddress
|
||||
isMultiOsContextCapable(), // isMultiOsContextCapable
|
||||
areMultipleSubDevicesInContext, // areMultipleSubDevicesInContext
|
||||
false, // overrideSurfaceStateBaseAddress
|
||||
|
||||
@@ -72,10 +72,7 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
args.stateBaseAddressCmd->setDynamicStateBaseAddressModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setDynamicStateBufferSizeModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setDynamicStateBaseAddress(args.globalHeapsBaseAddress);
|
||||
args.stateBaseAddressCmd->setDynamicStateBufferSize(MemoryConstants::pageSize64k);
|
||||
|
||||
args.stateBaseAddressCmd->setSurfaceStateBaseAddressModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setSurfaceStateBaseAddress(args.globalHeapsBaseAddress);
|
||||
args.stateBaseAddressCmd->setDynamicStateBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
|
||||
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.globalHeapsBaseAddress);
|
||||
@@ -87,11 +84,11 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
args.stateBaseAddressCmd->setDynamicStateBaseAddress(args.dsh->getHeapGpuBase());
|
||||
args.stateBaseAddressCmd->setDynamicStateBufferSize(args.dsh->getHeapSizeInPages());
|
||||
}
|
||||
}
|
||||
|
||||
if (args.ssh) {
|
||||
args.stateBaseAddressCmd->setSurfaceStateBaseAddressModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
|
||||
}
|
||||
if (args.ssh) {
|
||||
args.stateBaseAddressCmd->setSurfaceStateBaseAddressModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
|
||||
}
|
||||
|
||||
if (args.setInstructionStateBaseAddress) {
|
||||
|
||||
@@ -38,6 +38,12 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
||||
args.stateBaseAddressCmd->setGeneralStateBufferSizeModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setGeneralStateBufferSize(0xfffff);
|
||||
}
|
||||
|
||||
if (args.sbaProperties->dynamicStateBaseAddress.value != StreamProperty64::initValue) {
|
||||
args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddress(args.sbaProperties->dynamicStateBaseAddress.value);
|
||||
args.stateBaseAddressCmd->setBindlessSamplerStateBufferSize(static_cast<uint32_t>(args.sbaProperties->dynamicStateSize.value));
|
||||
args.stateBaseAddressCmd->setBindlessSamplerStateBaseAddressModifyEnable(true);
|
||||
}
|
||||
}
|
||||
if (args.setGeneralStateBaseAddress && is64bit) {
|
||||
args.stateBaseAddressCmd->setGeneralStateBaseAddress(args.gmmHelper->decanonize(args.indirectObjectHeapBaseAddress));
|
||||
|
||||
@@ -137,6 +137,7 @@ HWTEST_F(BindlessCommandEncodeStatesTest, GivenBindlessHeapHelperAndGlobalDshNot
|
||||
auto mockHelper = std::make_unique<MockBindlesHeapsHelper>(pDevice,
|
||||
pDevice->getNumGenericSubDevices() > 1);
|
||||
mockHelper->globalBindlessDsh = false;
|
||||
auto globalBase = mockHelper->getGlobalHeapsBase();
|
||||
|
||||
pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelper.release());
|
||||
|
||||
@@ -155,7 +156,7 @@ HWTEST_F(BindlessCommandEncodeStatesTest, GivenBindlessHeapHelperAndGlobalDshNot
|
||||
|
||||
auto usedBefore = dsh->getUsed();
|
||||
EncodeStates<FamilyType>::copySamplerState(dsh, borderColorSize, numSamplers, 0, memory, pDevice->getBindlessHeapsHelper(), pDevice->getRootDeviceEnvironment());
|
||||
auto expectedValue = usedBefore;
|
||||
auto expectedValue = usedBefore + ptrDiff(dsh->getGpuBase(), globalBase);
|
||||
auto usedAfter = dsh->getUsed();
|
||||
|
||||
EXPECT_EQ(alignUp(usedBefore + sizeof(SAMPLER_BORDER_COLOR_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE) + sizeof(SAMPLER_STATE), usedAfter);
|
||||
|
||||
@@ -98,6 +98,7 @@ HWTEST2_F(SbaForBindlessTests, givenGlobalBindlessBaseAddressWhenProgramStateBas
|
||||
StateBaseAddressHelperArgs<FamilyType> args = createSbaHelperArgs<FamilyType>(cmd, pDevice->getGmmHelper());
|
||||
args.globalHeapsBaseAddress = globalBindlessHeapsBaseAddress;
|
||||
args.useGlobalHeapsBaseAddress = true;
|
||||
args.ssh = &ssh;
|
||||
|
||||
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
|
||||
|
||||
@@ -115,7 +116,8 @@ HWTEST2_F(SbaForBindlessTests, givenGlobalBindlessBaseAddressWhenProgramStateBas
|
||||
EXPECT_EQ(globalBindlessHeapsBaseAddress, cmd->getDynamicStateBaseAddress());
|
||||
|
||||
EXPECT_TRUE(cmd->getSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(globalBindlessHeapsBaseAddress, cmd->getSurfaceStateBaseAddress());
|
||||
EXPECT_NE(globalBindlessHeapsBaseAddress, cmd->getSurfaceStateBaseAddress());
|
||||
EXPECT_EQ(ssh.getHeapGpuBase(), cmd->getSurfaceStateBaseAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(SbaForBindlessTests,
|
||||
|
||||
Reference in New Issue
Block a user