mirror of https://github.com/intel/gmmlib.git
394 lines
18 KiB
C++
394 lines
18 KiB
C++
/*==============================================================================
|
|
Copyright(c) 2017 Intel Corporation
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a
|
|
copy of this software and associated documentation files(the "Software"),
|
|
to deal in the Software without restriction, including without limitation
|
|
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
and / or sell copies of the Software, and to permit persons to whom the
|
|
Software is furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included
|
|
in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
OTHER DEALINGS IN THE SOFTWARE.
|
|
============================================================================*/
|
|
|
|
#include "Internal/Common/GmmLibInc.h"
|
|
|
|
/////////////////////////////////////////////////////////////////////////////////////
|
|
/// This function does any special-case conversion from client-provided pseudo creation
|
|
/// parameters to actual parameters for Hiz, CCS, SeparateStencil and Depth buffers.
|
|
///
|
|
/// @param[in] pTexInfo: Reference to ::GMM_TEXTURE_INFO
|
|
///
|
|
/////////////////////////////////////////////////////////////////////////////////////
|
|
GMM_STATUS GmmLib::GmmTextureCalc::PreProcessTexSpecialCases(GMM_TEXTURE_INFO* pTexInfo)
|
|
{
|
|
GMM_STATUS Status = GMM_SUCCESS;
|
|
const GMM_PLATFORM_INFO* pPlatform = GMM_OVERRIDE_PLATFORM_INFO(pTexInfo);
|
|
|
|
if (!pTexInfo->Flags.Gpu.CCS &&
|
|
!pTexInfo->Flags.Gpu.MCS &&
|
|
!pTexInfo->Flags.Gpu.HiZ &&
|
|
!pTexInfo->Flags.Gpu.SeparateStencil &&
|
|
!pTexInfo->Flags.Gpu.MMC)
|
|
{
|
|
// Fast-out for non-special-cases.
|
|
}
|
|
else if (pTexInfo->Flags.Gpu.HiZ) // ######################################
|
|
{
|
|
// With HiZ surface creation, clients send the size/etc. parameters of
|
|
// the associated Depth Buffer--and here we convert to the appropriate
|
|
// HiZ creation parameters...
|
|
|
|
if ((pTexInfo->BaseWidth > 0) &&
|
|
(pTexInfo->BaseWidth <= pPlatform->HiZ.MaxWidth) &&
|
|
(pTexInfo->BaseHeight > 0)&&
|
|
(pTexInfo->BaseHeight <= pPlatform->HiZ.MaxHeight) &&
|
|
(pTexInfo->Depth <= (
|
|
(pTexInfo->Type == RESOURCE_3D) ?
|
|
pPlatform->HiZ.MaxDepth : 1)) &&
|
|
(pTexInfo->ArraySize <= (
|
|
(pTexInfo->Type == RESOURCE_3D) ?
|
|
1 :
|
|
(pTexInfo->Type == RESOURCE_CUBE) ?
|
|
pPlatform->HiZ.MaxArraySize / 6 :
|
|
pPlatform->HiZ.MaxArraySize)) &&
|
|
// SKL+ does not support HiZ surfaces for 1D and 3D surfaces
|
|
((GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) < IGFX_GEN9_CORE) ||
|
|
(pTexInfo->Type != RESOURCE_1D && pTexInfo->Type != RESOURCE_3D)))
|
|
{
|
|
uint32_t Z_Width, Z_Height, Z_Depth;
|
|
|
|
// Latch Z_[Width/Height/Depth]...
|
|
Z_Width = GFX_ULONG_CAST(pTexInfo->BaseWidth);
|
|
Z_Height = pTexInfo->BaseHeight;
|
|
if ((pTexInfo->Type == RESOURCE_1D) ||
|
|
(pTexInfo->Type == RESOURCE_2D))
|
|
{
|
|
Z_Depth = GFX_MAX(pTexInfo->ArraySize, 1);
|
|
}
|
|
else if (pTexInfo->Type == RESOURCE_3D)
|
|
{
|
|
Z_Depth = pTexInfo->Depth;
|
|
}
|
|
else if (pTexInfo->Type == RESOURCE_CUBE)
|
|
{
|
|
// HW doesn't allow HiZ cube arrays, but GMM is allowing because
|
|
// clients will redescribe depth/HiZ cube arrays as 2D arrays.
|
|
Z_Depth = 6 * GFX_MAX(pTexInfo->ArraySize, 1);
|
|
}
|
|
else
|
|
{
|
|
__GMM_ASSERT(0); // Illegal--Should have caught at upper IF check.
|
|
Z_Depth = 0;
|
|
}
|
|
|
|
// HZ_[Width/Height/QPitch] Calculation...
|
|
{
|
|
uint32_t h0, h1, hL, i, NumSamples, QPitch, Z_HeightL;
|
|
uint32_t HZ_HAlign = 16, HZ_VAlign = 8;
|
|
|
|
if (GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) > IGFX_GEN10_CORE)
|
|
{
|
|
HZ_VAlign = 16;
|
|
}
|
|
|
|
// HZ operates in pixel space starting from SKL. So, it does not care
|
|
// whether the depth buffer is in MSAA mode or not.
|
|
NumSamples =
|
|
(GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) >= IGFX_GEN9_CORE) ?
|
|
1 : pTexInfo->MSAA.NumSamples;
|
|
|
|
pTexInfo->BaseWidth = ExpandWidth(Z_Width, HZ_HAlign, NumSamples);
|
|
|
|
h0 = ExpandHeight(Z_Height, HZ_VAlign, NumSamples);
|
|
|
|
Z_Height = __GmmTexGetMipHeight(pTexInfo, 1);
|
|
h1 = ExpandHeight(Z_Height, HZ_VAlign, NumSamples);
|
|
|
|
if (GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) >= IGFX_GEN8_CORE)
|
|
{
|
|
if (pTexInfo->Type == RESOURCE_3D)
|
|
{
|
|
for (i = 0, Z_HeightL = 0; i <= pTexInfo->MaxLod; i++)
|
|
{
|
|
Z_Height = __GmmTexGetMipHeight(pTexInfo, i);
|
|
hL = ExpandHeight(Z_Height, HZ_VAlign, NumSamples);
|
|
Z_HeightL += (hL * GFX_MAX(1, (Z_Depth / GFX_2_TO_POWER_OF(i))));
|
|
}
|
|
|
|
pTexInfo->ArraySize = 0;
|
|
pTexInfo->BaseHeight = Z_HeightL / 2;
|
|
}
|
|
else
|
|
{
|
|
for (i = 2, Z_HeightL = 0; i <= pTexInfo->MaxLod; i++)
|
|
{
|
|
Z_Height = __GmmTexGetMipHeight(pTexInfo, i);
|
|
Z_HeightL += ExpandHeight(Z_Height, HZ_VAlign, NumSamples);
|
|
}
|
|
|
|
QPitch =
|
|
(pTexInfo->MaxLod > 0) ?
|
|
(h0 + GFX_MAX(h1, Z_HeightL)) :
|
|
h0;
|
|
QPitch /= 2;
|
|
pTexInfo->ArraySize = Z_Depth;
|
|
pTexInfo->BaseHeight = QPitch;
|
|
}
|
|
|
|
pTexInfo->Alignment.HAlign = HZ_HAlign;
|
|
pTexInfo->Alignment.VAlign = HZ_VAlign / 2;
|
|
}
|
|
else //if (GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) >= IGFX_GEN7_CORE)
|
|
{
|
|
if (pTexInfo->Type == RESOURCE_3D)
|
|
{
|
|
for (i = 0, Z_HeightL = 0; i <= pTexInfo->MaxLod; i++)
|
|
{
|
|
hL = ExpandHeight(Z_Height >> i, HZ_VAlign, NumSamples);
|
|
Z_HeightL += (hL * GFX_MAX(1, (Z_Depth / GFX_2_TO_POWER_OF(i))));
|
|
}
|
|
|
|
pTexInfo->BaseHeight = Z_HeightL / 2;
|
|
}
|
|
else
|
|
{
|
|
QPitch = h0 + h1 + 12 * HZ_VAlign;
|
|
|
|
pTexInfo->BaseHeight = GFX_CEIL_DIV((QPitch * Z_Depth / 2), 8) * 8;
|
|
}
|
|
|
|
pTexInfo->ArraySize = 1;
|
|
}
|
|
}
|
|
|
|
/// Native HZ Params //////////////////////////////////////////////////
|
|
pTexInfo->BitsPerPixel = 8;
|
|
pTexInfo->Depth = 1;
|
|
pTexInfo->Format = GMM_FORMAT_GENERIC_8BIT;
|
|
pTexInfo->MaxLod = 0;
|
|
pTexInfo->MSAA.NumSamples = 1;
|
|
pTexInfo->MSAA.SamplePattern = GMM_MSAA_DISABLED;
|
|
pTexInfo->Type = RESOURCE_2D;
|
|
|
|
// HiZ Always Tile-Y
|
|
pTexInfo->Flags.Info.Linear = false;
|
|
pTexInfo->Flags.Info.TiledW = false;
|
|
pTexInfo->Flags.Info.TiledX = false;
|
|
pTexInfo->Flags.Info.TiledY = true;
|
|
pTexInfo->Flags.Info.TiledYf = false;
|
|
pTexInfo->Flags.Info.TiledYs = false;
|
|
}
|
|
else
|
|
{
|
|
GMM_ASSERTDPF(0, "Illegal HiZ creation parameters!");
|
|
Status = GMM_ERROR;
|
|
}
|
|
} // HiZ
|
|
else if (pTexInfo->Flags.Gpu.CCS ||
|
|
pTexInfo->Flags.Gpu.MCS) // ######################################
|
|
{
|
|
// With CCS surface creation, clients send height, width, depth, etc. of
|
|
// the associated RenderTarget--and here we convert to the appropriate CCS
|
|
// creation parameters...
|
|
__GMM_ASSERT((pTexInfo->Flags.Info.Linear + pTexInfo->Flags.Info.TiledW + pTexInfo->Flags.Info.TiledX + pTexInfo->Flags.Info.TiledY) == 1);
|
|
__GMM_ASSERT((pTexInfo->MSAA.NumSamples == 1) || (pTexInfo->MSAA.NumSamples == 2) || (pTexInfo->MSAA.NumSamples == 4) ||
|
|
(pTexInfo->MSAA.NumSamples == 8) || (pTexInfo->MSAA.NumSamples == 16));
|
|
|
|
if (pTexInfo->MSAA.NumSamples > 1 &&
|
|
(GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) <= IGFX_GEN10_CORE || pTexInfo->Flags.Gpu.MCS)) // CCS for MSAA Compression
|
|
{
|
|
//__GMM_ASSERT(!pTexInfo->Flags.Gpu.UnifiedAuxSurface);
|
|
|
|
if ((pTexInfo->MSAA.NumSamples == 2) || (pTexInfo->MSAA.NumSamples == 4))
|
|
{
|
|
pTexInfo->BitsPerPixel = 8;
|
|
pTexInfo->Format = GMM_FORMAT_R8_UINT;
|
|
}
|
|
else if(pTexInfo->MSAA.NumSamples == 8)
|
|
{
|
|
pTexInfo->BitsPerPixel = 32;
|
|
pTexInfo->Format = GMM_FORMAT_R32_UINT;
|
|
}
|
|
else //if(pTexInfo->MSAA.NumSamples == 16)
|
|
{
|
|
pTexInfo->BitsPerPixel = 64;
|
|
pTexInfo->Format = GMM_FORMAT_GENERIC_64BIT;
|
|
}
|
|
|
|
if ((Status = __GmmTexFillHAlignVAlign(pTexInfo)) != GMM_SUCCESS) // Need to get our alignment (matching RT) before overwriting our RT's MSAA setting.
|
|
{
|
|
return Status;
|
|
}
|
|
pTexInfo->MSAA.NumSamples = 1; // CCS itself isn't MSAA'ed.
|
|
pTexInfo->Flags.Gpu.__MsaaTileMcs = 1; // Gmm flag to recognize MCS different than CCS
|
|
}
|
|
else // Non-MSAA CCS Use (i.e. Render Target Fast Clear)
|
|
{
|
|
if (!pTexInfo->Flags.Info.Linear &&
|
|
!pTexInfo->Flags.Info.TiledW &&
|
|
((GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) < IGFX_GEN9_CORE) ||
|
|
!pTexInfo->Flags.Info.TiledX) &&
|
|
((GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) <= IGFX_GEN10_CORE) ||
|
|
(pTexInfo->Flags.Info.TiledY || pTexInfo->Flags.Info.TiledYs)) && //!Yf - deprecate Yf
|
|
((GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) >= IGFX_GEN8_CORE) ||
|
|
((pTexInfo->MaxLod == 0) &&
|
|
(pTexInfo->ArraySize <= 1))) &&
|
|
((GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) > IGFX_GEN10_CORE) ||
|
|
((pTexInfo->BitsPerPixel == 32) ||
|
|
(pTexInfo->BitsPerPixel == 64) ||
|
|
(pTexInfo->BitsPerPixel == 128))))
|
|
{
|
|
// For non-MSAA CCS usage, the Bspec has four tables of
|
|
// requirements:
|
|
// (1) RT Alignment (GMM Don't Care: Occurs Naturally)
|
|
// (2) ClearRect Alignment
|
|
// (3) ClearRect Scaling (GMM Don't Care: GHAL3D Matter)
|
|
// (4) Non-MSAA CCS Sizing
|
|
|
|
// Gen8+:
|
|
// Since mip-mapped and arrayed surfaces are supported, we
|
|
// deal with alignment later at per mip level. Here, we set
|
|
// tiling type only. TileX is not supported on Gen9+.
|
|
// Pre-Gen8:
|
|
// (!) For all the above, the bspec has separate entries for
|
|
// 32/64/128bpp--and then deals with PIXEL widths--Here,
|
|
// though, we will unify by considering 8bpp table entries
|
|
// (unlisted--i.e. do the math)--and deal with BYTE widths.
|
|
|
|
// (1) RT Alignment -- The surface width and height don't
|
|
// need to be padded to RT CL granularity. On HSW, all tiled
|
|
// RT's will have appropriate alignment (given 4KB surface
|
|
// base and no mip-map support) and appropriate padding
|
|
// (due to tile padding). On BDW+, GMM uses H/VALIGN that
|
|
// will guarantee the MCS RT alignment for all subresources.
|
|
|
|
// (2) ClearRect Alignment -- I.e. FastClears must be done
|
|
// with certain granularity:
|
|
// TileY: 512 Bytes x 128 Lines
|
|
// TileX: 1024 Bytes x 64 Lines
|
|
// So a CCS must be sized to match that granularity (though
|
|
// the RT itself need not be fully padded to that
|
|
// granularity to use FastClear).
|
|
|
|
// (4) Non-MSAA CCS Sizing -- CCS sizing is based on the
|
|
// size of the FastClear (with granularity padding) for the
|
|
// paired RT. CCS's (byte widths and heights) are scaled
|
|
// down from their RT's by:
|
|
// TileY: 32 x 32
|
|
// TileX: 64 x 16
|
|
|
|
// ### Example #############################################
|
|
// RT: 800x600, 32bpp, TileY
|
|
// 8bpp: 3200x600
|
|
// FastClear: 3584x640 (for TileY FastClear Granularity of 512x128)
|
|
// CCS: 112x20 (for TileY RT:CCS Sizing Downscale of 32x32)
|
|
|
|
if(GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) >= IGFX_GEN8_CORE)
|
|
{
|
|
pTexInfo->Flags.Gpu.__NonMsaaTileYCcs = pTexInfo->Flags.Info.TiledY || pTexInfo->Flags.Info.TiledYf || pTexInfo->Flags.Info.TiledYs;
|
|
pTexInfo->Flags.Gpu.__NonMsaaTileXCcs = pTexInfo->Flags.Info.TiledX;
|
|
}
|
|
else
|
|
{
|
|
// Pixel Alignment doubled for HSW GT3 and GTx E0+ (Not required for BDW+)
|
|
uint32_t AlignmentFactor = pGmmGlobalContext->GetWaTable().WaDoubleFastClearWidthAlignment ? 2 : 1;
|
|
|
|
pTexInfo->BaseWidth = pTexInfo->BaseWidth * pTexInfo->BitsPerPixel / 8;
|
|
pTexInfo->BitsPerPixel = 8;
|
|
pTexInfo->Format = GMM_FORMAT_R8_UINT;
|
|
|
|
if (pTexInfo->Flags.Info.TiledY) //-------- Fast Clear Granularity
|
|
{ // /--- RT:CCS Sizing Downscale
|
|
pTexInfo->BaseWidth = GFX_ALIGN(pTexInfo->BaseWidth, 512 * AlignmentFactor) / 32;
|
|
pTexInfo->BaseHeight = GFX_ALIGN(pTexInfo->BaseHeight, 128) / 32;
|
|
}
|
|
else //if(pTexInfo->Flags.Info.TiledX)
|
|
{
|
|
pTexInfo->BaseWidth = GFX_ALIGN(pTexInfo->BaseWidth, 1024 * AlignmentFactor) / 64;
|
|
pTexInfo->BaseHeight = GFX_ALIGN(pTexInfo->BaseHeight, 64) / 16;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
GMM_ASSERTDPF(0, "Illegal CCS creation parameters!");
|
|
Status = GMM_ERROR;
|
|
}
|
|
}
|
|
|
|
if (!pTexInfo->Flags.Gpu.__NonMsaaLinearCCS)
|
|
{
|
|
// CCS Always Tile-Y (Even for Non-MSAA FastClear.)
|
|
pTexInfo->Flags.Info.Linear = false;
|
|
pTexInfo->Flags.Info.TiledW = false;
|
|
pTexInfo->Flags.Info.TiledX = false;
|
|
pTexInfo->Flags.Info.TiledY = true;
|
|
pTexInfo->Flags.Info.TiledYf = false;
|
|
pTexInfo->Flags.Info.TiledYs = false;
|
|
|
|
//Clear compression request in CCS
|
|
pTexInfo->Flags.Info.RenderCompressed = false;
|
|
pTexInfo->Flags.Info.MediaCompressed = false;
|
|
}
|
|
|
|
} // CCS
|
|
else if (pTexInfo->Flags.Gpu.SeparateStencil) // ##########################
|
|
{
|
|
// Seperate stencil sizing is based on the associated depth buffer
|
|
// size, however UMD manages this sizing, and GMM will allocate any
|
|
// arbitrarily sized stencil. Stencils do have specific tiling
|
|
// requirements however, which is handled below.
|
|
|
|
if ((pTexInfo->BaseWidth > 0) &&
|
|
(pTexInfo->BaseHeight > 0))
|
|
{
|
|
__GMM_ASSERT(pTexInfo->BitsPerPixel == 8);
|
|
|
|
if (GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) < IGFX_GEN7_CORE)
|
|
{
|
|
GMM_ASSERTDPF((pTexInfo->MaxLod == 0), "Stencil Buffer LOD's not supported!");
|
|
}
|
|
|
|
// Separate Stencil Tile-W Gen8-Gen10, otherwise Tile-Y
|
|
pTexInfo->Flags.Info.Linear = false;
|
|
pTexInfo->Flags.Info.TiledX = false;
|
|
pTexInfo->Flags.Info.TiledYf = false;
|
|
pTexInfo->Flags.Info.TiledYs = false;
|
|
pTexInfo->Flags.Info.TiledW = false;
|
|
pTexInfo->Flags.Info.TiledY = false;
|
|
|
|
if (GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) >= IGFX_GEN8_CORE &&
|
|
GFX_GET_CURRENT_RENDERCORE(pPlatform->Platform) <= IGFX_GEN10_CORE)
|
|
{
|
|
pTexInfo->Flags.Info.TiledW = true;
|
|
}
|
|
else
|
|
{
|
|
pTexInfo->Flags.Info.TiledY = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
GMM_ASSERTDPF(0, "Illegal Separate Stencil creation parameters!");
|
|
Status = GMM_ERROR;
|
|
}
|
|
} // Separate Stencil
|
|
else if (pTexInfo->Flags.Gpu.MMC && pTexInfo->Flags.Gpu.UnifiedAuxSurface)
|
|
{
|
|
pTexInfo->Flags.Gpu.__NonMsaaLinearCCS = true;
|
|
pTexInfo->Flags.Info.Linear = true;
|
|
}
|
|
|
|
return Status;
|
|
}
|