且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

RDS SQL Server - 专题分享 - 巧用执行计划缓存之数据类型隐式转换

更新时间:2022-06-09 05:49:10

摘要

SQL Server数据库基表数据类型隐式转换,会导致Index Scan或者Clustered Index Scan的问题,这篇文章分享如何巧用执行计划缓存来发现数据类型隐式转换的查询语句,从而可以有针对性的优化查询,解决高CPU使用率的问题。

问题引入

数据类型转化是导致SQL Server高CPU使用率的又一大杀手,详情参见之前的云栖社区文章:RDS SQL Server - ***实践 - 高CPU使用率系列之数据类型转换。SQL Server对基表数据类型转换会导致Index Scan或者Clustered Index Scan,进而导致IO使用率的大幅上升,最终导致CPU的使用率大幅升高。这篇文章是从执行计划缓存缓存的角度来找出导致数据类型转换的查询语句,进而做有针对性的查询语句优化,来破解高CPU使用率的问题。

测试环境

为了更好的展示从执行计划缓存缓存中找出导致数据类型转化的查询语句,我们先建立测试环境。

-- Create testing database
IF DB_ID('TestDb') IS NULL
    CREATE DATABASE TestDb;
GO

USE TestDb
GO

-- create demo table SalesOrder
IF OBJECT_ID('dbo.SalesOrder', 'U') IS NOT NULL
BEGIN
    TRUNCATE TABLE dbo.SalesOrder
    DROP TABLE dbo.SalesOrder
END
GO

CREATE TABLE dbo.SalesOrder
(
    RowID INT IDENTITY(1,1) NOT NULL
    , OrderID UNIQUEIDENTIFIER NOT NULL
    , ItemID INT NOT NULL
    , UserID INT NOT NULL
    , OrderQty INT NOT NULL
    , Price DECIMAL(8,2) NOT NULL
    , OrderDate DATETIME NOT NULL 
        CONSTRAINT DF_OrderDate DEFAULT(GETDATE())
    , LastUpdateTime DATETIME NULL
    , OrderComment NVARCHAR(100) NULL
    , CONSTRAINT PK_SalesOrder PRIMARY KEY(
        OrderID
    )
);

-- data init for 2 M records.
;WITH a 
AS (
    SELECT * 
    FROM (VALUES(1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) AS a(a)
), RoundData
AS(
SELECT TOP(2000000)
    OrderID = NEWID()
    ,ItemIDRound = abs(checksum(newid()))
    ,Price = a.a * b.a * 10
    ,OrderQty = a.a + b.a + c.a + d.a + e.a + f.a + g.a + h.a
FROM a, a AS b, a AS c, a AS d, a AS e, a AS f, a AS g, a AS h
), DATA
AS(
SELECT 
    OrderID
    ,ItemID = cast(ROUND((1300 * (ItemIDRound*1./cast(replace(ItemIDRound, ItemIDRound, '1' + replicate('0', len(ItemIDRound))) as bigint)) + 101), 0) as int)
    ,UserID = cast(ROUND((500 * (ItemIDRound*1./cast(replace(ItemIDRound, ItemIDRound, '1' + replicate('0', len(ItemIDRound))) as bigint)) + 10000), 0) as int)
    ,OrderQty
    ,Price = cast(Price AS DECIMAL(8,2))
    ,OrderDate = dateadd(day, -cast(ROUND((50 * (ItemIDRound*1./cast(replace(ItemIDRound, ItemIDRound, '1' + replicate('0', len(ItemIDRound))) as bigint)) + 1), 0) as int) ,GETDATE())
FROM RoundData
)
INSERT INTO dbo.SalesOrder(OrderID, ItemID, UserID, OrderQty, Price, OrderDate, LastUpdateTime, OrderComment)
SELECT 
    OrderID
    , ItemID
    , UserID
    , OrderQty
    , Price
    , OrderDate
    , LastUpdateTime = OrderDate
    , OrderComment = N'User ' + CAST(UserID AS NVARCHAR(8)) + N' purchased item ' + CAST(ItemID AS NVARCHAR(8))
FROM DATA;
GO

--===============query

USE [TestDb]
GO
ALTER TABLE dbo.SalesOrder
ALTER COLUMN UserID CHAR(8) NULL
GO

EXEC sys.sp_help 'dbo.SalesOrder'

USE [TestDb]
GO
SELECT
    ItemID, OrderQty, Price
FROM dbo.SalesOrder
WHERE UserID = 10057

SELECT TOP 100
    ItemID, OrderQty, Price
FROM dbo.SalesOrder
WHERE UserID = 10058
    AND OrderDate >= DATEADD(MONTH, -1, GETDATE())
    AND OrderDate <= GETDATE();


USE [TestDb]
GO
SELECT
    ItemID, OrderQty, Price
FROM dbo.SalesOrder
WHERE UserID = 10059

SELECT TOP 100
    ItemID, OrderQty, Price
FROM dbo.SalesOrder
WHERE UserID = 10061
    AND OrderDate >= DATEADD(MONTH, -1, GETDATE())
    AND OrderDate <= GETDATE();

查看SalesOrder表结构,我们很清楚的看到UserID数据类型是CHAR(8),而查询WHERE语句中的WHERE UserID = XXXX中,等号右边的数据类型为INT,这会导致SQL Server将数据类型优先级低的数据类型CHAR转化为INT,SQL Server需要将这个表中的200万条记录的UserID从CHAR(8)转化为INT。所以,必须进行Scan操作,从而导致高CPU使用率。
RDS SQL Server - 专题分享 - 巧用执行计划缓存之数据类型隐式转换

从执行计划来看,也的确导致了数据类型的隐式转换:
RDS SQL Server - 专题分享 - 巧用执行计划缓存之数据类型隐式转换

解决方法

从测试环境部分,我们发现的确导致了数据类型的隐式转换。以下短短100行代码,可以从执行计划缓存中找出导致数据类型隐式转化的查询语句和执行计划的详细信息。

USE testdb
GO
DECLARE
    @db_name SYSNAME
;
SET
    @db_name = QUOTENAME(DB_NAME())
;

;WITH XMLNAMESPACES (DEFAULT 'http://schemas.microsoft.com/sqlserver/2004/07/showplan')
, planCache
AS(
    SELECT 
       stmt.value('(@StatementText)[1]', 'varchar(max)') AS stmt, 
       n.t.value('(ScalarOperator/Identifier/ColumnReference/@Database)[1]', 'sysname') AS DatabaseName,
       REPLACE(REPLACE(n.t.value('(ScalarOperator/Identifier/ColumnReference/@Schema)[1]', 'sysname'), '[', ''), ']', '') AS SchemaName, 
       REPLACE(REPLACE(n.t.value('(ScalarOperator/Identifier/ColumnReference/@Table)[1]', 'sysname'), '[', ''), ']', '') AS ObjectName, 
       REPLACE(REPLACE(n.t.value('(ScalarOperator/Identifier/ColumnReference/@Column)[1]', 'sysname'), '[', ''), ']', '') AS ColumnName, 
       n.t.value('(@DataType)[1]', 'sysname') AS ConvertTo, 
       n.t.value('(@Length)[1]', 'int') AS ConvertToLength, 
       PhysicalOperator.value('(.//Predicate/ScalarOperator/@ScalarString)[1]','nvarchar(max)') AS ScalarString,
       query_plan,
       cp.plan_handle
    FROM sys.dm_exec_cached_plans AS cp 
        CROSS APPLY sys.dm_exec_query_plan(plan_handle) AS qp
        CROSS APPLY query_plan.nodes('/ShowPlanXML/BatchSequence/Batch/Statements/StmtSimple') AS batch(stmt)
        CROSS APPLY stmt.nodes('//ScalarOperator/Compare/ScalarOperator/Convert[@Implicit="1"]') AS n(t) 
        CROSS APPLY batch.stmt.nodes('.//RelOp[@PhysicalOp=''Index Scan'' or @PhysicalOp=''Clustered Index Scan'']') as RelOp(PhysicalOperator)
    WHERE n.t.exist('ScalarOperator/Identifier/ColumnReference[@Database=sql:variable("@db_name")][@Schema != ''[sys]'']') = 1
)
SELECT 
    pc.stmt,
    pc.DatabaseName,
    pc.SchemaName,
    pc.ObjectName,
    pc.ColumnName,
    ps.UsedPageCount,
    ix.name AS IndexName,
    CAST(ps.UsedPageCount/ 128. AS decimal(12,2)) AS UsedSizeMB,
    ps.TotalRowCount,
    qs.execution_count * UsedPageCount AS MostLogicalRead,
    cols.DATA_TYPE AS ConvertFrom, 
    cols.CHARACTER_MAXIMUM_LENGTH AS ConvertFromLength,
    pc.ConvertTo,
    pc.ConvertToLength,
    pc.ScalarString,
    pc.query_plan,
    qs.creation_time
    ,qs.last_execution_time
    ,qs.execution_count
    ,qs.total_worker_time
    ,qs.last_worker_time
    ,qs.min_worker_time
    ,qs.max_worker_time
    ,qs.total_physical_reads
    ,qs.last_physical_reads
    ,qs.min_physical_reads
    ,qs.max_physical_reads
    ,qs.total_logical_writes
    ,qs.last_logical_writes
    ,qs.min_logical_writes
    ,qs.max_logical_writes
    ,qs.total_logical_reads
    ,qs.last_logical_reads
    ,qs.min_logical_reads
    ,qs.max_logical_reads
    ,qs.total_clr_time
    ,qs.last_clr_time
    ,qs.min_clr_time
    ,qs.max_clr_time
    ,qs.total_elapsed_time
    ,qs.last_elapsed_time
    ,qs.min_elapsed_time
    ,qs.max_elapsed_time
    ,qs.total_rows
    ,qs.last_rows
    ,qs.min_rows
    ,qs.max_rows
FROM planCache AS pc
    INNER JOIN INFORMATION_SCHEMA.COLUMNS as cols WITH(NOLOCK)
    ON pc.SchemaName = cols.TABLE_SCHEMA
    AND pc.ObjectName = cols.TABLE_NAME
    AND pc.ColumnName = cols.COLUMN_NAME
    INNER JOIN sys.tables as tb WITH(NOLOCK)
    ON tb.schema_id = schema_id(pc.SchemaName)
        AND tb.name = pc.ObjectName
    INNER JOIN sys.indexes as ix WITH(NOLOCK)
    ON tb.object_id = ix.object_id
    LEFT JOIN (
                SELECT 
                    object_id,
                    index_id,
                    sum(used_page_count) AS UsedPageCount,
                    sum(row_count) AS TotalRowCount 
                FROM sys.dm_db_partition_stats as dps WITH(NOLOCK)
                GROUP BY object_id,Index_id
                ) as ps 
    ON ix.object_id = ps.object_id and ix.index_id = ps.index_id
    left join sys.dm_exec_query_stats qs on pc.plan_handle= qs.plan_handle
--DBCC freeproccache

查询结果的一步截图如下:
RDS SQL Server - 专题分享 - 巧用执行计划缓存之数据类型隐式转换

总结

这篇文章分享了如何从执行计划缓存中找到导致SQL Server数据类型隐式转化的查询语句,为我们针对特定查询语句的优化提供了基础,最终破解高CPU使用率的问题。