你的位置:首页 > 数据库

[数据库]一个有趣的SQL Server 层级汇总数据问题


    看SQL Server 大V宋大侠的博客文章,发现了一个有趣的sql server 层级汇总数据问题。
    
    具体的问题如下:
    parent_id emp_id emp_name total_amout 
    NULL 2 Andrew 200 
    2 1 Nancy 100 
    2 3 Janet 120 
    3 4 Michael 80 
    1 5 Robert 50 
    每个员工的总销售额=自己的销售额+其下级员工的总销售额, 
    比如: 
    Andrew = 200_100_120_80_50=550 
    Nancy = 100+50=150 
    Janet = 120+80=200 
    Michael = 80 
    Robert = 50 
    这个用SQL怎样可以查询得到,请教一下大家???
 
    从数据表中的数据以及问题阐述来看可以确定该数据表是个父子层级类型数据表,这个在纬度类型中是一种比较常见:父子纬度。 从名字解释来看就是一种自引用的数据表,大家最熟悉的组织机构就是具有这种层级结构,其中不同级别的机构具有共同的特性。这种层级结构如下图所示:

以上图来自百度查询得到。
         
     看来宋大侠针对该问题的解决方案(CTE递归查询+游标),还有其他的博友的评论(有的支出数据表设计的不完善,还有通过虚拟出层级层级字符串列来实现的,还有获取当前层级以下所有层级的汇总封装成存储等等)。
    
    为了实现该问题,我使用的是CTE递归查询+APPLY,具体的实现思路如下:
    1、通过CTE递归查询虚拟出若干列,其中就有层级索引字符串列(该列表示具有层级的标识ID的字符串格式,便于查找)。
    2、使用APPLY来实现汇总数据(当然也可以使用SELECT + SUBQUERY)。
   
具体演示实现代码如下:
IF OBJECT_ID(N'dbo.MyEmp', N'U') IS NOT NULLBEGIN  DROP TABLE dbo.MyEmp;ENDGOCREATE TABLE dbo.MyEmp (  MyEmpID INT NOT NULL,  ParentID INT NULL,  MyEmpName NVARCHAR(20) NOT NULL,  HoursSalary INT NOT NULL);GOIF OBJECT_ID(N'PK_U_CL_MyEmp_MyEmpID', N'PK') IS NULLBEGIN  ALTER TABLE [dbo].[MyEmp] ADD CONSTRAINT [PK_U_CL_MyEmp_MyEmpID] PRIMARY KEY CLUSTERED   (    [MyEmpID] ASC  )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)   ON [PRIMARY];ENDGOIF OBJECT_ID(N'FK_MyEmp_MyEmp_ParentID', N'F') IS NULLBEGIN  ALTER TABLE dbo.MyEmp ADD CONSTRAINT FK_MyEmp_MyEmp_ParentID FOREIGN KEY (ParentID) REFERENCES dbo.MyEmp (MyEmpID);ENDGO-- Insert Test DataINSERT INTO dbo.MyEmp (MyEmpID, ParentID, MyEmpName, HoursSalary) VALUES (1, NULL, N'Andrew', 200),(2, 1, N'Nancy', 100),(3, 1, N'Janet', 120),(4, 3, N'Michael', 80),(5, 2, N'Robert', 50)GO;WITH tData (MyEmpID, MyEmpName, ParentID, ParentName, HoursSalary, ParentHierarchyIndex, HierarchyIndex, LevelID, HierarchyName, HierarchyName2) AS (  -- 基准点查询  SELECT MyEmpID                                                /*雇员ID*/    ,MyEmpName                                                /*雇员名称*/    ,ISNULL(ParentID, 0) AS ParentID                                    /*父雇员ID*/    ,CAST(N'' AS NVARCHAR(20)) AS ParentName                                /*父雇员名称*/    ,HoursSalary                                              /*小时薪水*/    ,CAST(CONCAT(',', ISNULL(ParentID, 0), ',') AS VARCHAR(300)) AS ParentHierarchyIndex          /*父层级索引字符串*/    ,CAST(CONCAT(',', ISNULL(ParentID, 0), ',', MyEmpID, ',') AS VARCHAR(300)) AS HierarchyIndex      /*层级索引字符串串,包含当前层级*/     ,CAST(1 AS INT) AS LevelID                                        /*层级ID,根层级为1,层级越深则数字越大*/    ,CAST(MyEmpName AS NVARCHAR(800)) AS HierarchyName                            /*层级名称,树形结构显示*/    ,CAST(MyEmpName AS NVARCHAR(80)) AS HierarchyName2                            /*层级名称2,水平结构显示*/  FROM dbo.MyEmp  WHERE ParentID IS NULL  -- 递归查询  UNION ALL  SELECT T.MyEmpID    ,T.MyEmpName    ,T.ParentID    ,T2.MyEmpName    ,T.HoursSalary    ,CAST(CONCAT(T2.ParentHierarchyIndex, T.ParentID, ',') AS VARCHAR(300)) AS ParentHierarchyIndex    ,CAST(CONCAT(T2.HierarchyIndex, T.MyEmpID, ',') AS VARCHAR(300)) AS HierarchyIndex     ,T2.LevelID + 1 AS LevelID    ,CAST(CONCAT(REPLICATE(N'|  ', T2.LevelID), T.MyEmpName) AS NVARCHAR(800)) AS HierarchyName    ,CAST(CONCAT(T2.HierarchyName2, '->', T.MyEmpName) AS NVARCHAR(80)) AS HierarchyName2  FROM dbo.MyEmp AS T    INNER JOIN tData AS T2      ON T.ParentID = T2.MyEmpID) -- 使用HierarchyIndex来实现-- CROSS APPLYSELECT T.*, T2.TotalSalary AS TotalSalaryFROM tData AS T  CROSS APPLY (SELECT SUM(tData.HoursSalary) AS TotalSalary  FROM tData  WHERE HierarchyIndex LIKE CONCAT(T.HierarchyIndex, '%')) AS T2ORDER BY T.HierarchyIndex ASC;-- SELECT + 子查询--SELECT T.*, TotalSalary = ( SELECT SUM(tData.HoursSalary) FROM tData WHERE tData.HierarchyIndex LIKE CONCAT(T.HierarchyIndex, '%'))--FROM tData AS T--ORDER BY T.HierarchyIndex ASC;-- 使用ParentHierarchyIndex-- CROSS APPLY--SELECT T.*, T.HoursSalary + T2.DownMemberTotalHoursSalary AS TotalSalary--FROM tData AS T--  CROSS APPLY (--    SELECT ISNULL(SUM(tData.HoursSalary), 0) AS DownMemberTotalHoursSalary--    FROM tData--    WHERE tData.ParentHierarchyIndex LIKE CONCAT(T.ParentHierarchyIndex, T.MyEmpID, '%')--  ) AS T2--ORDER BY T.HierarchyIndex ASC;-- SELECT + 子查询--SELECT T.*, T.HoursSalary + (SELECT ISNULL(SUM(tData.HoursSalary), 0) AS DownMemberTotalHoursSalary--    FROM tData--    WHERE tData.ParentHierarchyIndex LIKE CONCAT(T.ParentHierarchyIndex, T.MyEmpID, '%')) AS TotalSalary--FROM tData AS T--ORDER BY T.HierarchyIndex ASC;GO

  

以上解决方案是在不修改数据结构的情况下来实现的,从以上解决方案中,我们可以从数据表的设计入手,将虚拟出来的父层级索引字符串列增加到数据表中,将该列创建为聚集索引, 便于提高查询性能。
 
增加新列的T-SQL脚本如下:
IF NOT EXISTS (SELECT 1 FROM sys.columns WHERE OBJECT_ID = OBJECT_ID(N'dbo.MyEmp', N'U') AND name = N'HierarchyIndex')BEGIN  ALTER TABLE dbo.MyEmp ADD HierarchyIndex VARCHAR(800) NOT NULL CONSTRAINT DF_MyEmp_HierarchyIndex DEFAULT '';ENDGO

  如果该列创建为聚集且唯一,则相应的T-SQL脚本如下:

-- 删除外键IF OBJECT_ID(N'FK_MyEmp_MyEmp_ParentID', N'F') IS NOT NULLBEGIN  ALTER TABLE dbo.MyEmp DROP CONSTRAINT FK_MyEmp_MyEmp_ParentID;ENDGO-- 删除主键IF OBJECT_ID(N'PK_U_CL_MyEmp_MyEmpID', N'PF') IS NULLBEGIN  ALTER TABLE dbo.MyEmp DROP CONSTRAINT PK_U_CL_MyEmp_MyEmpID;ENDGO-- 创建(唯一:语义分析得到的,没有使用创建UNIQUE关键字)聚集索引IF NOT EXISTS (SELECT 1 FROM sys.indexes WHERE OBJECT_ID = OBJECT_ID(N'dbo.MyEmp', N'U') AND name = N'IX_U_CL_MyEmp_HierarchyIndex')BEGIN  CREATE CLUSTERED INDEX IX_U_CL_MyEmp_HierarchyIndex ON dbo.MyEmp  (    HierarchyIndex ASC  ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)ENDGO-- 创建主键且非聚集索引IF OBJECT_ID(N'PK_U_CL_MyEmp_MyEmpID', N'PK') IS NULLBEGIN  ALTER TABLE [dbo].[MyEmp] ADD CONSTRAINT [PK_U_NCL_MyEmp_MyEmpID] PRIMARY KEY NONCLUSTERED   (    [MyEmpID] ASC  )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)   ON [PRIMARY];ENDGO-- 创建外键IF OBJECT_ID(N'FK_MyEmp_MyEmp_ParentID', N'F') IS NULLBEGIN  ALTER TABLE dbo.MyEmp ADD CONSTRAINT FK_MyEmp_MyEmp_ParentID FOREIGN KEY (ParentID) REFERENCES dbo.MyEmp (MyEmpID);ENDGO

  同步N'HierarchyIndex字段列值的T-SQL如下:

;WITH tData (MyEmpID, HierarchyIndex) AS (  -- 基准点查询  SELECT MyEmpID, CAST(CONCAT(',', ISNULL(ParentID, 0), ',', MyEmpID, ',') AS VARCHAR(300)) AS HierarchyIndex  FROM dbo.MyEmp  WHERE ParentID IS NULL  -- 递归查询  UNION ALL  SELECT T.MyEmpID, CAST(CONCAT(T2.HierarchyIndex, T.MyEmpID, ',') AS VARCHAR(300)) AS HierarchyIndex  FROM dbo.MyEmp AS T /*子表*/    INNER JOIN tData AS T2 /*父表*/      ON T.ParentID = T2.MyEmpID)--SELECT T.*UPDATE T2SET T2.HierarchyIndex = T.HierarchyIndexFROM tData AS T  INNER JOIN dbo.MyEmp AS T2    ON T.MyEmpID = T2.MyEmpIDWHERE T2.HierarchyIndex = '';GO

  使用以下T-SQL验证数据是否已经修改:

SELECT MyEmpID, ParentID, MyEmpName, HoursSalary, HierarchyIndexFROM dbo.MyEmp;GO

  解决该问题的解决方案的T-SQL语句如下:

SELECT T.MyEmpID, T.ParentID, T.MyEmpName, T.HoursSalary, T.HierarchyIndex, T2.TotalSalary AS TotalSalaryFROM dbo.MyEmp AS T  CROSS APPLY (SELECT SUM(HoursSalary) AS TotalSalary FROM dbo.MyEmp WHERE HierarchyIndex LIKE CONCAT(T.HierarchyIndex, '%')) AS T2;GO