相信很多公司都會有需要列出每個年齡層員工人數,以下的SQL介紹如何方便的算出各年齡層的員工人數
最近跟同事在練習SQL時,有個題目是「以10歲為間隔列出每個年齡層員工人數」,以下先準備資料,
USE tempdb
go
--建立測試的資料表
CREATE TABLE [dbo].[EmployeesAge](
[empid] [int] IDENTITY(1,1) NOT NULL,
[lastname] [nvarchar](20) NOT NULL,
[firstname] [nvarchar](10) NOT NULL,
[title] [nvarchar](30) NOT NULL,
[titleofcourtesy] [nvarchar](25) NOT NULL,
[birthdate] [datetime] NOT NULL,
[hiredate] [datetime] NOT NULL,
[address] [nvarchar](60) NOT NULL,
[city] [nvarchar](15) NOT NULL,
[region] [nvarchar](15) NULL,
[postalcode] [nvarchar](10) NULL,
[country] [nvarchar](15) NOT NULL,
[phone] [nvarchar](24) NOT NULL,
[mgrid] [int] NULL
) ON [PRIMARY]
GO
--新增 10 筆資料
SET IDENTITY_INSERT [dbo].[EmployeesAge] ON
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (1, N'Davis', N'Sara', N'CEO', N'Ms.', CAST(N'1958-12-08 00:00:00.000' AS DateTime), CAST(N'2002-05-01 00:00:00.000' AS DateTime), N'7890 - 20th Ave. E., Apt. 2A', N'Seattle', N'WA', N'10003', N'USA', N'(206) 555-0101', NULL)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (2, N'Funk', N'Don', N'Vice President, Sales', N'Dr.', CAST(N'1962-02-19 00:00:00.000' AS DateTime), CAST(N'2002-08-14 00:00:00.000' AS DateTime), N'9012 W. Capital Way', N'Tacoma', N'WA', N'10001', N'USA', N'(206) 555-0100', 1)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (3, N'Lew', N'Judy', N'Sales Manager', N'Ms.', CAST(N'1973-08-30 00:00:00.000' AS DateTime), CAST(N'2002-04-01 00:00:00.000' AS DateTime), N'2345 Moss Bay Blvd.', N'Kirkland', N'WA', N'10007', N'USA', N'(206) 555-0103', 2)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (4, N'Peled', N'Yael', N'Sales Representative', N'Mrs.', CAST(N'1947-09-19 00:00:00.000' AS DateTime), CAST(N'2003-05-03 00:00:00.000' AS DateTime), N'5678 Old Redmond Rd.', N'Redmond', N'WA', N'10009', N'USA', N'(206) 555-0104', 3)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (5, N'Buck', N'Sven', N'Sales Manager', N'Mr.', CAST(N'1965-03-04 00:00:00.000' AS DateTime), CAST(N'2003-10-17 00:00:00.000' AS DateTime), N'8901 Garrett Hill', N'London', NULL, N'10004', N'UK', N'(71) 234-5678', 2)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (6, N'Suurs', N'Paul', N'Sales Representative', N'Mr.', CAST(N'1973-07-02 00:00:00.000' AS DateTime), CAST(N'2003-10-17 00:00:00.000' AS DateTime), N'3456 Coventry House, Miner Rd.', N'London', NULL, N'10005', N'UK', N'(71) 345-6789', 5)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (7, N'King', N'Russell', N'Sales Representative', N'Mr.', CAST(N'1970-05-29 00:00:00.000' AS DateTime), CAST(N'2004-01-02 00:00:00.000' AS DateTime), N'6789 Edgeham Hollow, Winchester Way', N'London', NULL, N'10002', N'UK', N'(71) 123-4567', 5)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (8, N'Cameron', N'Maria', N'Sales Representative', N'Ms.', CAST(N'1968-01-09 00:00:00.000' AS DateTime), CAST(N'2004-03-05 00:00:00.000' AS DateTime), N'4567 - 11th Ave. N.E.', N'Seattle', N'WA', N'10006', N'USA', N'(206) 555-0102', 3)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (9, N'Dolgopyatova', N'Zoya', N'Sales Representative', N'Ms.', CAST(N'1976-01-27 00:00:00.000' AS DateTime), CAST(N'2004-11-15 00:00:00.000' AS DateTime), N'1234 Houndstooth Rd.', N'London', NULL, N'10008', N'UK', N'(71) 456-7890', 5)
GO
INSERT [dbo].[EmployeesAge] ([empid], [lastname], [firstname], [title], [titleofcourtesy], [birthdate], [hiredate], [address], [city], [region], [postalcode], [country], [phone], [mgrid])
VALUES (10, N'Rainmaker', N'Ho', N'Developer', N'Mr.', CAST(N'1976-01-27 00:00:00.000' AS DateTime), CAST(N'2004-11-15 00:00:00.000' AS DateTime), N'1234 Houndstooth Rd.', N'London', NULL, N'10008', N'UK', N'(71) 456-7890', 5)
GO
SET IDENTITY_INSERT [dbo].[EmployeesAge] OFF
GO
Select * from [dbo].[EmployeesAge];
如果以10歲為間隔的話,那可以算出每個員工的年齡,去除以間隔的值,再Group起來就可以了,如下,
WITH empAge
AS
(
SELECT birthdate ,
GETDATE() AS today ,
DATEDIFF(YY, birthdate, GETDATE())
- CASE WHEN DATEADD(YY, DATEDIFF(YY, birthdate, GETDATE()), birthdate) > GETDATE()
THEN 1
ELSE 0
END AS age
FROM [EmployeesAge]
)
--SELECT STR(age / 10 * 10), * FROM empAge
SELECT CAST(age / 10 * 10 AS VARCHAR) + '~' + CAST( age / 10 * 10 + 9 AS VARCHAR) AS [range], COUNT(*) AS levelCount
FROM empAge
GROUP BY CAST(age / 10 * 10 AS VARCHAR) + '~' + CAST( age / 10 * 10 + 9 AS VARCHAR);
那如果沒有資料的間隔也要列出來的話,可以使用CTE來建立間隔,詳細可參考「透過common table expressions (CTE)來產生某區間的連續序號」,如下,
--目前的序號
DECLARE @CURRENT_SEQ INT
SET @CURRENT_SEQ = 0;
--要取號的筆數
DECLARE @NUM_COUNT INT
SET @NUM_COUNT = 10;
--間隔的大小
DECLARE @RANGE INT
SET @RANGE = 10;
--最後的序號
DECLARE @AFTER_SEQ INT
SET @AFTER_SEQ = @CURRENT_SEQ + (@NUM_COUNT * @RANGE);
--產生間隔的資料
;WITH SEQ_RESULT
AS(
SELECT @AFTER_SEQ -1 AS SEQ_NO, @AFTER_SEQ - @RANGE AS RANGE_START, @NUM_COUNT AS NUM_CNT
UNION ALL
SELECT Y.SEQ_NO - @RANGE, RANGE_START - @RANGE, Y.NUM_CNT -1
FROM SEQ_RESULT Y
WHERE Y.NUM_CNT > 1
)
--列出0~99的區間
SELECT * FROM SEQ_RESULT;
如果只是要建立0~99的區間資料,就可將原有的變數,直接給常數值,如下,
;WITH SEQ_RESULT
AS(
SELECT 100 -1 AS SEQ_NO, 100 - 10 AS RANGE_START, 10 AS NUM_CNT
UNION ALL
SELECT Y.SEQ_NO - 10, RANGE_START - 10, Y.NUM_CNT -1
FROM SEQ_RESULT Y
WHERE Y.NUM_CNT > 1
)
--列出0~99的區間
SELECT * FROM SEQ_RESULT;
所以用Right Outer Join它們結合起來,如下,
--用生日去算出年紀再計算出每個間隔的數量
--產生間隔的資料
;WITH SEQ_RESULT
AS(
SELECT 99 AS SEQ_NO, 100 - 10 AS RANGE_START, 10 AS NUM_CNT
UNION ALL
SELECT Y.SEQ_NO - 10, RANGE_START - 10, Y.NUM_CNT -1
FROM SEQ_RESULT Y
WHERE Y.NUM_CNT > 1
)
,
empAge
AS
(
SELECT DATEDIFF(YY, birthdate, GETDATE())
- CASE WHEN DATEADD(YY, DATEDIFF(YY, birthdate, GETDATE()), birthdate) > GETDATE()
THEN 1
ELSE 0
END AS age
FROM EmployeesAge
)
SELECT rag.RANGE_START, rag.SEQ_NO, COUNT(age) AS range_count
FROM empAge RIGHT JOIN SEQ_RESULT rag
ON empAge.age BETWEEN rag.RANGE_START AND rag.SEQ_NO
GROUP BY rag.RANGE_START, rag.SEQ_NO
ORDER BY rag.RANGE_START;
--最後將測試表Drop掉
drop table EmployeesAge;
後來Scar大大及Kenny大大提到說,如果人數多的話,用員工的生日去算出年齡似乎會比較慢,下一篇我們再針對先算出年齡,還是用間隔算出間隔的日期,再透過生日去BETWEEN的這2種方式比較看看。
參考資料
透過common table expressions (CTE)來產生某區間的連續序號
Hi,
亂馬客Blog已移到了 「亂馬客 : Re:從零開始的軟體開發生活」
請大家繼續支持 ^_^