一、聚合函數(shù)
對(duì)一組值進(jìn)行計(jì)算,并返回單個(gè)值,也被稱為組函數(shù);
聚合計(jì)算過程:選定分組字段 – 分組 – 計(jì)算:
1.選定分組字段:在 select 里;
2.分組:用 group by;
3.計(jì)算:根據(jù)實(shí)際情況,選定聚合函數(shù);聚合函數(shù)
1.計(jì)數(shù):count()
2.求和:sum()
3.最大值/最小值:max/min()
4.平均值:avg()聚合值過濾
聚合值過濾:having,不是where!目前SQL執(zhí)行順序:from – where – group by – having – select
eg:按省份和高考分?jǐn)?shù)分段,統(tǒng)計(jì)高分段的人數(shù):
安徽學(xué)生,高考分?jǐn)?shù)>620;
江蘇學(xué)生,高考分?jǐn)?shù)>610;
其他省份學(xué)生,高考分?jǐn)?shù)>600;
二、舉例練習(xí)
--常規(guī)聚合函數(shù)使用:
--注意事項(xiàng):hive中distinct和group by 不能一起用
-- 1. Highest single-subject score of each student.
--    Output columns: 学号 (student id), 姓名 (name), 单科最高分 (highest single-subject score).
-- The aliases are backtick-quoted so the non-ASCII names parse as identifiers.
select
    stu_id as `学号`
    ,name as `姓名`
    ,max(score) as `单科最高分`
from score_info
group by stu_id, name;
-- 2. Lowest single-subject score of each student.
--    Output columns: 学号 (student id), 姓名 (name), 单科最低分 (lowest single-subject score).
-- The aliases are backtick-quoted so the non-ASCII names parse as identifiers.
select
    stu_id as `学号`
    ,name as `姓名`
    ,min(score) as `单科最低分`
from score_info
group by stu_id, name;
-- 3. Average exam score of each student.
--    Output columns: 学号 (student id), 姓名 (name), 考试平均分 (average score).
-- The aliases are backtick-quoted so the non-ASCII names parse as identifiers.
-- avg() skips null scores, so the average is taken over recorded scores only.
select
    stu_id as `学号`
    ,name as `姓名`
    ,avg(score) as `考试平均分`
from score_info
group by stu_id, name;
-- 4. Number of subjects each student was examined in.
--    Output columns: 学号 (student id), 姓名 (name), 考试科数 (number of subjects).
-- The aliases are backtick-quoted so the non-ASCII names parse as identifiers.
-- count(distinct ...) counts each subject once even if a student has several rows for it.
select
    stu_id as `学号`
    ,name as `姓名`
    ,count(distinct subject_id) as `考试科数`
from score_info
group by stu_id, name;
-- 5. Total exam score of each student.
--    Output columns: 学号 (student id), 姓名 (name), 考试总分 (total score).
-- The aliases are backtick-quoted so the non-ASCII names parse as identifiers.
select
    stu_id as `学号`
    ,name as `姓名`
    ,sum(score) as `考试总分`
from score_info
group by stu_id, name;
-- 6. Number of students per province and college.
--    Output columns: 省份 (province), 学院 (college), 人数 (headcount).
-- The aliases are backtick-quoted so the non-ASCII names parse as identifiers.
-- order by runs after select, so it refers to the output aliases.
select
    from_where as `省份`
    ,college as `学院`
    ,count(distinct stu_id) as `人数`
from student_info
group by from_where, college
order by `省份`, `学院`;
-- 7. Number of students in each class.
--    Output columns: 学院 (college), 专业 (major), 班级 (class), 人数 (headcount).
-- The aliases are backtick-quoted so the non-ASCII names parse as identifiers.
-- count(stu_id) counts rows with a non-null stu_id.
-- NOTE(review): presumably class_info has one row per student; verify, otherwise use count(distinct stu_id).
-- The ordering includes the class so rows within one major come back in a stable order.
select
    college as `学院`
    ,major as `专业`
    ,class as `班级`
    ,count(stu_id) as `人数`
from class_info
group by college, major, class
order by `学院`, `专业`, `班级`;
--與其他函數(shù)結(jié)合使用:
-- 1. Number of students scoring exactly 100 in 高数 (advanced math) and in 物理 (physics).
--    Output columns: 高数满分人数, 物理满分人数.
-- Variant using if(): rows that do not match yield null, which count() ignores.
-- distinct makes this a headcount: a student with several matching rows is counted once.
select
    count(distinct if(subject = '高数' and score = 100, stu_id, null)) as `高数满分人数`
    ,count(distinct if(subject = '物理' and score = 100, stu_id, null)) as `物理满分人数`
from score_info;
-- Variant using case when: number of students scoring exactly 100 in 高数 (advanced math)
-- and in 物理 (physics). Output columns: 高数满分人数, 物理满分人数.
-- Rows that do not match fall through to null, which count() ignores;
-- distinct counts each student once.
select
    count(distinct case when subject = '高数' and score = 100 then stu_id else null end) as `高数满分人数`
    ,count(distinct case when subject = '物理' and score = 100 then stu_id else null end) as `物理满分人数`
from score_info;
-- 2. Per class, number of students who failed (score < 60) 高数 (advanced math) and 物理 (physics).
--    Output columns: 学院 (college), 专业 (major), 班级 (class), 高数不及格人数, 物理不及格人数.
-- Every column is qualified with its table because both tables carry stu_id.
-- Students are counted by distinct stu_id, so repeated rows for one subject do not inflate the headcount.
-- The inner join drops score rows with no class record and classes with no score rows.
select
    class_info.college as `学院`
    ,class_info.major as `专业`
    ,class_info.class as `班级`
    ,count(distinct if(score_info.subject = '高数' and score_info.score < 60, score_info.stu_id, null)) as `高数不及格人数`
    ,count(distinct if(score_info.subject = '物理' and score_info.score < 60, score_info.stu_id, null)) as `物理不及格人数`
from score_info
inner join class_info
    on score_info.stu_id = class_info.stu_id
group by class_info.college, class_info.major, class_info.class
order by `学院`, `专业`, `班级`;
-- Number of students per subject and score band.
--    Output columns: subject, diff_part (score band label), 人数 (headcount).
-- Hive does not accept a select-list alias (diff_part) in group by, so the band is
-- computed in a derived table first; the alternative is to repeat the whole case
-- expression inside group by.
-- Bands are half-open [low, high) except the last, which includes 100, so the
-- boundary scores 0 and 100 are banded too.
select
    banded.subject
    ,banded.diff_part
    ,count(distinct banded.stu_id) as `人数`
from (
    select
        subject
        ,stu_id
        ,case
            when score >= 0 and score < 60 then '0-60'
            when score >= 60 and score < 70 then '60-70'
            when score >= 70 and score < 80 then '70-80'
            when score >= 80 and score < 90 then '80-90'
            when score >= 90 and score <= 100 then '90-100'
            else null  -- null scores or scores outside 0..100 stay unbanded
        end as diff_part
    from score_info
) banded
group by banded.subject, banded.diff_part;