显示每组最大值的 Hive 表
Display hive table with max value per group
给定以下 Hive 查询:
-- Total frequency for every (field_name, ontology_uri) pair of the "mrna" feed,
-- listed from the largest total to the smallest.
select
    field_name,
    sum(frequency) as total_sum,
    ontology_uri
from feed_one.dd_instance
where feed_name = "mrna"
group by
    ontology_uri,
    field_name
order by total_sum desc;
生成如下表:
**field_name** **total_sum** **ontology_uri**
tcga_participant_barcode 1944 http://data.bioontology.org/ontologies/NCIT
gene 1251 http://data.bioontology.org/ontologies/GEXO
protocol 612 http://data.bioontology.org/ontologies/PR
gene 378 http://data.bioontology.org/ontologies/OGG
geneid 315 http://data.bioontology.org/ontologies/NCIT
gene 315 http://data.bioontology.org/ontologies/RETO
cohort 270 http://data.bioontology.org/ontologies/NCIT
我想基于该结果,显示每个 field_name 对应的最大 total_sum。也就是说,我期望得到如下结果:
**field_name** **max(total_sum)** **ontology_uri**
tcga_participant_barcode 1944 http://data.bioontology.org/ontologies/NCIT
gene 1251 http://data.bioontology.org/ontologies/GEXO
protocol 612 http://data.bioontology.org/ontologies/PR
geneid 315 http://data.bioontology.org/ontologies/NCIT
cohort 270 http://data.bioontology.org/ontologies/NCIT
我尝试了以下 HQL,但它报错:org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: ParseException line 3:1 mismatched input 'select' expecting ( near '(' in expression specification
-- Attempted query from the question, kept verbatim because it reproduces the
-- reported ParseException. Intent: keep only the rows whose
-- (field_name, total_sum) equals the per-field_name maximum.
-- The error position 3:1 corresponds to the "(select ..." line that follows IN.
-- NOTE(review): presumably Hive does not accept a multi-column
--   "(a, b) IN (subquery)" predicate here — confirm against the Hive subquery docs.
-- NOTE(review): the outer "from A" refers to an alias that is only defined
--   inside the subquery below, so A is not a visible table at that point.
select * from A
where (A.field_name, A.total_sum) IN
(select A.field_name, max(A.total_sum) as maximum
from
(
-- per (ontology_uri, field_name) totals for the "mrna" feed
select field_name, sum(frequency) as total_sum, ontology_uri
from feed_one.dd_instance
where feed_name="mrna"
group by ontology_uri,field_name order by total_sum desc
) as A
group by A.field_name);
-- For each field_name of the "mrna" feed, return the ontology_uri whose summed
-- frequency is the largest, together with that maximum.
--
-- Grouping the outer query by (field_name, ontology_uri) would use the same
-- keys as the inner aggregation, so max() would see one row per group and the
-- result would still contain every pair (e.g. "gene" three times). Instead the
-- per-field_name maximum is computed with a window and only the rows that
-- reach it are kept.
-- If several ontology_uri values tie for the maximum of a field_name, all of
-- them are returned.
select
    t2.field_name,
    t2.max_total_sum,
    t2.ontology_uri
from
(
    select
        t1.field_name,
        t1.ontology_uri,
        t1.total_sum,
        -- largest total among all ontology_uri values of this field_name
        max(t1.total_sum) over (partition by t1.field_name) as max_total_sum
    from
    (
        -- one row per (field_name, ontology_uri) with its total frequency
        select
            field_name,
            sum(frequency) as total_sum,
            ontology_uri
        from feed_one.dd_instance
        where feed_name = "mrna"
        group by
            ontology_uri,
            field_name
    ) t1
) t2
where t2.total_sum = t2.max_total_sum
使用 row_number() 和 partition by:
-- For each field_name of the "mrna" feed, keep the single (field_name,
-- ontology_uri) pair with the largest summed frequency.
--
-- The totals are aggregated first and ranked afterwards:
--   * a windowed sum() would not collapse the input rows, and an alias defined
--     in a select list cannot be used by a window in that same select list;
--   * the ranking window is partitioned by field_name only, because
--     partitioning by (field_name, ontology_uri) would give every pair
--     row_no = 1 and filter nothing.
-- Output columns: field_name, ontology_uri, total_sum, row_no.
select
    res.field_name,
    res.ontology_uri,
    res.total_sum,
    res.row_no
from
(
    select
        agg.field_name,
        agg.ontology_uri,
        agg.total_sum,
        -- ontology_uri is a tie-breaker so the chosen row is deterministic
        row_number() over (
            partition by agg.field_name
            order by agg.total_sum desc, agg.ontology_uri
        ) as row_no
    from
    (
        -- one row per (field_name, ontology_uri) with its total frequency
        select
            field_name,
            ontology_uri,
            sum(frequency) as total_sum
        from feed_one.dd_instance
        where feed_name = "mrna"
        group by
            field_name,
            ontology_uri
    ) agg
) res
where res.row_no = 1
给定以下 Hive 查询:
-- Total frequency for every (field_name, ontology_uri) pair of the "mrna" feed,
-- listed from the largest total to the smallest.
select
    field_name,
    sum(frequency) as total_sum,
    ontology_uri
from feed_one.dd_instance
where feed_name = "mrna"
group by
    ontology_uri,
    field_name
order by total_sum desc;
生成如下表:
**field_name** **total_sum** **ontology_uri**
tcga_participant_barcode 1944 http://data.bioontology.org/ontologies/NCIT
gene 1251 http://data.bioontology.org/ontologies/GEXO
protocol 612 http://data.bioontology.org/ontologies/PR
gene 378 http://data.bioontology.org/ontologies/OGG
geneid 315 http://data.bioontology.org/ontologies/NCIT
gene 315 http://data.bioontology.org/ontologies/RETO
cohort 270 http://data.bioontology.org/ontologies/NCIT
我想基于该结果,显示每个 field_name 对应的最大 total_sum。也就是说,我期望得到如下结果:
**field_name** **max(total_sum)** **ontology_uri**
tcga_participant_barcode 1944 http://data.bioontology.org/ontologies/NCIT
gene 1251 http://data.bioontology.org/ontologies/GEXO
protocol 612 http://data.bioontology.org/ontologies/PR
geneid 315 http://data.bioontology.org/ontologies/NCIT
cohort 270 http://data.bioontology.org/ontologies/NCIT
我尝试了以下 HQL,但它报错:org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: ParseException line 3:1 mismatched input 'select' expecting ( near '(' in expression specification
-- Attempted query from the question, kept verbatim because it reproduces the
-- reported ParseException. Intent: keep only the rows whose
-- (field_name, total_sum) equals the per-field_name maximum.
-- The error position 3:1 corresponds to the "(select ..." line that follows IN.
-- NOTE(review): presumably Hive does not accept a multi-column
--   "(a, b) IN (subquery)" predicate here — confirm against the Hive subquery docs.
-- NOTE(review): the outer "from A" refers to an alias that is only defined
--   inside the subquery below, so A is not a visible table at that point.
select * from A
where (A.field_name, A.total_sum) IN
(select A.field_name, max(A.total_sum) as maximum
from
(
-- per (ontology_uri, field_name) totals for the "mrna" feed
select field_name, sum(frequency) as total_sum, ontology_uri
from feed_one.dd_instance
where feed_name="mrna"
group by ontology_uri,field_name order by total_sum desc
) as A
group by A.field_name);
-- For each field_name of the "mrna" feed, return the ontology_uri whose summed
-- frequency is the largest, together with that maximum.
--
-- Grouping the outer query by (field_name, ontology_uri) would use the same
-- keys as the inner aggregation, so max() would see one row per group and the
-- result would still contain every pair (e.g. "gene" three times). Instead the
-- per-field_name maximum is computed with a window and only the rows that
-- reach it are kept.
-- If several ontology_uri values tie for the maximum of a field_name, all of
-- them are returned.
select
    t2.field_name,
    t2.max_total_sum,
    t2.ontology_uri
from
(
    select
        t1.field_name,
        t1.ontology_uri,
        t1.total_sum,
        -- largest total among all ontology_uri values of this field_name
        max(t1.total_sum) over (partition by t1.field_name) as max_total_sum
    from
    (
        -- one row per (field_name, ontology_uri) with its total frequency
        select
            field_name,
            sum(frequency) as total_sum,
            ontology_uri
        from feed_one.dd_instance
        where feed_name = "mrna"
        group by
            ontology_uri,
            field_name
    ) t1
) t2
where t2.total_sum = t2.max_total_sum
使用 row_number() 和 partition by:
-- For each field_name of the "mrna" feed, keep the single (field_name,
-- ontology_uri) pair with the largest summed frequency.
--
-- The totals are aggregated first and ranked afterwards:
--   * a windowed sum() would not collapse the input rows, and an alias defined
--     in a select list cannot be used by a window in that same select list;
--   * the ranking window is partitioned by field_name only, because
--     partitioning by (field_name, ontology_uri) would give every pair
--     row_no = 1 and filter nothing.
-- Output columns: field_name, ontology_uri, total_sum, row_no.
select
    res.field_name,
    res.ontology_uri,
    res.total_sum,
    res.row_no
from
(
    select
        agg.field_name,
        agg.ontology_uri,
        agg.total_sum,
        -- ontology_uri is a tie-breaker so the chosen row is deterministic
        row_number() over (
            partition by agg.field_name
            order by agg.total_sum desc, agg.ontology_uri
        ) as row_no
    from
    (
        -- one row per (field_name, ontology_uri) with its total frequency
        select
            field_name,
            ontology_uri,
            sum(frequency) as total_sum
        from feed_one.dd_instance
        where feed_name = "mrna"
        group by
            field_name,
            ontology_uri
    ) agg
) res
where res.row_no = 1