MySql查询多条记录
MySql Query Multiple Records
我有以下关于 tf/idf 测量的查询。我的目标是获得每个日期的前 10 名,如 20160401 的前 10 名,20160402 的前 10 名 etc.But 根据我的查询,我每个 date.As 有超过 10 个,它可以看到我无法订购,应该只有一个 1 ,一个 2 一个 3 ,我的意思是每个日期最多 10 个。
date word rowNumber
20160401 word 1
20160401 word 2
..
20160401 word 10
-------------------
20160402 word 1
20160402 word 2
..
20160402 word 10
以下是我对 tf idf 测量的查询。在 newsdetails table 中,我有像
这样的列和值
date word wordCount
20160401 yürürlük 1
20160401 mülteci 1
20160401 anlaşma 2
20160401 pazartesi 1
20160401 itibaren 1
20160401 uygula 2
-------------------
SELECT t.newsCreatedTime,t.word,t.tf_idf as tweetRatio ,rn
FROM (
select w.newsCreatedTime,w.word,w.wordCount,t1.topic_sum,w.wordCount/t1.topic_sum as tf,
t2.idf,(w.wordCount/t1.topic_sum)*(t2.idf) as tf_idf,
(@rn := if(@t = w.newsCreatedTime, @rn + 1,
if(@t := w.newsCreatedTime, 1, 1)
)
) as rn
FROM newsdetails w JOIN
( select newsCreatedTime, sum(wordCount) as topic_sum
from newsdetails where trim(word)<>'limit' and length(trim(word))>3 and newsCreatedTime>=20160401
group by newsCreatedTime
) t1
on w.newsCreatedTime=t1.newsCreatedTime and trim(w.word)<>'limit' and length(trim(w.word))>3 and w.newsCreatedTime>=20160401
join (
select w.word, log10(t_cnts.cnts/count(*)) as idf
from newsdetails w,
(select count(distinct newsCreatedTime) as cnts from newsdetails where trim(word)<>'limit' and length(trim(word))>3 and newsCreatedTime>=20160401) t_cnts
group by w.word
) t2
on w.word=t2.word CROSS JOIN
(SELECT @t := '', @rn := 0) params
ORDER BY tf_idf desc
) t
WHERE rn <=10
输出是:
Date word ratio rowNumber
20160401 kadın -0.0005375613007471833 1
20160401 beyaz -0.00006167450024777626 1
20160401 alışveriş -0.000015563777946653284 1
20160401 ösym 0 1
20160401 yürürlük 0.00004586823255826781 1
20160401 haber -0.0003304889014365445 2
20160401 taraf -0.00019041987363584725 2
20160401 merkezi -0.00013800691483414033 2
20160401 mülteci -0.000056636107108483694 2
20160401 perde 0.00004941253963307586 2
20160401 sınav -0.00017806007150496475 3
但我只想获得每个日期的前 10 个单词及其比率。
类似的东西可以在 SQL 服务器中运行 - 可能需要调整 MySQL
的语法
select * from(
select *, row_number() over(partition by date order by ratio) rownum from (SELECT t.newsCreatedTime,t.word,t.tf_idf as tweetRatio ,rn
FROM (
select w.newsCreatedTime,w.word,w.wordCount,t1.topic_sum,w.wordCount/t1.topic_sum as tf,
t2.idf,(w.wordCount/t1.topic_sum)*(t2.idf) as tf_idf,
(@rn := if(@t = w.newsCreatedTime, @rn + 1,
if(@t := w.newsCreatedTime, 1, 1)
)
) as rn
FROM newsdetails w JOIN
( select newsCreatedTime, sum(wordCount) as topic_sum
from newsdetails where trim(word)<>'limit' and length(trim(word))>3 and newsCreatedTime>=20160401
group by newsCreatedTime
) t1
on w.newsCreatedTime=t1.newsCreatedTime and trim(w.word)<>'limit' and length(trim(w.word))>3 and w.newsCreatedTime>=20160401
join (
select w.word, log10(t_cnts.cnts/count(*)) as idf
from newsdetails w,
(select count(distinct newsCreatedTime) as cnts from newsdetails where trim(word)<>'limit' and length(trim(word))>3 and newsCreatedTime>=20160401) t_cnts
group by w.word
) t2
on w.word=t2.word CROSS JOIN
(SELECT @t := '', @rn := 0) params
ORDER BY tf_idf desc
) t
WHERE rn <=10 )a)b where rownum<=10
我有以下关于 tf/idf 测量的查询。我的目标是获得每个日期的前 10 名,如 20160401 的前 10 名,20160402 的前 10 名 etc.But 根据我的查询,我每个 date.As 有超过 10 个,它可以看到我无法订购,应该只有一个 1 ,一个 2 一个 3 ,我的意思是每个日期最多 10 个。
date word rowNumber
20160401 word 1
20160401 word 2
..
20160401 word 10
-------------------
20160402 word 1
20160402 word 2
..
20160402 word 10
以下是我对 tf idf 测量的查询。在 newsdetails table 中,我有像
这样的列和值 date word wordCount
20160401 yürürlük 1
20160401 mülteci 1
20160401 anlaşma 2
20160401 pazartesi 1
20160401 itibaren 1
20160401 uygula 2
-------------------
SELECT t.newsCreatedTime,t.word,t.tf_idf as tweetRatio ,rn
FROM (
select w.newsCreatedTime,w.word,w.wordCount,t1.topic_sum,w.wordCount/t1.topic_sum as tf,
t2.idf,(w.wordCount/t1.topic_sum)*(t2.idf) as tf_idf,
(@rn := if(@t = w.newsCreatedTime, @rn + 1,
if(@t := w.newsCreatedTime, 1, 1)
)
) as rn
FROM newsdetails w JOIN
( select newsCreatedTime, sum(wordCount) as topic_sum
from newsdetails where trim(word)<>'limit' and length(trim(word))>3 and newsCreatedTime>=20160401
group by newsCreatedTime
) t1
on w.newsCreatedTime=t1.newsCreatedTime and trim(w.word)<>'limit' and length(trim(w.word))>3 and w.newsCreatedTime>=20160401
join (
select w.word, log10(t_cnts.cnts/count(*)) as idf
from newsdetails w,
(select count(distinct newsCreatedTime) as cnts from newsdetails where trim(word)<>'limit' and length(trim(word))>3 and newsCreatedTime>=20160401) t_cnts
group by w.word
) t2
on w.word=t2.word CROSS JOIN
(SELECT @t := '', @rn := 0) params
ORDER BY tf_idf desc
) t
WHERE rn <=10
输出是:
Date word ratio rowNumber
20160401 kadın -0.0005375613007471833 1
20160401 beyaz -0.00006167450024777626 1
20160401 alışveriş -0.000015563777946653284 1
20160401 ösym 0 1
20160401 yürürlük 0.00004586823255826781 1
20160401 haber -0.0003304889014365445 2
20160401 taraf -0.00019041987363584725 2
20160401 merkezi -0.00013800691483414033 2
20160401 mülteci -0.000056636107108483694 2
20160401 perde 0.00004941253963307586 2
20160401 sınav -0.00017806007150496475 3
但我只想获得每个日期的前 10 个单词及其比率。
类似的东西可以在 SQL 服务器中运行 - 可能需要调整 MySQL
的语法select * from(
select *, row_number() over(partition by date order by ratio) rownum from (SELECT t.newsCreatedTime,t.word,t.tf_idf as tweetRatio ,rn
FROM (
select w.newsCreatedTime,w.word,w.wordCount,t1.topic_sum,w.wordCount/t1.topic_sum as tf,
t2.idf,(w.wordCount/t1.topic_sum)*(t2.idf) as tf_idf,
(@rn := if(@t = w.newsCreatedTime, @rn + 1,
if(@t := w.newsCreatedTime, 1, 1)
)
) as rn
FROM newsdetails w JOIN
( select newsCreatedTime, sum(wordCount) as topic_sum
from newsdetails where trim(word)<>'limit' and length(trim(word))>3 and newsCreatedTime>=20160401
group by newsCreatedTime
) t1
on w.newsCreatedTime=t1.newsCreatedTime and trim(w.word)<>'limit' and length(trim(w.word))>3 and w.newsCreatedTime>=20160401
join (
select w.word, log10(t_cnts.cnts/count(*)) as idf
from newsdetails w,
(select count(distinct newsCreatedTime) as cnts from newsdetails where trim(word)<>'limit' and length(trim(word))>3 and newsCreatedTime>=20160401) t_cnts
group by w.word
) t2
on w.word=t2.word CROSS JOIN
(SELECT @t := '', @rn := 0) params
ORDER BY tf_idf desc
) t
WHERE rn <=10 )a)b where rownum<=10