猪:过滤行条件
Pig: Filter rows conditions
我想要一个 pig 脚本来过滤不同条件下的行:
i2 = GROUP i1 ALL;
i3 = FOREACH i2 GENERATE AVG(i1.user_followers_count) AS avg_user_followers_count, AVG(i1.avl_user_total_retweets) AS avg_avl_user_total_retweets, AVG(i1.avl_user_total_likes) AS avg_avl_user_total_likes, AVG(i1.avl_user_total_replies) AS avg_avl_user_total_replies, AVG(i1.avl_user_engagements) AS avg_avl_user_engagements;
top = FILTER i1 BY (user_followers_count > i3.avg_user_followers_count) AND (avl_user_engagements > i3.avg_avl_user_engagements) AND (avl_user_total_retweets > i3.avg_avl_user_total_retweets) AND (avl_user_total_likes > i3.avg_avl_user_total_likes) AND (avl_user_total_replies > i3.avl_user_total_replies);
bot = FILTER i1 BY (user_followers_count < i3.avg_user_followers_count) AND (avl_user_engagements < i3.avg_avl_user_engagements) AND (avl_user_total_retweets < i3.avg_avl_user_total_retweets) AND (avl_user_total_likes < i3.avg_avl_user_total_likes) AND (avl_user_total_replies < i3.avl_user_total_replies);
也就是说,我把各方面都高于平均水平的都选为top
,把各方面都低于平均水平的都选为bottom
。
现在我想在将 top
和 bot
过滤到另一个名为 med
.我怎么做?
使用SPLIT
SPLIT i1 INTO
top IF((user_followers_count > i3.avg_user_followers_count) AND (avl_user_engagements > i3.avg_avl_user_engagements) AND (avl_user_total_retweets > i3.avg_avl_user_total_retweets) AND (avl_user_total_likes > i3.avg_avl_user_total_likes) AND (avl_user_total_replies > i3.avl_user_total_replies)),
bot IF((user_followers_count < i3.avg_user_followers_count) AND (avl_user_engagements < i3.avg_avl_user_engagements) AND (avl_user_total_retweets < i3.avg_avl_user_total_retweets) AND (avl_user_total_likes < i3.avg_avl_user_total_likes) AND (avl_user_total_replies < i3.avl_user_total_replies)),
med OTHERWISE;
我想要一个 pig 脚本来过滤不同条件下的行:
i2 = GROUP i1 ALL;
i3 = FOREACH i2 GENERATE AVG(i1.user_followers_count) AS avg_user_followers_count, AVG(i1.avl_user_total_retweets) AS avg_avl_user_total_retweets, AVG(i1.avl_user_total_likes) AS avg_avl_user_total_likes, AVG(i1.avl_user_total_replies) AS avg_avl_user_total_replies, AVG(i1.avl_user_engagements) AS avg_avl_user_engagements;
top = FILTER i1 BY (user_followers_count > i3.avg_user_followers_count) AND (avl_user_engagements > i3.avg_avl_user_engagements) AND (avl_user_total_retweets > i3.avg_avl_user_total_retweets) AND (avl_user_total_likes > i3.avg_avl_user_total_likes) AND (avl_user_total_replies > i3.avl_user_total_replies);
bot = FILTER i1 BY (user_followers_count < i3.avg_user_followers_count) AND (avl_user_engagements < i3.avg_avl_user_engagements) AND (avl_user_total_retweets < i3.avg_avl_user_total_retweets) AND (avl_user_total_likes < i3.avg_avl_user_total_likes) AND (avl_user_total_replies < i3.avl_user_total_replies);
也就是说,我把各方面都高于平均水平的都选为top
,把各方面都低于平均水平的都选为bottom
。
现在我想在将 top
和 bot
过滤到另一个名为 med
.我怎么做?
使用SPLIT
SPLIT i1 INTO
top IF((user_followers_count > i3.avg_user_followers_count) AND (avl_user_engagements > i3.avg_avl_user_engagements) AND (avl_user_total_retweets > i3.avg_avl_user_total_retweets) AND (avl_user_total_likes > i3.avg_avl_user_total_likes) AND (avl_user_total_replies > i3.avl_user_total_replies)),
bot IF((user_followers_count < i3.avg_user_followers_count) AND (avl_user_engagements < i3.avg_avl_user_engagements) AND (avl_user_total_retweets < i3.avg_avl_user_total_retweets) AND (avl_user_total_likes < i3.avg_avl_user_total_likes) AND (avl_user_total_replies < i3.avl_user_total_replies)),
med OTHERWISE;