I'm trying to get the top 5 comments by score for each Reddit post. I only want to retrieve the top N comments by score for each post title.
Example: I only would want comment 1 and 2 for each post.
Post 1 | Comment 1 | Comment Score 10
Post 1 | Comment 2 | Comment Score 9
Post 1 | Comment 3 | Comment Score 8
Post 2 | Comment 1 | Comment Score 10
Post 2 | Comment 2 | Comment Score 9
Post 2 | Comment 3 | Comment Score 8
StandardSQL
SELECT
posts.title,
posts.url,
posts.score AS postsscore,
DATE_TRUNC(DATE(TIMESTAMP_SECONDS(posts.created_utc)), MONTH),
SUBSTR(comments.body, 0, 80),
comments.score AS commentsscore,
comments.id
FROM
`fh-bigquery.reddit_posts.2015*` AS posts
JOIN `fh-bigquery.reddit_comments.2015*` AS comments
ON posts.id = SUBSTR(comments.link_id, 4)
WHERE
posts.subreddit = 'Showerthoughts'
AND posts.score >100
AND comments.score >100
ORDER BY
posts.score DESC,
posts.title DESC,
comments.score DESC
Below is for BigQuery Standard SQL
#standardSQL
SELECT * EXCEPT(pos) FROM (
SELECT
posts.title,
posts.url,
posts.score AS postsscore,
DATE_TRUNC(DATE(TIMESTAMP_SECONDS(posts.created_utc)), MONTH),
SUBSTR(comments.body, 0, 80),
comments.score AS commentsscore,
comments.id,
ROW_NUMBER() OVER(PARTITION BY posts.url ORDER BY comments.score DESC) pos
FROM `fh-bigquery.reddit_posts.2015*` AS posts
JOIN `fh-bigquery.reddit_comments.2015*` AS comments
ON posts.id = SUBSTR(comments.link_id, 4)
WHERE posts.subreddit = 'Showerthoughts'
AND posts.score >100
AND comments.score >100
)
WHERE pos < 3
ORDER BY postsscore DESC, title DESC, commentsscore DESC