news icon indicating copy to clipboard operation
news copied to clipboard

Readme: Evaluation of upvoteRate for resubmitted urls in dataset

Open fdietze opened this issue 3 years ago • 0 comments

-- Finds urls that appear under more than one id and whose highest and lowest
-- per-id scores differ by more than 50; shows at most 30, highest peak first.
-- NOTE(review): the recorded output contains a row with an empty url and
-- 9 submissions, so rows without a url appear to be grouped together as one
-- "url" -- consider whether they should be excluded.
with subs as (
    -- One row per id, carrying the highest score recorded for that id.
    -- url is a bare (non-aggregated) column; this relies on SQLite accepting it.
    select
        id,
        url,
        max(score) as score
    from stories
    inner join dataset using (id)
    group by id
)
select
    url,
    count(*) as submission_count,
    max(score) as max_score,
    min(score) as min_score,
    group_concat(score) as scores
from subs
group by url
having count(*) > 1
    and max(score) - min(score) > 50
order by max_score desc
limit 30;
url                                                           submission_count  max_score  min_score  scores
------------------------------------------------------------  ----------------  ---------  ---------  --------------------
https://github.com/google/cdc-file-transfer                   2                 835        2          2,835

https://tynan.com/letstalk/                                   2                 671        1          1,671

http://www.bay12forums.com/smf/index.php?topic=181050.0       2                 651        2          2,651

https://ianbicking.org/blog/2023/01/infinite-ai-array.html    2                 586        6          586,6

https://maggieappleton.com/ai-dark-forest                     3                 432        3          3,3,432

https://www.quantamagazine.org/long-out-of-math-an-ai-progra  2                 420        6          6,420
mmer-cracks-a-pure-math-problem-20230103/

https://vadimkravcenko.com/shorts/things-they-didnt-teach-yo  2                 402        1          1,402
u/

https://unchartedterritories.tomaspueyo.com/p/transportation  2                 360        1          1,360
-tech-shaped-empires

                                                              9                 341        1          341,1,3,2,1,2,2,2,16

-- Same resubmission listing as the earlier query, extended with an upvoteRate
-- per id and restricted to ids whose max cumulativeExpectedUpvotes exceeds 10.
with subs as (
    -- One row per id: highest score plus a ratio of the maximum cumulative
    -- upvotes to the maximum cumulative expected upvotes.
    -- NOTE(review): 2.3 is added to both numerator and denominator --
    -- presumably a smoothing constant; confirm its origin.
    select
        id,
        url,
        max(score) as score,
        (max(cumulativeUpvotes) + 2.3)/(max(cumulativeExpectedUpvotes) + 2.3) as upvoteRate
    from stories
    inner join dataset using (id)
    group by id
    having max(cumulativeExpectedUpvotes) > 10
)
select
    url,
    count(*) as submission_count,
    max(score) as max_score,
    min(score) as min_score,
    group_concat(score) as scores,
    group_concat(upvoteRate) as upvoteRates
from subs
group by url
having count(*) > 1
    and max(score) - min(score) > 50
order by max_score desc
limit 30;
url                                                           submission_count  max_score  min_score  scores   upvoteRates
------------------------------------------------------------  ----------------  ---------  ---------  -------  -----------------------------------
https://pytorch.org/blog/compromised-nightly-dependency/      2                 324        191        324,191  1.43000362202034,2.09236212050415

https://scottaaronson.blog/?p=6957                            2                 228        16         16,228   0.883265772640144,0.716479188526392

https://gizmodo.com/dnd-wizards-of-the-coast-ogl-1-1-open-ga  2                 199        25         199,25   1.26440357080483,1.46305528976043
ming-license-1849950634


Here are a couple more results, after removing the unnecessary `max(score) - min(score) > 50` constraint from the query above:
https://www.nature.com/articles/s41586-022-05543-x            2                 72         70         70,72    0.68,0.81

https://www.quantamagazine.org/the-physics-principle-that-in  2                 65         17         17,65    0.86,0.61
spired-modern-ai-art-20230105/

fdietze avatar Feb 06 '23 16:02 fdietze