+--------+--------+--------+-------+
| bookid | word1 | word2 | count |
+--------+--------+--------+-------+
| 31 | 589 | 55019 | 1 |
| 38 | 101 | 708 | 1 |
| 41 | 671 | 3341 | 1 |
| 45 | 13 | 86 | 2 |
| 50 | 108 | 1962 | 1 |
| 52 | 132 | 34 | 1 |
| 54 | 674 | 28 | 1 |
| 54 | 2 | 5062 | 1 |
| 56 | 7 | 1646 | 1 |
| 58 | 17406 | 6955 | 1 |
| 69 | 1979 | 58 | 1 |
| 70 | 138 | 223460 | 2 |
| 90 | 5 | 371422 | 1 |
| 91 | 2841 | 671 | 1 |
| 93 | 2844 | 2380 | 1 |
| 96 | 132 | 100 | 1 |
| 105 | 50 | 415 | 1 |
| 106 | 18 | 385 | 1 |
| 107 | 5072 | 4 | 1 |
| 107 | 60 | 690 | 1 |
| 108 | 182 | 131 | 1 |
| 108 | 2482 | 189 | 1 |
| 108 | 5 | 25972 | 1 |
| 111 | 2 | 7363 | 1 |
| 114 | 209340 | 605 | 1 |
+--------+--------+--------+-------+
+--------+------+---------+-------+-----------------------------------+
| bookid | year | nwords | state | title |
+--------+------+---------+-------+-----------------------------------+
| 245588 | 0 | 2613020 | NULL | Encyclop... |
| 282996 | 1997 | 165288 | AZ | Richard Hooker and the constru... |
| 519641 | 1997 | 219423 | AZ | Medieval Gospel of Nicodemus... |
| 907596 | 1876 | 25379 | NY | science and art of education... |
| 93322 | 1997 | 2466 | NY | Clueless in Tokyo... |
| 7807 | 1997 | 65108 | AZ | poems of Alcimus Ecdicius Avit... |
| 492289 | 1996 | 123463 | KY | McKendree College... |
| 987841 | 1993 | 38083 | DC | Global climate change... |
| 679078 | 1993 | 46723 | DC | North American Free Trade Agre... |
| 244189 | 1993 | 41330 | DC | Seafood safety... |
| 609042 | 1993 | 71475 | DC | North American Free Trade Agre... |
| 77609 | 1993 | 100893 | DC | Pending indoor air quality and... |
| 108978 | 1993 | 103274 | DC | Federal disaster policy and fu... |
| 961066 | 1993 | 52310 | DC | Pending nuclear legislation... |
| 190457 | 1993 | 111241 | DC | Impacts of trade agreements on... |
| 853339 | 1993 | 26997 | DC | Developments in the Middle Eas... |
| 818336 | 1993 | 31074 | DC | Review of major Census Bureau ... |
| 368923 | 1993 | 63090 | DC | Environmental aspects of the N... |
| 349342 | 1993 | 45708 | DC | Operations of the Congress... |
| 120630 | 1993 | 47451 | DC | Independent Counsel Reauthoriz... |
| 286820 | 1993 | 18680 | DC | Nomination of James Lee Witt... |
| 488341 | 1993 | 38158 | DC | Impact of federal mandated mar... |
| 298481 | 1993 | 163082 | DC | Mineral Exploration and Develo... |
+--------+------+---------+-------+-----------------------------------+
+--------+--------+--------+-------+
| bookid | word1 | word2 | count |
+--------+--------+--------+-------+
| 31 | 589 | 55019 | 1 |
| 38 | 101 | 708 | 1 |
| 41 | 671 | 3341 | 1 |
| 45 | 13 | 86 | 2 |
| 50 | 108 | 1962 | 1 |
| 52 | 132 | 34 | 1 |
| 54 | 674 | 28 | 1 |
| 54 | 2 | 5062 | 1 |
| 56 | 7 | 1646 | 1 |
| 58 | 17406 | 6955 | 1 |
| 69 | 1979 | 58 | 1 |
| 70 | 138 | 223460 | 2 |
| 90 | 5 | 371422 | 1 |
| 91 | 2841 | 671 | 1 |
| 107 | 5072 | 4 | 1 |
| 107 | 60 | 690 | 1 |
| 108 | 182 | 131 | 1 |
| 108 | 2482 | 189 | 1 |
| 108 | 5 | 25972 | 1 |
| 111 | 2 | 7363 | 1 |
| 114 | 209340 | 605 | 1 |
+--------+--------+--------+-------+
+--------+-------------+-------------+------------+----------+
| wordid | casesens | lowercase | stem | IDF |
+--------+-------------+-------------+------------+----------+
| 4211 | wore | wore | wore | 0.866897 |
| 5088 | HE | he | he | 1.47661 |
| 8598 | Pieces | pieces | piece | 2.72216 |
| 19913 | Japan's | japan's | NULL | 2.86523 |
| 23351 | testament | testament | Testament | 2.34616 |
| 24504 | legged | legged | legs | 1.99477 |
| 27639 | como | como | como | 3.73671 |
| 29339 | shrinkage | shrinkage | shrinkage | 2.71933 |
| 35089 | Nina | nina | Nina | 3.09251 |
| 45784 | BERKELEY | berkeley | Berkeley | 3.23562 |
| 47416 | Heretical | heretical | heretics | 5.48773 |
| 52509 | cudgel | cudgel | cudgel | 3.79054 |
| 58293 | divino | divino | divino | 4.6265 |
| 62064 | ironing | ironing | iron | 3.14454 |
| 71846 | QUEEN'S | queen's | NULL | 4.78766 |
| 71884 | Gentoo | gentoo | Gentoo | 6.16556 |
| 77941 | attenuating | attenuating | attenuated | 4.15506 |
| 78677 | synthetase | synthetase | synthetase | 4.62012 |
| 81473 | 2020 | 2020 | NULL | 3.40792 |
| 81841 | quartets | quartets | quartet | 4.40757 |
| 85370 | shallop | shallop | shallop | 5.08674 |
| 87977 | warres | warres | warre | 5.57029 |
+--------+-------------+-------------+------------+----------+
-- Relative frequency of the words 'library' and 'libraries' per lc1 code and
-- year, over fastcat rows whose year lies in 1850..1922 (inclusive).
--
-- Result columns:
--   year             the grouping year
--   classification   value looked up in LCC for the group's lc1 code
--   WordsPerMillion  matched word occurrences per million words counted in
--                    the same (lc1, year) group
--
-- Every (lc1, year) group found in fastcat for the period is reported, even
-- when it contains none of the searched words (the ratio is then 0), provided
-- its lc1 code has a row in LCC (the final join is an inner join).
SELECT
    year,
    LCC.classification,
    -- Multiplying by 100000000 and dividing by 100 nets out to "per million".
    -- NOTE(review): the extra division presumably exists to widen the decimal
    -- scale of the result; confirm before collapsing it to * 1000000.
    -- NULLIF turns an empty denominator into an explicit NULL result instead
    -- of relying on the engine's division-by-zero behaviour.
    IFNULL(numerator.WordCount, 0) * 100000000
        / NULLIF(denominator.WordCount, 0)
        / 100 AS WordsPerMillion
FROM (
    -- Base of the ratio: total nwords per (lc1, year) in the period.
    SELECT
        fastcat.lc1,
        fastcat.year,
        SUM(fastcat.nwords) AS WordCount
    FROM fastcat
    WHERE fastcat.year >= 1850
      AND fastcat.year <= 1922
    GROUP BY
        fastcat.lc1,
        fastcat.year
) AS denominator
-- LEFT JOIN keeps every denominator group; groups without any matching word
-- get a NULL numerator, which the IFNULL above converts to 0.
LEFT JOIN (
    -- Occurrences of the searched words per (lc1, year) in the period.
    SELECT
        fastcat.lc1,
        fastcat.year,
        SUM(main.count) AS WordCount
    FROM fastcat
    INNER JOIN master_bookcounts AS main
        ON fastcat.bookid = main.bookid
    INNER JOIN wordsheap AS words1
        ON main.wordid = words1.wordid
    WHERE fastcat.year >= 1850
      AND fastcat.year <= 1922
      -- NOTE(review): each scalar subquery yields the searched spelling when
      -- it exists in wordsheap and NULL otherwise, so this looks equivalent to
      -- words1.casesens IN ('library', 'libraries') as long as casesens is
      -- unique in wordsheap; verify before simplifying.
      AND (
          words1.casesens = (
              SELECT casesens FROM wordsheap WHERE casesens = 'library'
          )
          OR words1.casesens = (
              SELECT casesens FROM wordsheap WHERE casesens = 'libraries'
          )
      )
    GROUP BY
        fastcat.lc1,
        fastcat.year
) AS numerator
    USING (lc1, year)
INNER JOIN LCC
    USING (lc1)
-- NOTE(review): classification and WordsPerMillion are selected without
-- aggregation; this presumably relies on LCC holding one row per lc1 and on
-- ONLY_FULL_GROUP_BY being satisfied or disabled. Verify against the schema.
GROUP BY
    lc1,
    year;
{
    "method": "return_tsv",
    "counttype": ["WordsPerMillion"],
    "search_limits": {
        "country": ["USA", "UK"],
        "word": ["natural selection"]
    },
    "groups": ["year"],
    "database": "OL"
}
year WordsPerMillion
[...]
1907 340.20526777
1908 341.83114533
1909 295.24911692
1910 282.24802327
1911 284.92406591
1912 283.89805752
1913 296.87614627
1914 332.76147647
1915 446.39889626
1916 428.87396542
1917 527.51044740
1918 647.48528263
1919 653.05159042
1920 507.23177682
1921 501.77615474
[...]
{
    "method": "return_tsv",
    "counttype": ["WordsPerMillion"],
    "search_limits": {
        "word": ["natural selection"]
    },
    "groups": ["state", "year"],
    "database": "OL"
}
state year WordsPerMillion
[...]
NJ 1901 0E-8
NJ 1902 0E-8
NJ 1903 0.52162392
NJ 1904 0E-8
NJ 1905 0E-8
NJ 1906 0E-8
NJ 1907 0.52719259
NJ 1908 0.59582825
NJ 1909 0.23120944
NJ 1910 1.08461634
[...]
{
    "method": "return_tsv",
    "counttype": ["WordCount"],
    "search_limits": {
        "year": [1877],
        "state": ["RI"]
    },
    "groups": ["unigram", "year"],
    "database": "presidio"
}
unigram year WordCount
[...]
resolve 1877 8
resolved 1877 272
resolves 1877 10
resolving 1877 2
resort 1877 10
resorted 1877 2
resorts 1877 4
resound 1877 1
[...c. 23,000 total rows...]