{
  "method": "return_tsv",
  "counttype": ["WordsPerMillion"],
  "search_limits": {
    "country": ["USA", "UK"],
    "word": ["natural selection"]
  },
  "groups": ["year"],
  "database": "OL"
}
year WordsPerMillion
[...]
1907 340.20526777
1908 341.83114533
1909 295.24911692
1910 282.24802327
1911 284.92406591
1912 283.89805752
1913 296.87614627
1914 332.76147647
1915 446.39889626
1916 428.87396542
1917 527.51044740
1918 647.48528263
1919 653.05159042
1920 507.23177682
1921 501.77615474
[...]
{
  "method": "return_tsv",
  "counttype": ["WordsPerMillion"],
  "search_limits": {
    "word": ["natural selection"]
  },
  "groups": ["state", "year"],
  "database": "OL"
}
state year WordsPerMillion
[...]
NJ 1901 0E-8
NJ 1902 0E-8
NJ 1903 0.52162392
NJ 1904 0E-8
NJ 1905 0E-8
NJ 1906 0E-8
NJ 1907 0.52719259
NJ 1908 0.59582825
NJ 1909 0.23120944
NJ 1910 1.08461634
[...]
{
  "method": "return_tsv",
  "counttype": ["WordCount"],
  "search_limits": {
    "year": [1877],
    "state": ["RI"]
  },
  "groups": ["unigram", "year"],
  "database": "presidio"
}
unigram year WordCount
[...]
resolve 1877 8
resolved 1877 272
resolves 1877 10
resolving 1877 2
resort 1877 10
resorted 1877 2
resorts 1877 4
resound 1877 1
[...c. 23,000 total rows...]
# Compare the vocabulary of reviews of female vs. male instructors in the
# "RMP" database and plot the most gender-distinctive words by Dunning
# log-likelihood score.
#
# Names this script expects to exist already (none are defined here):
#   compareTwoLanguages(), genderStopwords, the dplyr verbs/pipe, and ggplot2.

# Query A: unigram word counts restricted to gender "female", rHelpful at most 2
# (presumably the "negative" reviews named in the plot title -- confirm),
# department "Computer Science", and date_year from 2005 onwards.
queryA = list(
  "database" = "RMP",
  "search_limits" = list(
    "gender" = list("female"),
    "rHelpful" = list("$lte" = list(2)),
    "department" = list("Computer Science"),
    "date_year" = list("$gte" = list(2005))
  ),
  counttype = list("WordCount"),
  groups = list("unigram")
)

# Query B: the same limits, with only the gender swapped to "male".
queryB = queryA
queryB[['search_limits']][['gender']] = list("male")

goodwords = compareTwoLanguages(queryA, queryB)

# NOTE(review): the original pipeline was truncated here -- everything between
# a "<" and the following ">" was lost (it read
# `filter(-abs(dunning)0,"Female","Male"))`). The comparison threshold could not
# be recovered. 0 is a neutral placeholder that keeps every word with a
# non-zero score; raise it to limit the chart to the strongest words.
# TODO: confirm the intended cutoff.
dunningCutoff = 0

# NOTE(review): the variable is called `historyPositive` although the query
# selects Computer Science reviews with low rHelpful; the name looks carried
# over from another analysis. Kept unchanged in case later code refers to it.
#
# `%.%` is dplyr's original chaining operator; later dplyr releases replaced it
# with `%>%`. Kept as written -- switch operators if `%.%` is unavailable.
historyPositive = goodwords %.%
  filter(!unigram %in% genderStopwords) %.%
  filter(-abs(dunning) < -dunningCutoff) %.%
  # Reconstructed step: `genderBias` is the fill aesthetic used below, and the
  # surviving fragment `0,"Female","Male"))` fixes the labels. Presumably a
  # positive score means over-representation in queryA (female) -- confirm
  # against compareTwoLanguages().
  mutate(genderBias = ifelse(dunning > 0, "Female", "Male"))

# Horizontal bar chart, words ordered by the magnitude of their score.
# element_text() is the constructor that pairs with theme(); the original
# theme_text() belongs to the older opts() API.
ggplot(historyPositive) +
  geom_bar(
    aes(y = dunning, x = reorder(unigram, abs(dunning)), fill = genderBias),
    stat = "identity"
  ) +
  coord_flip() +
  labs(x = "Word", y = "Overrepresentation (Dunning Log score)") +
  theme(axis.text = element_text(size = 12)) +
  labs(title = "Gender-specific words in negative CS reviews")