"eng-ca_web_2002_1M-words.txt",
names=["Rank", "Word", "Frequency"]
wd.Word = wd.Word.apply(str).str.lower()  # coerce to str and lower-case so case variants merge below
wd = (wd.query("Frequency > 5")                                   # Remove rare words,
        .query("Word.str.match('^[A-Za-z]*$')", engine="python")  # words with non-letters,
        .query("Word.str.contains('[aeiouy]')", engine="python")  # abbreviations w/o vowels,
        .groupby("Word").sum())                                   # and duplicates
wd["Length"] = pandas.Series(wd.index, index=wd.index).str.len()
sm = wd.groupby("Length")
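# Below: NormalizedCount is the share of distinct words (types) of each length;
# NormalizedFrequency is the share of total occurrences (tokens) by length.
# Each column sums to 1.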
result = pandas.DataFrame({
"NormalizedCount": sm.Frequency.count() / sm.Frequency.count().sum(),
"NormalizedFrequency": sm.Frequency.sum() / sm.Frequency.sum().sum()