` Count words in a text
` www.scriptol.org
` words counts
` usage:
` si words filename
` Characters treated as word separators: wcount hands this list to splitline
` for every line it processes.
text cdelimiters = " \\.()[]{},;:?+-*=/&~|\"\'<>!\r\n\t"
` Split a line into words.
`   line:   the text to cut up
`   delims: every character of this text acts as a separator
` Returns the array of words in their order of appearance. Runs of
` consecutive separators never produce empty entries.
array splitline(text line, text delims)
   array words = {}
   text newword = ""                ` word currently being accumulated
   for text c in line
      if c in delims                ` separator: the pending word, if any, is complete
         if newword <> ""
            words.push(newword)
         /if
         newword = ""
      else
         newword = newword + c      ` ordinary character: extend the pending word
      /if
   /for
   ` a word that runs up to the end of the line has not been stored yet
   if newword <> ""
      words.push(newword)
   /if
return words
` Count the occurrences of each word in a file.
`   filename: path of the text file to analyse
` Returns five values, in this order:
`   the least used word, its count, the most used word, its count,
`   and the total produced by wordlist.sum().
` Also prints a "loaded" message and the total while it works.
` NOTE(review): 65535 is only the starting value for the minimum search; if
` the file yields no word at all it is returned unchanged, with an empty word.
text, int, text, int, int wcount(text filename)
   array content
   dict wordlist = dict()                           ` word -> number of occurrences
   content.load(filename)                           ` get the text into an array
   print filename, "loaded, found", content.size(), "lines"

   ` tally every word of every line
   for text line in content
      array words = splitline(line, cdelimiters)    ` make an array of words
      for text w in words
         wordlist[w] + 1                            ` count occurrences
      /for
   /for

   ` look for the least and the most frequent words
   int less = 65535, most = 0
   text lessword = "", mostword = ""
   array klist = wordlist.keys()
   for text k in klist                              ` for each key in dict
      int i = wordlist[k]                           ` number of occurrences of k
      ` Two independent tests: one count can be a new minimum AND a new maximum
      ` at the same time (the first word examined always is), so the checks
      ` must not be written as mutually exclusive alternatives.
      if i < less
         less = i
         lessword = k
      /if
      if i > most
         most = i
         mostword = k
      /if
   /for

   int total = wordlist.sum()                       ` get the total of words in wordlist
   print total
return lessword, less, mostword, most, total
` Print the command-line help and stop the program.
` main calls this when the number of arguments is wrong, so the program ends
` with status 1, the same status main uses when the file is missing.
void usage()
   print "Words count - www.scriptol.org"
   print "usage: compiled: words filename"
   print " interpreted: si words filename"
   exit(1)
` closes the definition; presumably never reached since exit ends the program
return
` Program entry point.
`   argnum:  number of command-line arguments; exactly 2 are expected
`   arglist: the arguments; arglist[1] is the file to analyse
` Prints the total, the least used word and the most used word, then
` returns 0. Shows the usage text on a wrong argument count and exits
` with status 1 when the file does not exist.
int main(int argnum, array arglist)
   if argnum <> 2
      usage()
   /if

   text filename = arglist[1]
   print "processing", filename, "..."

   ` refuse to go further when the file is missing
   file handle = file(filename)
   if not handle.exists()
      print filename, "not found"
      exit(1)
   /if

   ` receivers for the five values returned by the counter
   text leastword, mostword
   int leastcount, mostcount, total
   leastword, leastcount, mostword, mostcount, total = wcount(filename)

   ` display results
   print "Total:", total, "words"
   print "Less used:", leastword, "(" + str(leastcount) + ")"
   print "Most used:", mostword, "(" + str(mostcount) + ")"
return 0
` Start the program.
` NOTE(review): $argc / $argv are presumably the argument count and the
` argument list supplied by the runtime - confirm.
main($argc, $argv)