` Count words in a text
` www.scriptol.org
` words counts


` usage:
` si words filename



` Set of characters that separate words. wcount passes it to splitline as
` the delimiter list: space, backslash, dot, brackets, punctuation,
` operators, both quote characters, and the CR, LF and tab characters.
text cdelimiters = " \\.()[]{},;:?+-*=/&~|\"\'<>!\r\n\t"


` split a line into words, according to a list of delimiters

//array splitline(text line, text delims = " \t\r\n")
array splitline(text line, text delims)
    ` Splits line into words.
    ` Every character of line that is found in delims ends the current word.
    ` Delimiters are never stored, and consecutive delimiters do not produce
    ` empty words.
    ` Returns the words in their order of appearance; the array is empty
    ` when line holds no word.
    array words = {}
    text newword = ""

    for text c in line
        if c in delims                  ` delimiter: the current word, if any, is complete
            if newword <> ""
                words.push(newword)
            /if
            newword = ""                ` start collecting the next word
        else
            newword + c                 ` augmented assignment: appends c to newword
        /if
    /for

    ` the line may end without a trailing delimiter: keep the pending word
    if newword <> ""
        words.push(newword)
    /if
return words


` count occurrences of words

text, int, text, int, int wcount(text filename)
    ` Counts how many times each word occurs in the file filename.
    ` The file content is loaded into an array, each item is split with
    ` splitline using the cdelimiters set, and the occurrences of every
    ` word are tallied in a dict.
    ` Returns, in this order: the least used word, its count, the most used
    ` word, its count, and the total given by the sum of all counts.
    ` When no word is found, both words are "" and both counts are 0.
    ` Side effects: prints the number of lines loaded, then the total.

    array content
    dict wordlist = dict()              ` word -> number of occurrences
    content.load(filename)              ` get the text into an array, iterated below line by line

    print filename, "loaded, found", content.size(), "lines"

    ` now processing each line of the text

    for text line in content
        array words = splitline(line, cdelimiters)  ` make an array of words

        for text w in words
            ` augmented assignment: increments the count stored for w
            ` NOTE(review): relies on a missing key counting from zero - confirm
            wordlist[w] + 1
        /for
    /for

    ` calculating the min, max

    ` 65535 is the starting sentinel for the minimum: a word occurring
    ` 65535 times or more can never be reported as the least used one
    int less = 65535, most = 0
    text lessword = "", mostword = ""

    array klist = wordlist.keys()

    for text k in klist                 ` for each key in dict
        int i = wordlist[k]             ` get the number of occurrences

        ` The two tests must be independent. A single composite if runs
        ` only its first matching case, so a key that lowered the minimum
        ` (always true for the first key) was never compared to the maximum.
        if i < less
            less = i
            lessword = k
        /if
        if i > most
            most = i
            mostword = k
        /if
    /for

    ` no word at all: report 0 rather than the 65535 sentinel
    if klist.size() = 0
        less = 0
    /if

    int total = wordlist.sum()          ` get the total of words in wordlist
    print total

return lessword, less, mostword, most, total

void usage()
    ` Prints the program banner and the command line to use for the
    ` compiled and the interpreted forms, then calls exit().
    print "Words count - www.scriptol.org"
    print "usage:  compiled:     words filename"
    print "        interpreted:  si words filename"
    exit()                          ` called without a status argument
return



` main function

int main(int argnum, array arglist)
    ` Entry point of the word counter.
    ` argnum must be 2, otherwise usage() is called; arglist[1] is taken as
    ` the name of the file to analyse.
    ` Prints the total, the least used and the most used word, then
    ` returns 0. Calls exit(1) when the file does not exist.

    if argnum <> 2
        usage()
    /if

    text filename = arglist[1]
    print "processing", filename, "..."

    ` make sure the file is there before counting
    file f = file(filename)
    if not f.exists()
        print filename, "not found"
        exit(1)
    /if

    ` receivers for the five values returned by the counter
    text rarest, commonest
    int rarestcount, commonestcount, total

    rarest, rarestcount, commonest, commonestcount, total = wcount(filename)

    ` display results
    print "Total:", total, "words"
    print "Less used:", rarest, "(" + str(rarestcount) + ")"
    print "Most used:", commonest, "(" + str(commonestcount) + ")"
return 0

` script entry: runs main with $argc and $argv (presumably the command-line
` argument count and list - confirm); the int returned by main is not used
main($argc, $argv)