//-----------------------------------------------------------------------------
// Copyright © 2003 - Philip Howard - All rights reserved
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//-----------------------------------------------------------------------------
// package	libh/map
// homepage	http://libh.slashusr.org/
//-----------------------------------------------------------------------------
// author	Philip Howard
// email	libh at ipal dot org
// homepage	http://phil.ipal.org/
//-----------------------------------------------------------------------------
// This file is best viewed using a fixed spaced font such as Courier
// and in a display at least 120 columns wide.
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// program	wordcount
//
// purpose	Output the number of words in the input.
//
// syntax	wordcount ["sepchars"] < infile
//-----------------------------------------------------------------------------
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WORDSIZE	256

//-----------------------------------------------------------------------------
// function	readword
//
// purpose	Read the next word from stdin into a caller supplied buffer.
//		Leading separator characters are skipped, then characters are
//		collected until the next separator or the end of input.
//
// arguments	1 (char *) buffer that receives the NUL terminated word
//		2 (size_t) total size of that buffer, terminator included
//		3 (char *) table indexed by character value (0..255) where a
//		  non-zero entry marks that character as a separator
//
// returns	(int) 0 if a word was read (even if it ended at end of input)
//		(int) 1 if end of input was reached before any word was found
//
// note		A word longer than the buffer is truncated to fit, but all
//		of its characters are still consumed from the input.
//-----------------------------------------------------------------------------
static
int
readword (
    char *	arg_buffer
    ,
    size_t	arg_length
    ,
    char *	arg_seps
    )
{
    size_t	used	= 0;	// characters stored so far
    int		found	= 0;	// set once any word character is seen
    int		c	;

    //-- Skip all leading separator characters.
    do {
	c = getc( stdin );
    } while ( c != EOF && arg_seps[ c ] );

    //-- Get characters until end of word, store until full.
    //-- One byte is always held back for the terminator.
    while ( c != EOF && arg_seps[ c ] == 0 ) {
	found = 1;
	if ( used + 1 < arg_length ) arg_buffer[ used ++ ] = (char) c;
	c = getc( stdin );
    }

    //-- Terminate the word, unless the caller gave no room at all.
    if ( arg_length > 0 ) arg_buffer[ used ] = 0;

    //-- Report end of input only when no word was collected, so that a
    //-- final word not followed by a separator is still delivered.  The
    //-- next call will then see EOF right away and return 1.
    return ! found;
}

//-----------------------------------------------------------------------------
// function	main
//
// purpose	Build the separator table, then count the words that
//		readword() delivers from stdin and print the total.
//
// arguments	1 (int) number of command line arguments
//		2 (char * *) command line arguments; argv[1], if present,
//		  lists the characters that separate words
//
// returns	(int) 0 always
//-----------------------------------------------------------------------------
int
main (
    int		argc
    ,
    char * *	argv
    )
{
    unsigned long	wcount		;

    char		word		[ WORDSIZE ];
    char		seps		[ 256 ];


    //------------------------------------------------------------------------
    // Initialize the separator table that is given to readword().  This table
    // tells it what characters separate each word.  Control characters are
    // always placed in the table.  If no explicit characters are given in the
    // first command line argument then all space and punctuation characters
    // are added by default.  Otherwise just the given characters are added.
    //------------------------------------------------------------------------
    {
	int i;

	//-- The <ctype.h> tests return "non-zero", not necessarily a value
	//-- that fits in a char, so normalize to 0 or 1 before storing.
	for ( i = 0; i < 256; ++ i ) seps[ i ] = ( iscntrl( i ) != 0 );

	if ( argc > 1 && argv[ 1 ] ) {
	    const char * p;

	    //-- Index by unsigned value so characters above 127 land in
	    //-- the upper half of the table instead of at a negative index.
	    for ( p = argv[ 1 ]; * p; ++ p ) seps[ (unsigned char) * p ] = 1;
	} else {
	    for ( i = 0; i < 256; ++ i ) {
		if ( ispunct( i ) || isspace( i ) ) seps[ i ] = 1;
	    }
	}
    }

    //-----------------------------------------------------
    // Count words until readword() reports a non-zero result.
    //-----------------------------------------------------
    wcount = 0;
    while ( 0 == readword( word, WORDSIZE, seps ) ) {
	++ wcount;
    }

    printf( "%lu\n", wcount );
    return 0;
}

