//-----------------------------------------------------------------------------
// Copyright © 2003 - Philip Howard - All rights reserved
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//-----------------------------------------------------------------------------
// package	libh/io
// homepage	http://libh.slashusr.org/
//-----------------------------------------------------------------------------
// author	Philip Howard
// email	libh at ipal dot org
// homepage	http://phil.ipal.org/
//-----------------------------------------------------------------------------
// This file is best viewed using a fixed spaced font such as Courier
// and in a display at least 120 columns wide.
//-----------------------------------------------------------------------------

#include "io_lib.h"

__PROTO_BEGIN__
//-----------------------------------------------------------------------------
// function	get_config_item
//
// purpose	Get the next config item, storing the name and data into the
//		given targets.
//
// arguments	1 (char *) pointer to name target string
//		2 (size_t) maximum space in name target string
//		3 (size_t *) where to store actual name length
//		4 (char *) pointer to data target string
//		5 (size_t) maximum space in data target string
//		6 (size_t *) where to store actual data length
//		7 (FILE *) pointer to open file to read from
//		8 (int *) where to store truncation status
//
// returns	(int) == -3 : error
//		(int) == -2 : config item not read due to end of file
//		(int) ==  1 : config item read, name only
//		(int) ==  2 : config item read, name and data
//
// note		Lengths are stored for the caller because the name and/or data
//		may have any octet code value, including 0.  A 0 octet is stored
//		at the end of the name and data for the usual case of non-binary
//		null-terminated strings.
//
// note		Single or double quotes are supported without nesting them.
//		Quoted strings must be ended by the same quote.  Mismatched
//		quotes within just become data.
//
// note		Sequences of space characters (space and tab) are reduced to
//		a single space character, unless enclosed in quotes.
//
// note		Conversion of character sequences beginning with backslash are
//		supported for both certain escape codes as well as conversion
//		of octal.
//
// note		Conversion of character sequences beginning with percent sign
//		are supported for conversion of cetal/hexadecimal.
//
// note		Conversion of character sequences beginning with a caret ('^')
//		is supported for encoding of control characters.
//
// note		An end of line is considered to be any combination and order
//		of not more than one instance each of the four line breaking
//		characters:
//			'\n' (newline)
//			'\r' (carriage return)
//			'\f' (form feed)
//			'\v' (vertical tab)
//		If a character is encountered a 2nd time, it is considered to
//		begin a 2nd line ending, and hence an empty line.
//
// note		If the first character of a line, after any leading spaces or
//		tabs, is a '+' then the line is a continuation of the previous
//		line, as if the preceding newline sequence and the '+' were not
//		present.
//-----------------------------------------------------------------------------
int
get_config_item (
    char *	arg_name_ptr
    ,
    size_t	arg_name_len
    ,
    size_t *	arg_name_len_ptr
    ,
    char *	arg_data_ptr
    ,
    size_t	arg_data_len
    ,
    size_t *	arg_data_len_ptr
    ,
    FILE *	arg_file
    ,
    int *	arg_trunc_ptr
    )
__PROTO_END__
{
    //-------------------------------------------------------------------------
    // Reads one "name", "name = data" or "name : data" item from arg_file
    // (stdin when arg_file is NULL) into the caller's two buffers.
    //
    // Returns -3 for bad arguments, -2 when end of file is reached before any
    // item starts, 1 when only a name was read, 2 when name and data were read.
    //
    // The truncation status stored through arg_trunc_ptr is a bit mask:
    // bit 1 is set when the name did not fit, bit 2 when the data did not fit.
    // Octets that do not fit are still read and discarded, so the next call
    // starts at the next item rather than in the middle of this one.
    //-------------------------------------------------------------------------

    //-- One distinct bit per line breaking character: \n=1 \v=2 \f=4 \r=8.
    static const signed char	newline_tab	[256]	= {
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   1,   2,   4,   8,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
	   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0
    };
    //-- Value of a hexadecimal digit, or -1 when the octet is not one.
    static const signed char	code_tab	[256]	= {
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1
    };
    //-- Octet produced by a one letter backslash escape (either case), or -1.
    //-- 0x1B is the escape character, spelled numerically because '\e' is not
    //-- a standard C escape.  Letter Z maps to the 0 octet.
    static const signed char	control_tab	[256]	= {
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,'\a','\b',  -1,  -1,0x1B,'\f',  -1,  -1,  -1,  -1,  -1,  -1,  -1,'\n',  -1,
	  -1,  -1,'\r',  -1,'\t',  -1,'\v',  -1,  -1,  -1,   0,  -1,  -1,  -1,  -1,  -1,
	  -1,'\a','\b',  -1,  -1,0x1B,'\f',  -1,  -1,  -1,  -1,  -1,  -1,  -1,'\n',  -1,
	  -1,  -1,'\r',  -1,'\t',  -1,'\v',  -1,  -1,  -1,   0,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
	  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1
    };

    char *		name_ptr		;
    char *		name_end		;
    char *		data_ptr		;
    char *		data_end		;
    char *		target_ptr		;
    char *		target_end		;

    int			bits			;
    int			carat			;
    int			ch			;
    int			code			;
    int			prcnt			;
    int			quote			;
    int			slash			;
    int			space			;
    int			state			;
    int			trunc			;

    //-- Store one octet in the current target, or record truncation of that
    //-- target (state is 1 while in the name, 2 while in the data, which are
    //-- exactly the two truncation bits) when the target is already full.
#define GCI_STORE(octet)							\
    do {									\
	if ( target_ptr < target_end ) * target_ptr ++ = (char) (octet);	\
	else trunc |= state;							\
    } while ( 0 )

    //-- End any escape sequence in progress.  A numeric value that already has
    //-- at least one digit is stored; a bare introducer is simply forgotten.
#define GCI_FLUSH()								\
    do {									\
	if ( code >= 0 && ( slash > 1 || prcnt > 1 ) ) GCI_STORE( code );	\
	slash = 0;								\
	prcnt = 0;								\
	carat = 0;								\
	code = -1;								\
    } while ( 0 )

    //-----------------
    // Check arguments.
    //-----------------
    if ( ! arg_file ) arg_file = stdin;
    if ( ! arg_name_ptr || arg_name_len < 2 || ! arg_data_ptr || arg_data_len < 2 ) return -3;

    //----------------------------------------------------------------
    // Determine end of string.  The last octet of each buffer is kept
    // free for the terminating 0.
    //----------------------------------------------------------------
    name_end = ( name_ptr = arg_name_ptr ) + arg_name_len - 1;
    data_end = ( data_ptr = arg_data_ptr ) + arg_data_len - 1;

    //----------------------------------------------------------------------------
    // Skip spaces, newlines, and all control characters until something is found.
    // If a comment is encountered here, discard it up to its line break and go
    // around again; the line break itself is skipped as a control character.
    // If an EOF is encountered here, exit with a special error status early.
    //----------------------------------------------------------------------------
    ch = fgetc( arg_file );
    for (;;) {
	while ( ch != EOF && ch <= 32 ) ch = fgetc( arg_file );
	if ( ch != '#' ) break;
	//-- EOF is tested before indexing so the table is never read out of bounds.
	do {
	    ch = fgetc( arg_file );
	} while ( ch != EOF && newline_tab[ch] == 0 );
    }
    if ( ch == EOF ) return -2;

    //-------------------------------------------------
    // Initially the input loop is collecting the name.
    //-------------------------------------------------
    state = 1;
    target_ptr = name_ptr;
    target_end = name_end;
    trunc = 0;

    //---------------------------------------------------------------------
    // No quote, escape sequence, or space run is in progress.
    // slash and prcnt are 1 plus the number of digits seen so far.
    // code is -1 unless a backslash or percent sequence is being collected.
    //---------------------------------------------------------------------
    quote = 0;
    slash = 0;
    prcnt = 0;
    carat = 0;
    space = 0;
    code = -1;

    //-------------------------------------------------------------
    // Do the input loop gathering data until one item is complete.
    // A "continue" goes straight to reading the next character.
    //-------------------------------------------------------------
    do {

	//--------------------------------------------------------------------
	// Check for end of name.  Spaces after the delimiter are dropped, not
	// reduced to one space, hence space = 1 rather than 2.
	//--------------------------------------------------------------------
	if ( state == 1 && quote == 0 && ( ch == '=' || ch == ':' ) ) {
	    //-- A numeric escape cut short by the delimiter belongs to the name.
	    GCI_FLUSH();
	    name_ptr = target_ptr;
	    target_ptr = data_ptr;
	    target_end = data_end;
	    space = 1;
	    state = 2;
	    continue;
	}

	//-------------------------------------------------------------------------
	// Handle the end of a line.  The next line may begin with '+' to continue.
	//-------------------------------------------------------------------------
	else if ( newline_tab[ch] ) {
	    //-- Skip over entire newline group; a repeated character starts a new line.
	    bits = newline_tab[ch];
	    while ( ( ch = fgetc( arg_file ) ) != EOF && newline_tab[ch] != 0 && ! ( newline_tab[ch] & bits ) ) {
		bits |= newline_tab[ch];
	    }
	    if ( ch == EOF ) break;

	    //-- Continue might be after some spaces.
	    while ( ( ch == ' ' || ch == '\t' ) && ( ch = fgetc( arg_file ) ) != EOF );

	    //-- If not continuing, then end this item now and leave the
	    //-- character for the next call (ungetc of EOF does nothing).
	    if ( ch != '+' ) {
		ungetc( ch, arg_file );
		break;
	    }

	    //-- Sequences do not span lines: finish any pending one and continue.
	    GCI_FLUSH();
	    space = 0;
	    continue;
	}

	//---------------------------------------------------------------------------------
	// Handle a space sequence.
	// If the space sequence ends, unget the ending character so it is processed later,
	// then if the space variable is two, then insert one space, else insert no spaces.
	//---------------------------------------------------------------------------------
	else if ( space ) {
	    if ( ch == ' ' || ch == '\t' ) continue;
	    ungetc( ch, arg_file );
	    if ( space < 2 ) {
		space = 0;
		continue;
	    }
	    ch = ' ';
	}

	//----------------------------------------------------------------
	// Handle backslash sequence which encodes octal or some controls.
	//----------------------------------------------------------------
	else if ( slash ) {
	    //-- If this character encodes a special control, then decode and insert.
	    //-- The comparison is >= 0 because the table maps letter Z to the 0 octet.
	    if ( slash == 1 && control_tab[ch] >= 0 ) {
		ch = control_tab[ch];
		code = -1;
	    }
	    //-- Else if this character is octal, then decode; stop after three digits.
	    else if ( '0' <= ch && ch <= '7' ) {
		code = ( code << 3 ) + ( ch - '0' );
		if ( ++ slash <= 3 ) continue;
		ch = -1;
	    }
	    //-- Else with no digits seen there is no value: insert only the character.
	    else if ( slash == 1 ) {
		code = -1;
	    }
	    //-- Else insert the collected value, then this character literally.
	}

	//---------------------------------------------------------
	// Handle percent sequence which encodes cetal/hexadecimal.
	//---------------------------------------------------------
	else if ( prcnt ) {
	    //-- If this character is cetal, then decode; stop after two digits.
	    if ( code_tab[ch] >= 0 ) {
		code = ( code << 4 ) + code_tab[ch];
		if ( ++ prcnt <= 2 ) continue;
		ch = -1;
	    }
	    //-- Else with no digits seen there is no value: insert only the character.
	    else if ( prcnt == 1 ) {
		code = -1;
	    }
	    //-- Else insert the collected value, then this character literally.
	}

	//--------------------------------------------------------
	// Handle carat sequence which encodes control characters.
	//--------------------------------------------------------
	else if ( carat ) {
	    //-- If this character is in the valid range, decode and insert.
	    if ( 64 <= ch && ch <= 126 ) {
		ch &= 0x1f;
	    }
	    //-- Else insert literally and end carat state.
	}

	//-----------------------------------------------------
	// Handle quote which may begin or end a quoted string.
	// No escape sequence can be pending when this is reached.
	//-----------------------------------------------------
	else if ( ch == '"' || ch == '\'' ) {
	    //-- If matching quote then end a quoted string.
	    if ( ch == quote ) {
		quote = 0;
		continue;
	    }
	    //-- Else if not in quote, then start a quote.
	    else if ( ! quote ) {
		quote = ch;
		continue;
	    }
	    //-- Else insert the other kind of quote literally.
	}

	//---------------------------------------------------------------
	// Handle backslash which encodes octal or some special controls.
	//---------------------------------------------------------------
	else if ( ch == '\\' ) {
	    //-- Start backslash sequence.
	    slash = 1;
	    code = 0;
	    continue;
	}

	//------------------------------------------------
	// Handle percent which encodes cetal/hexadecimal.
	//------------------------------------------------
	else if ( ch == '%' ) {
	    //-- Start percent sequence.
	    prcnt = 1;
	    code = 0;
	    continue;
	}

	//-----------------------------------------------
	// Handle carat which encodes control characters.
	//-----------------------------------------------
	else if ( ch == '^' ) {
	    //-- Start carat sequence.
	    carat = 1;
	    continue;
	}

	//-------------------------------------------
	// Handle a space character if not in quotes.
	//-------------------------------------------
	else if ( quote == 0 && ( ch == ' ' || ch == '\t' ) ) {
	    space = 2;
	    continue;
	}

	//---------------------------------------------
	// If a sequence code was completed, insert it.
	//---------------------------------------------
	if ( code >= 0 ) {
	    GCI_STORE( code );
	    code = -1;
	}

	//-------------------------------------------------------------
	// Insert remaining character (negative means there is none).
	//-------------------------------------------------------------
	if ( ch >= 0 ) {
	    GCI_STORE( ch );
	}

	//-----------------
	// Clear sequences.
	//-----------------
	slash = 0;
	prcnt = 0;
	carat = 0;
	space = 0;

    } while ( ( ch = fgetc( arg_file ) ) != EOF );

    //----------------------------------------------------------------------
    // A numeric escape still pending at the end of the item is part of it.
    //----------------------------------------------------------------------
    GCI_FLUSH();

    //-----------------------
    // Check for final state.
    //-----------------------
    if ( state == 1 ) {
	name_ptr = target_ptr;
    } else {
	data_ptr = target_ptr;
    }

    //-------------------------------------------------------------------
    // Report lengths and terminate.  Neither pointer can be beyond the
    // octet reserved for the terminator.
    //-------------------------------------------------------------------
    if ( arg_name_len_ptr ) * arg_name_len_ptr = (size_t) ( name_ptr - arg_name_ptr );
    * name_ptr = 0;
    if ( arg_data_len_ptr ) * arg_data_len_ptr = (size_t) ( data_ptr - arg_data_ptr );
    * data_ptr = 0;

    //-------------------------------------
    // Return the final return code status.
    //-------------------------------------
    if ( arg_trunc_ptr ) * arg_trunc_ptr = trunc;
    return state;

#undef GCI_FLUSH
#undef GCI_STORE
}

