/*
 *  libbab.c -- BabyTrans ( Babylon Translator front-end for GTK )
 *
 *  Copyright (C) September 1999  Frederic Jolliton -- <fjolliton@chez.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

/*

 Functions
 ---------

 int				babInit( const char * path ) ;
 int				babEnd( void ) ;

 struct bab_def *	babDefinition( const char * word ) ;
 int				babFree( struct bab_def * def ) ;

 Structure
 ---------

 struct bab_def {
   struct bab_def * next ;
   char * word ;
   char * attrib ;
   char * definition ;
 } ;

*/


#include <errno.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "types.h"
#include "libbab.h"
#include <kapp.h>

// #define DEBUG
/*
 * warn(s): print "<file>(<line>): <s>" followed by a newline on stderr.
 * The file name is passed as an argument instead of being pasted into the
 * format string, so a '%' in the path cannot be taken as a conversion.
 * Change the condition to 0 to compile every warning out; the disabled
 * variant takes the same single argument so that call sites still compile.
 */
#if 1
#define warn(s) fprintf(stderr,"%s(%d): %s\n",__FILE__,__LINE__,(s))
#else
#define warn(s) ((void)0)
#endif

/* Some utility functions */

/*
 * Read an unsigned value made of n bytes (n*8 bits) from f.
 * The first byte read becomes the least significant byte of the result.
 * If fgetc() reports EOF before n bytes were read, a warning is printed
 * and 0 is returned (any bytes read so far are discarded).
 */
static U4 read_u(int n, FILE * f )
{
	U4	value = 0 ;
	int	shift = 0 ;

	while ( n-- > 0 ) {
		int byte = fgetc( f ) ;

		if ( byte == EOF ) {
			warn( i18n("EndOfFile(EOF) encountered") ) ;
			return 0 ;
		}
		value |= ( (U4)(U1)byte ) << shift ;
		shift += 8 ;
	}
	return value ;
}

/*
 * Expansion table for the codes 0..31 found in the "escape" form of the
 * word encoding (see babReadDef): codes 0..5 expand to three characters,
 * codes 6..31 to two characters.
 * The entries are string literals and are only ever read, hence const.
 */
static const char * const compact_table[] = {
	"<0>", "ion", "ies", "ing", "ous", "ses", "al" , "an", /*  0 ..  7 */
	"at" , "ed" , "en" , "er" , "es" , "ia" , "ic" , "ie", /*  8 .. 15 */
	"in" , "io" , "is" , "it" , "le" , "ly" , "ne" , "on", /* 16 .. 23 */
	"or" , "ra" , "se" , "ss" , "st" , "te" , "ti" , "th", /* 24 .. 31 */
};

/*
 * Set up a translator instance.
 *
 * Opens "<Directory><Filename>" as the definition file (file_def) and
 * "<Directory>english.dic" as the index file (file_idx), and allocates the
 * MAX_WORD_LENGTH + 1 byte scratch buffer `word`.  The two strings are simply
 * concatenated, so Directory is expected to end with a path separator.
 *
 * bab_init is set to 1 only when everything succeeded.  On any failure the
 * object is left with bab_init == 0 and with no buffer or open file attached,
 * so the destructor has nothing to release.
 */
BabylonC::BabylonC(const char *Directory="",const char *Filename = "")
{
	QString filename(Directory);

	word=NULL; word_length=0; bab_init=0; file_idx=NULL; file_def=NULL;

	if ( Directory==NULL || Filename==NULL) {
		warn( "Filename is NULL" ) ;
		return;
	}

	word = new char[MAX_WORD_LENGTH + 1];
	if ( word == NULL ) {
		return;
	}

	/* Definition file */
	filename=Directory;
	filename+=Filename;
	printf("BabFile: %s\n",(const char *)filename);
	file_def = fopen( filename , FILE_READ_BINARY ) ;
	if ( file_def == NULL ) {
		perror( filename ) ;
		/* word came from new[], so it must be released with delete[] */
		delete[] word;
		word = NULL;
		return;
	}

	/* Index file */
	filename=Directory;
	filename+="english.dic";

	printf("BabFile: %s\n",(const char *)filename);
	file_idx = fopen( filename , FILE_READ_BINARY ) ;
	if ( file_idx == NULL ) {
		perror( filename ) ;
		delete[] word;
		word = NULL;
		fclose( file_def ) ;
		file_def = NULL;	/* do not keep a pointer to a closed stream */
		return;
	}

	/* All is OK ! */
	bab_init=1;
}

/*
 * Cleanup: release the word buffer and close both data files.
 * Nothing is done when the constructor did not complete (bab_init == 0).
 */
BabylonC::~BabylonC()
{
	if ( !bab_init ) return;
	/* word was allocated with new char[...], so it needs delete[] */
	delete[] word;
	fclose( file_def ) ;
	fclose( file_idx ) ;
}

/*
 * Release a whole list of definitions as returned by babDefinition().
 *
 * def may be NULL (empty list).  Every node is expected to carry non-NULL
 * word, attrib and definition strings; this is checked with assert().
 * Always returns 0.
 */
int BabylonC::babFree( struct bab_def* def )
{
	struct bab_def*		current ;
	struct bab_def*		next ;

	current = def ;
	while ( current != NULL ) {
		/* fetch the successor before the node itself is destroyed */
		next = current->next ;
		/* the three strings are allocated with new char[...] in babMakeDef,
		   so they have to be released with delete[] */
		assert( current->word ); delete[] current->word;
		assert( current->attrib ); delete[] current->attrib;
		assert( current->definition ); delete[] current->definition;
		delete current;
		current = next ;
	}

	return 0 ;
}

/*
 * Compute the slot number of a word in the main index tables.
 *
 * Only the first three letters take part.  Each letter is mapped to
 * (letter - 'a' + 2) and the three values are combined as digits of a
 * base-28 number, first letter most significant.  Positions beyond
 * word_length contribute nothing.
 *
 * Note: the number of letters is taken from the member word_length, not
 * from the string passed in.
 */
long BabylonC::babIndex( const char* word )
{
	static const int	weight[ 3 ] = { 28 * 28 , 28 , 1 } ;
	long				slot = 0 ;
	int					pos ;

	for ( pos = 0 ; pos < 3 ; pos++ ) {
		if ( word_length >= pos + 1 )
			slot += ( word[ pos ] - 'a' + 2 ) * weight[ pos ] ;
	}
	return slot ;
}

/*
 * Translate one 5-bit dictionary code into a character:
 *   below 26 -> 'a' + code
 *   27       -> space
 *   30       -> apostrophe
 *   other    -> '?'
 */
char BabylonC::bab_to_char( int c )
{
	if ( c >= 26 ) {
		if ( c == 27 )
			return ' ' ;
		if ( c == 30 )
			return '\'' ;
		return '?' ;
	}
	return c + 'a' ;
}

/*
 * Append the expansion of one 7-bit code to *dst, never writing more than
 * *size characters.  *dst and *size are advanced by what was written.
 *
 *   code >= 32      : the code is the character itself
 *   6 <= code < 32  : two-character entry of compact_table
 *   code < 6        : three-character entry of compact_table
 *
 * The caller guarantees *size > 0.  Returns 1 when the whole expansion
 * fitted, 0 when it had to be cut short (*size is 0 in that case).
 */
static int bab_put_code( char** dst , int* size , int code )
{
	const char*		text ;
	int				len ;
	int				fits = 1 ;

	if ( code >= 32 ) {
		*( ( *dst ) ++ ) = (char)code ;
		-- ( *size ) ;
		return 1 ;
	}

	text = compact_table[ code ] ;
	len = ( code >= 6 ) ? 2 : 3 ;
	if ( len > *size ) {
		len = *size ;
		fits = 0 ;
	}
	memcpy( *dst , text , len ) ;
	*dst += len ;
	*size -= len ;
	return fits ;
}

/*
 * Decode a packed word of `size` characters from src into dst.
 *
 * dst must have room for size + 1 bytes; the result is always
 * NUL-terminated and never longer than size characters.
 *
 * The text is stored as a sequence of 16-bit units (low byte first):
 *   - value < 32768: three 5-bit letter codes, lowest bits first, each
 *     translated with bab_to_char();
 *   - otherwise: two 7-bit codes (low byte, then high byte), each expanded
 *     as described for bab_put_code().  The high byte is ignored when the
 *     low byte already completed the word.
 *
 * The previous version stored every expansion first and looked at the
 * remaining size afterwards, which could write up to two characters plus the
 * terminator past the end of dst.  Output is now clipped to `size`; clipping
 * of a table expansion is reported with a warning.
 */
void BabylonC::babReadDef( char* dst , FILE* src , int size )
{
	/* Read text definition */
	while ( size > 0 ) {
		U2 data = read_u(2, src ) ;

		if ( data < 32768 ) {
			int k ;

			/* three packed letters, but never more than still fit */
			for ( k = 0 ; k < 3 && size > 0 ; k++ ) {
				*( dst ++ ) = bab_to_char( (int)( data & 0x1F ) ) ;
				data >>= 5 ;
				-- size ;
			}
		} else {
			int lsb = (int)( data & 0x7F ) ;
			int msb = (int)( ( data >> 8 ) & 0x7F ) ;

			if ( !bab_put_code( &dst , &size , lsb )
			     || ( size > 0 && !bab_put_code( &dst , &size , msb ) ) ) {
				warn( i18n("buffer overflow") ) ;
				break ;
			}
		}
	}
	( *dst ) = 0 ;
}

/*
 * Make a new definition node.
 *
 * pos is the position of the definition record in the file_def file.  The
 * record is read in this order:
 *   - 7 attribute bytes, used to pick a part-of-speech tag for def->attrib;
 *   - 1 byte word length followed by the packed word (see babReadDef);
 *   - 1 byte definition length followed by the encoded definition text.
 *
 * Returns a node whose three strings were allocated with new char[...]
 * (release the node with babFree()), or NULL if an allocation failed.
 *
 * Changes compared with the previous version: the word and definition
 * buffers get two spare bytes because the decoders may emit up to two
 * characters more than the announced length, and arrays are released with
 * delete[].
 */
struct bab_def *BabylonC::babMakeDef( FILE* file_def , fpos_t pos )
{
	int sz ;
	struct bab_def *def ;
	char *p ;
	U1 mask , c , pc ;
	int i ;

	/* allocate and initialize a bab_def structure */

	def = new bab_def;
	if ( def == NULL ) return NULL ;

	def->word = NULL ;
	def->attrib = NULL ;
	def->definition = NULL ;
	def->next = NULL ;

	fseek( file_def , pos , SEEK_SET ) ;

	p = new char[10 + 1];
	if ( p == NULL ) {
		delete def;
		return NULL ;
	}
	def->attrib=p;

	/* Dump the 7 attribute bytes as 14 hex digits into attrib_data.
	   NOTE(review): attrib_data is declared outside this function; it must
	   hold at least 15 bytes -- confirm against the class declaration. */
	p=attrib_data;
	for ( i = 0 ; i < 7 ; i++ )
		p += sprintf( p , "%02X" , (unsigned int)read_u(1, file_def) );

	/* Choose the tag from individual hex digits of the attribute bytes.
	   NOTE(review): def->attrib holds 10 characters; a longer translation
	   returned by i18n() would not fit. */
	if ( attrib_data[ 2 ] > '1' || attrib_data[ 3 ] != '0' ) strcpy(def->attrib,i18n("(n.)"));
	else if ( attrib_data[ 6 ] == '1' ) strcpy(def->attrib,i18n("(a.)"));
	else if ( attrib_data[ 6 ] != '0' || attrib_data[ 9 ] != '0' ) strcpy(def->attrib,i18n("(v.)"));
	else if ( attrib_data[ 7 ] != '0' ) strcpy(def->attrib,i18n("(a.)"));
	else if ( attrib_data[ 8 ] == '1' ) strcpy(def->attrib,i18n("(adv.)"));
	else strcpy(def->attrib,"");


	/* The word itself: sz characters, two spare bytes, terminator */
	sz = read_u(1, file_def ) ;
	p = new char[sz + 2 + 1] ;
	if ( p == NULL ) {
		delete[] def->attrib;
		delete def;
		return NULL;
	}
	def->word = p ;
	babReadDef( def->word , file_def , sz ) ;

	/* The length byte is at most 255, and the last decoding step below may
	   emit up to three characters while only one is still expected:
	   255 + 2 characters plus the terminator. */
	p = new char[255 + 2 + 1];
	if ( p == NULL ) {
		delete[] def->attrib;
		delete[] def->word;
		delete def;
		return NULL ;
	}
	def->definition = p ;
	sz = (int)read_u(1, file_def ) ;
	if ( sz == 0 )
		warn( i18n("Null definition") ) ;

	/* Decode the definition text.  Every second byte is XORed with 0x80
	   (mask toggles after each byte read).  The bytes 0x7D, 0x7F, 0xFB and
	   0xFF are held back for one round because, together with the byte that
	   follows, they may stand for a punctuation sequence. */
	mask = 0 ;
	c = 0 ;
	while ( sz > 0 ) {
		pc = c ;
		c = read_u(1, file_def ) ^ mask ;
		mask ^= 0x80 ;
#ifdef DEB
		printf( "[%02X '%02X'] ", (int)pc , (int)c ) ;
#endif
		if ( c == 0 ) {
#ifdef DEB
			printf( i18n("null char\n") ) ;
#endif
			continue ;
		}
		if ( pc == 0x7D ) {
			if ( c == 0XEF ) {
				*( p ++ ) = ',' ;
				*( p ++ ) = ' ' ;
				*( p ++ ) = ' ' ;
				sz -= 3 ;
#ifdef DEB
				printf( i18n("sequence (3)\n") ) ;
#endif
			} else {
				/* not a sequence: emit the held-back byte and this one */
				*( p ++ ) = pc ;
				*( p ++ ) = c ;
				sz -= 2 ;
#ifdef DEB
				printf( i18n("put 0xFB and current char. '%c'\n") , c ) ;
#endif
			}
		} else if ( pc == 0x7F ) {
			if ( c == 0xEF ) {
				*( p ++ ) = '.' ;
				*( p ++ ) = ' ' ;
				sz -= 3 ; // Considered as 3 chars.. Hum..
			}
			else {
#ifdef DEB
				printf( i18n("(unknown byte pair) ") ) ;
#endif
			}
#ifdef DEB
			printf( i18n("sequence (2)\n") ) ;
#endif
		} else if ( pc == 0xFF ) {
			*( p ++ ) = '.' ;
			*( p ++ ) = '.' ;
			if ( c == 0xF3 ) // [FF F3]
				*( p ++ ) = ';' ;
			else if ( c == 0xEF ) // [FF EF]
				*( p ++ ) = ' ' ;
			else if ( c == 0xFF ) // [FF FF]
				*( p ++ ) = '.' ;
			else {
#ifdef DEB
				printf( i18n("(unknown byte pair) ") ) ;
#endif
			}
#ifdef DEB
			printf( i18n("sequence (3)\n") ) ;
#endif
			sz -= 3 ;
			c = 0 ; // 'clear previous char'
		} else if ( pc == 0xFB ) {
			if ( c == 0xFF ) {
				*( p ++ ) = '.' ;
				*( p ++ ) = '.' ;
				sz -= 3 ; // Considered as 3 chars.. Hum..
				c = 0 ; // 'clear previous char'
#ifdef DEB
				printf( i18n("sequence (2/3)\n") ) ;
#endif
			} else {
				*( p ++ ) = pc ;
				*( p ++ ) = c ;
				sz -= 2 ;
#ifdef DEB
				printf( i18n("put 0xFB and current char. '%c'\n") , c ) ;
#endif
			}
			continue ;
		} else if ( c == 0xFB || c == 0xFF || c == 0x7D || c == 0x7F ) {
			/* possible start of a sequence: decide on the next byte */
#ifdef DEB
			printf( i18n("wait\n") ) ;
#endif
		} else {
			*( p++ ) = c ;
			-- sz ;
#ifdef DEB
			printf( i18n("put current char. '%c'\n")  , c ) ;
#endif
		}
	}
#ifdef DEB
	printf("\n");
#endif
	*p = 0 ;

#ifdef DEB
	printf( "[%s]\n" , def->definition ) ;
#endif

	return def ;
}

/*
 * Look up an already normalised word and return the list of its definitions.
 *
 * word is the lower-case word; its length is taken from the member
 * word_length.  Returns the head of a list of bab_def nodes (to be released
 * with babFree()), or NULL when nothing was found.
 *
 * Offsets handed to fseek() are converted to long, which is the parameter
 * type of fseek(); fpos_t is not guaranteed to be an arithmetic type.
 *
 * NOTE(review): for words shorter than three letters the unsigned
 * expressions ( i - 3 ) and ( word_length - j - 1 ) below wrap around;
 * whether the index file can contain such words is not visible here.
 */
bab_def *BabylonC::babMain( const char* word )
{
	long				main_index ;
	U4					idx_idx , idx_def ,i , j , nw , wc ;
	U1					min_size , max_size ;
	size_t				byte_to_skip ;
	int					c ;
	struct bab_def*		root ;
	struct bab_def*		current ;
	struct bab_def*		prev ;

	current = prev = root = NULL ;

	/* 1) Compute main index .. */

	main_index = 100 + 4 * babIndex( word ) ;

	/* 2) .. then seek to both file */

	fseek( file_idx , (long)main_index , SEEK_SET ) ;
	fseek( file_def , (long)main_index , SEEK_SET ) ;

	/* 3) Read corresponding indexes*/

	idx_idx = read_u(4, file_idx ) ;
	idx_def = read_u(4, file_def ) ;
	/* the next table entry being equal means this three-letter group is empty */
	if ( idx_def == read_u(4, file_def ) ) {
    #ifdef DEB
		warn( i18n("No word beginning with first 3 letters of the word") ) ;
    #endif
		return NULL;
	}

	fseek( file_idx , (long)idx_idx , SEEK_SET ) ;

	/* shortest and longest word length stored in this group */
	min_size = read_u(1, file_idx ) ;
	max_size = read_u(1, file_idx ) ;

	if ( ( word_length < min_size ) || ( word_length > max_size ) ) {
    #ifdef DEB
		warn( i18n("No word as long as the wanted word is defined") ) ;
    #endif
		return NULL;
	}

	/* One 16-bit word count follows per length from min_size to max_size.
	   Add up the shorter words: wc counts them, byte_to_skip is the room
	   their tails (letters after the third) occupy. */
	wc = 0 ;
	byte_to_skip = 0 ;
	for ( i = min_size ; i < word_length ; i++ ) {
		nw = (int)read_u(2, file_idx ) ;
		wc += nw ;
		byte_to_skip += ( i - 3 ) * nw ;
	}
	/* number of words having exactly the wanted length */
	nw = (int)read_u(2, file_idx ) ;
	/* also skip the counts of the longer lengths */
	byte_to_skip += ( max_size - word_length ) * 2 ;

	fseek( file_idx , byte_to_skip , SEEK_CUR ) ;

	/* Search word index */

	for ( i = 0 ; i < nw ; i++ ) {
		/* compare the tail (letters after the third) with the candidate */
		for ( j = 3 ; j < word_length ; j++ ) {
			c = fgetc( file_idx ) ;
			if ( word[ j ] != c )
				break ;
		}
		if ( j == word_length ) {
			U4 def ;

			/* wc is the running number of this word inside the group */
			fseek( file_def , idx_def + 4 * wc , SEEK_SET ) ;
			def = read_u(4, file_def ) ;

			if ( def & 0xFF000000UL ) { /* Another definition reference */
				/* upper 16 bits: main index slot, lower 16 bits: word number */
				main_index = 100 + 4 * ( ( def >> 16 ) & 0xFFFF ) ;
				fseek( file_def , (long)main_index , SEEK_SET ) ;
				idx_def = read_u(4, file_def ) ;

				fseek( file_def , idx_def + 4 * ( def & 0xFFFF ) , SEEK_SET ) ;
				def = read_u(4, file_def ) ;
			}
			current = babMakeDef( file_def , def ) ;
			if ( current != NULL ) {
				/* append to the result list */
				if ( root == NULL ) {
					root = current ;
				} else {
					prev->next = current ;
				}
				prev = current ;
			}
		} else {
			/* A character has been read without j incremented */
			fseek( file_idx , word_length - j - 1 , SEEK_CUR ) ;
		}
		wc++ ;
	}
	return root;
}


// Converts the word in p_word to lower case and then hands it over to babMain.
//
// Returns the list of definitions (release it with babFree()), or NULL when
// the object is not initialised, p_word is NULL or empty, the word has
// MAX_WORD_LENGTH or more characters, or it contains anything but the
// letters a-z / A-Z.
bab_def *BabylonC::babDefinition( const char * p_word )
{
	int i ;

	if ( bab_init!=1 )
		return NULL;
	if (p_word==NULL) return NULL;

	/* 1) Check that the word contain only alphabetic characters and
          rewrite word in lower case */

	for ( i = 0 ; i < MAX_WORD_LENGTH && p_word[ i ] ; i++ ) {
		/* <ctype.h> functions need an unsigned char value: a plain char may
		   be negative for bytes above 127 */
		const unsigned char	raw = (unsigned char)p_word[ i ] ;
		const int			lower = isalpha( raw ) ? tolower( raw ) : 0 ;

		/* babIndex() computes letter - 'a', so letters outside a-z that a
		   locale may accept as alphabetic are rejected as well */
		if ( lower < 'a' || lower > 'z' )
		{
      #ifdef DEB
			printf(i18n("Word: \"%s\"\n"),p_word);
			warn( i18n("Word contains non alpha characters"));
      #endif
			return NULL;
		}
		word[ i ] = (char)lower ;
	}
	word[ i ] = 0 ;
	word_length = i ;

	if ( ( word_length == 0 ) || ( word_length >= MAX_WORD_LENGTH ) ) {
    #ifdef DEB
		warn( i18n("Word size null or too big") ) ;
    #endif
		return NULL;
	}
#ifdef DEBUG
	printf( i18n("Definition of %s:\n") , word ) ;
#endif
	return babMain( word ) ;
}

/*
 * Look up p_word and write its translations as HTML into the given view.
 *
 * Writes a heading line followed by one paragraph per definition
 * (word, part-of-speech tag in blue, definition text).
 * Returns 1 when at least one definition was written, 0 when p_word is NULL
 * or nothing was found.
 *
 * The entry lines are put together with += instead of QString::sprintf():
 * a definition can be more than 250 characters long.
 * NOTE(review): QString::sprintf of the old char-based QString is
 * understood to format into a fixed-size buffer -- confirm against the Qt
 * version in use.
 * The heading keeps sprintf because its format string is translated; p_word
 * is known to be shorter than MAX_WORD_LENGTH once babDefinition() accepted it.
 * The former "None" branch was dropped: it could not be reached, an empty
 * result already returns above.
 */
int BabylonC::html(KHTMLView *html,const char *p_word)
{
  struct bab_def *def, *current;
  QString line;

  if(p_word==NULL) return 0;
  def = babDefinition( p_word );
  if (def==NULL) return 0;

  line.sprintf( i18n("Babylons translation of %s: ") , p_word );
  html->write(line);

  for ( current = def ; current != NULL ; current = current->next )
  {
    line = current->word;
    line += " <font color=\"#0000ff\">";
    line += current->attrib;
    line += "</font><br> ";
    line += current->definition;
    line += "<p>\n";
    html->write(line);
  }
  babFree(def);
  return 1;
}

/*
 * Disabled command-line test driver.  It is never compiled (#if 0) and its
 * signature line is commented out.  It calls babInit() and babEnd(), which
 * are not defined in this file, so it would need to be adapted before it
 * could be enabled again.
 */
#if 0
//int main( int argc , char * argv[] )
{
	struct bab_def * def , * current ;
	int return_code ;
	char * word ;

	if ( argc != 2 ) {
		printf( "Usage: libbab word\n" ) ;
		exit( 1 ) ;
	}

	word = argv[ 1 ] ;

	return_code = babInit( "/home/fred/project/bab" ) ;
	if ( return_code < 0 ) {
		fprintf( stderr , "Error while initializing libbab\n" ) ;
		exit( 1 ) ;
	}

	def = babDefinition( word ) ;
	if ( def == NULL )
		return 1 ;
	printf( "Definition of %s:\n" , word ) ;
	fflush( stdout ) ;

	current = def ;
	if ( current == NULL ) {
		printf( "None\n" ) ;
	} else while ( current != NULL ) {
		printf( "'%s' (%s): [%s]\n" , current->word , current->attrib , current->definition ) ;
		current = current->next ;
	}

	putchar( '\n' ) ;

	babEnd() ;

	return 0 ;
}
#endif


