#!/usr/bin/perl

################################################################
# sgmlthtml: 
# Run sgml2html and convert latin symbols to Thai characters.
# This program will change, for example, &agrave; to "\340".
#
# by Poonlap Veeratanabutr <poon-v@fedu.uec.ac.jp> 
# $Id: sgmlthtml,v 1.1 1998/09/28 06:51:52 poon-v Exp poon-v $
################################################################


# Print the command-line synopsis to STDERR and terminate the script.
# Takes no arguments and never returns.  The exit status is non-zero so
# that a calling shell or makefile can tell a usage error from a
# successful run.
sub error {
    print STDERR "usage: $0 [-c] file.sgml\n";
    print STDERR "       -c , use cttex to fill <WBR>\n";
    exit 1;
}

# Command line processing.
# Accepted forms:   file.sgml   |   -c file.sgml   |   file.sgml -c
# On success $file holds the SGML source name and $cut is 1 when -c was
# given, 0 otherwise.  Every other argument combination (no arguments, a
# lone "-c", two arguments without "-c", three or more arguments) ends in
# the usage message.
if ( scalar(@ARGV) == 1 && $ARGV[0] ne "-c" ) {
    ( $file, $cut ) = ( $ARGV[0], 0 );
}
elsif ( scalar(@ARGV) == 2 && $ARGV[0] eq "-c" ) {
    ( $file, $cut ) = ( $ARGV[1], 1 );
}
elsif ( scalar(@ARGV) == 2 && $ARGV[1] eq "-c" ) {
    ( $file, $cut ) = ( $ARGV[0], 1 );
}
else {
    error();
}

# Run sgml2html on the source file.
# The list form of system() hands the file name to the program directly
# instead of going through the shell, so names containing spaces or shell
# metacharacters are passed intact.  A failed run stops the script with a
# diagnostic and a non-zero exit status.
if ( system( "sgml2html", $file ) != 0 ) {
    print STDERR "$0: sgml2html failed for $file\n";
    exit 1;
}

# Lookup table for what to change and what not to change.
# Keys are SGML/HTML entity names (case-sensitive, without "&" and ";").
#   - value equal to the key : the entity is kept in the output as "&name;"
#   - any other value        : the entity is replaced by that byte
# NOTE(review): THORN and thorn are listed as "keep" entries, like uuml,
# yacute and yuml -- presumably because the target Thai encoding has no
# character at those code points; confirm against the encoding table.
%lookuptbl = (
    # markup-significant entities: always kept
    quot   => "quot",   amp    => "amp",    lt     => "lt",     gt     => "gt",

    # symbols
    copy   => "\251",   reg    => "\256",   micro  => "\265",

    # upper-case accented letters
    Agrave => "\300",   Aacute => "\301",   Acirc  => "\302",   Atilde => "\303",
    Auml   => "\304",   Aring  => "\305",   AElig  => "\306",   Ccedil => "\307",
    Egrave => "\310",   Eacute => "\311",   Ecirc  => "\312",   Euml   => "\313",
    Igrave => "\314",   Iacute => "\315",   Icirc  => "\316",   Iuml   => "\317",
    ETH    => "\320",   Ntilde => "\321",   Ograve => "\322",   Oacute => "\323",
    Ocirc  => "\324",   Otilde => "\325",   Ouml   => "\326",   Oslash => "\330",
    Ugrave => "\331",   Uacute => "\332",   THORN  => "THORN",  szlig  => "\337",

    # lower-case accented letters
    agrave => "\340",   aacute => "\341",   acirc  => "\342",   atilde => "\343",
    auml   => "\344",   aring  => "\345",   aelig  => "\346",   ccedil => "\347",
    egrave => "\350",   eacute => "\351",   ecirc  => "\352",   euml   => "\353",
    igrave => "\354",   iacute => "\355",   icirc  => "\356",   iuml   => "\357",
    eth    => "\360",   ntilde => "\361",   ograve => "\362",   oacute => "\363",
    ocirc  => "\364",   otilde => "\365",   ouml   => "\366",   oslash => "\370",
    ugrave => "\371",   uacute => "\372",   ucirc  => "\373",   uuml   => "uuml",
    yacute => "yacute", thorn  => "thorn",  yuml   => "yuml",
);

# Return those entries of @names that belong to the document whose root
# name is $root: the name must start with the root name and end in ".html".
# The root name is matched literally (\Q...\E), so characters such as "."
# or "+" in a file name are not treated as regex operators, and the match
# is anchored at the start so that root "a" does not pick up "data.html".
sub _related_html {
    my ( $root, @names ) = @_;
    return grep { /^\Q$root\E.*\.html$/ } @names;
}

# Separate directory and file name from the given file name.
if ( $file =~ m{(.*)/(.+)$} ) {
    $dir  = $1;
    $file = $2;
}
$file =~ s/(.+)\.sgml$/$1/;    # get rootname: strip a trailing ".sgml" only

# Collect all related html files.
# NOTE(review): the listing is taken from the directory of the SGML source,
# while the processing loop opens each name relative to the current
# directory; the two agree only when the source lives in the current
# directory -- confirm where sgml2html writes its output.
my $listing_dir = length($dir) ? $dir : ".";
opendir( my $dir_handle, $listing_dir )
    or die "$0: cannot open directory $listing_dir: $!\n";
@html = _related_html( $file, readdir($dir_handle) );
closedir($dir_handle);

# Return a copy of $text with every entity reference "&name;" translated
# through the hash referenced by $table:
#   - name maps to itself    -> the reference is kept as "&name;"
#   - name maps elsewhere    -> the reference is replaced by the mapped string
#   - name is not in $table  -> the reference is left untouched
# Only well-formed references (ampersand, name, semicolon) are considered,
# so a stray "&" or ";" in running text never swallows the text between
# them, and a ";" that precedes the first "&" does not stop the conversion.
sub _thai_entities {
    my ( $text, $table ) = @_;
    $text =~ s{&([A-Za-z][A-Za-z0-9]*);}{
        my $mapped = $table->{$1};
        ( defined $mapped && $mapped ne $1 ) ? $mapped : "&$1;";
    }ge;
    return $text;
}

# Quote $word so that /bin/sh sees it as one literal argument.
sub _sh_quote {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Convert every collected HTML file in place: write the translated text to
# "<file>.tmp", then either pipe that through cttex (when -c was given) or
# move it over the original.  A file that cannot be processed is reported
# on STDERR and skipped; the remaining files are still handled.
foreach my $html_file (@html) {
    print "Processing file $html_file\n";
    my $tmp_file = "$html_file.tmp";

    my ( $in, $out );
    unless ( open( $in, '<', $html_file ) ) {
        warn "$0: cannot read $html_file: $!\n";
        next;
    }
    unless ( open( $out, '>', $tmp_file ) ) {
        warn "$0: cannot write $tmp_file: $!\n";
        close($in);
        next;
    }

    while ( my $line = <$in> ) {
        print {$out} _thai_entities( $line, \%lookuptbl );
    }
    close($in);

    # Buffered write errors only surface when the handle is closed.
    unless ( close($out) ) {
        warn "$0: error writing $tmp_file: $!\n";
        unlink($tmp_file);
        next;
    }

    if ( $cut == 1 ) {
        # cttex is fed through stdin/stdout redirection, which needs the
        # shell; the file names are quoted so they survive it unchanged.
        my $command = sprintf( "cttex 0 < %s > %s",
            _sh_quote($tmp_file), _sh_quote($html_file) );
        if ( system($command) == 0 ) {
            unlink($tmp_file);
        }
        else {
            # The redirection has already truncated the HTML file, so fall
            # back to the converted (but unsegmented) text rather than
            # leaving a damaged file behind.
            warn "$0: cttex failed for $html_file, keeping text without <WBR>\n";
            rename( $tmp_file, $html_file )
                or warn "$0: cannot replace $html_file: $!\n";
        }
    }
    else {
        rename( $tmp_file, $html_file )
            or warn "$0: cannot replace $html_file: $!\n";
    }
}

# EOF







