#!/usr/bin/perl
################################################################
# sgmlthtml:
#   Run sgml2html and convert latin symbols to Thai characters.
#   This program will change, for example, &agrave; to "\340".
#
#   by Poonlap Veeratanabutr
#   $Id: sgmlthtml,v 1.1 1998/09/28 06:51:52 poon-v Exp poon-v $
################################################################

use strict;
use warnings;

# Lookup table: entity name => replacement text.
# An entry whose value equals its own name means "leave the entity in the
# output unchanged" (markup-significant entities, and Latin-1 code points
# for which the table deliberately provides no replacement byte).
# Entities that are not listed at all are also left unchanged.
my %thai_for = (
    quot   => 'quot',  amp    => 'amp',   'lt'   => 'lt',    'gt'   => 'gt',
    copy   => "\251",  reg    => "\256",  micro  => "\265",
    Agrave => "\300",  Aacute => "\301",  Acirc  => "\302",  Atilde => "\303",
    Auml   => "\304",  Aring  => "\305",  AElig  => "\306",  Ccedil => "\307",
    Egrave => "\310",  Eacute => "\311",  Ecirc  => "\312",  Euml   => "\313",
    Igrave => "\314",  Iacute => "\315",  Icirc  => "\316",
    Iuml   => "\317",                     # was misspelled "Iduml"
    ETH    => "\320",  Ntilde => "\321",
    Ograve => "\322",  Oacute => "\323",  Ocirc  => "\324",  Otilde => "\325",
    Ouml   => "\326",  Oslash => "\330",  Ugrave => "\331",  Uacute => "\332",
    # NOTE(review): the original values for THORN/Thorn were garbled text
    # that would have been written into the document; they are treated here
    # as pass-through entries like uuml/yacute/yuml -- confirm.
    THORN  => 'THORN', thorn  => 'thorn',
    szlig  => "\337",
    agrave => "\340",  aacute => "\341",  acirc  => "\342",  atilde => "\343",
    auml   => "\344",  aring  => "\345",  aelig  => "\346",  ccedil => "\347",
    egrave => "\350",  eacute => "\351",  ecirc  => "\352",  euml   => "\353",
    igrave => "\354",  iacute => "\355",  icirc  => "\356",  iuml   => "\357",
    eth    => "\360",  ntilde => "\361",
    ograve => "\362",  oacute => "\363",  ocirc  => "\364",  otilde => "\365",
    ouml   => "\366",  oslash => "\370",  ugrave => "\371",  uacute => "\372",
    ucirc  => "\373",
    uuml   => 'uuml',  yacute => 'yacute', yuml  => 'yuml',
);

# Print the usage message and terminate with a failure status.
sub error {
    print STDERR "usage: $0 [-c] file.sgml\n";
    print STDERR "       -c , run cttex on the output to insert Thai word breaks\n";
    exit 1;
}

# Return $line with every named entity found in %thai_for replaced by its
# mapped text.  Pass-through entries, unknown names and numeric references
# are returned untouched instead of being dropped from the output.
sub convert_entities {
    my ($line) = @_;
    $line =~ s{&([A-Za-z][A-Za-z0-9]*);}{
        my $name = $1;
        my $thai = $thai_for{$name};
        ( defined $thai && $thai ne $name ) ? $thai : "&$name;";
    }ge;
    return $line;
}

# Quote a string so that /bin/sh treats it as one literal word.
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Convert one HTML file in place: the converted text is written to
# "<file>.tmp", which then either replaces the file directly or is fed
# through cttex when $cut is true.
sub process_html_file {
    my ( $dir, $name, $cut ) = @_;

    my $path = "$dir/$name";
    my $tmp  = "$path.tmp";

    print "Processing file $name\n";

    open( my $in, '<', $path ) or die "$0: cannot read $path: $!\n";
    open( my $out, '>', $tmp ) or die "$0: cannot write $tmp: $!\n";
    while ( my $line = <$in> ) {
        print {$out} convert_entities($line)
            or die "$0: write to $tmp failed: $!\n";
    }
    close($in);
    close($out) or die "$0: cannot close $tmp: $!\n";

    if ($cut) {
        # cttex works as a filter, so the redirections need a shell;
        # both file names are quoted to survive spaces and metacharacters.
        my $command = 'cttex 0 < ' . shell_quote($tmp) . ' > ' . shell_quote($path);
        if ( system($command) == 0 ) {
            unlink($tmp);
            return;
        }
        # The shell has already truncated $path; fall back to the converted
        # but uncut text rather than leaving an empty or partial file.
        warn "$0: cttex failed on $path, keeping uncut output\n";
    }
    rename( $tmp, $path ) or die "$0: cannot rename $tmp to $path: $!\n";
    return;
}

# command line processing: exactly one file name, optionally with -c
# before or after it.
my @flags = grep { $_ eq '-c' } @ARGV;
my @files = grep { $_ ne '-c' } @ARGV;
error() unless @files == 1 && @flags <= 1;

my $file = $files[0];
my $cut  = @flags ? 1 : 0;

# run sgml2html (list form: the file name never passes through a shell)
if ( system( 'sgml2html', $file ) != 0 ) {
    exit 1;
}

# separate directory and file name from the given file name
my $dir  = '.';
my $root = $file;
if ( $root =~ m{\A(.*)/(.+)\z}s ) {
    $dir  = length($1) ? $1 : '/';
    $root = $2;
}
$root =~ s/(?<=.)\.sgml\z//;    # get rootname

# get all related html files
# NOTE(review): the HTML files are looked up next to the SGML source, as the
# original script did -- confirm that sgml2html writes its output there.
opendir( my $dh, $dir ) or die "$0: cannot open directory $dir: $!\n";
my @html = sort grep { /^\Q$root\E.*\.html$/ } readdir($dh);
closedir($dh);

foreach my $html_file (@html) {
    process_html_file( $dir, $html_file, $cut );
}

# EOF