# The LearningOnline Network with CAPA
# Handler to cleanup XML files
#
# $Id: loncleanup.pm,v 1.10 2008/12/24 07:58:34 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
#
###

package Apache::loncleanup;

use strict;
use Apache::File;
use File::Copy;
use Apache::Constants qw(:common :http :methods);
use Apache::loncacc;
use Apache::loncommon();
use Apache::lonlocal;
use Apache::lonnet;
use lib '/home/httpd/lib/perl/';
use LONCAPA;
 

sub latextrans {
    my $symbolfont=shift;
    my %latexsymb=(
		   '&#177;' => '\pm',
		   '&#180;' => '\times',
		   '&#184;' => '\div',
		   '&#210;' => '(R)',
		   '&#211;' => '\copy',
		   '&#216;' => '\neg',
		   '&#226;' => '(R)',
		   '&#227;' => '\copy',
		   '&#166;' => 'f',
		   'A' => '\Alpha',
		   'B' => '\Beta',
		   'G' => '\Gamma',
		   'D' => '\Delta',
		   'E' => '\Epsilon',
		   'Z' => '\Zeta',
		   'H' => '\Eta',
		   'Q' => '\Theta',
		   'I' => '\Iota',
		   'K' => '\Kappa',
		   'L' => '\Lambda',
		   'M' => '\Mu',
		   'N' => '\Nu',
		   'X' => '\Xi',
		   'O' => '\Omicron',
		   'P' => '\Pi',
		   'R' => '\Rho',
		   'S' => '\Sigma',
		   'T' => '\Tau',
		   'U' => 'Y',
		   'F' => '\Phi',
		   'C' => '\Chi',
		   'Y' => '\Psi',
		   'W' => '\Omega',
		   'a' => '\alpha',
		   'b' => '\beta',
		   'g' => '\gamma',
		   'd' => '\delta',
		   'e' => '\epsilon',
		   'z' => '\zeta',
		   'h' => '\eta',
		   'q' => '\theta',
		   'i' => '\iota',
		   'k' => '\kappa',
		   'l' => '\lambda',
		   'm' => '\mu',
		   'n' => '\nu',
		   'x' => '\xi',
		   'o' => '\omicron',
		   'p' => '\pi',
		   'r' => '\rho',
		   'V' => '\sigmaf',
		   's' => '\sigma',
		   't' => '\tau',
		   'u' => '\upsilon',
		   'f' => '\phi',
		   'c' => '\chi',
		   'y' => '\psi',
		   'w' => '\omega',
		   'J' => '\vartheta',
		   'j' => '\varphi',
		   'v' => '\varpi',
		   '&#161;' => '\Upsilon',
		   '&#162;' => "'",
		   '&#164;' => '/',
		   '&#178;' => '"',
		   '&#188;' => '\ldots',
		   '&#192;' => '\aleph',
		   '&#193;' => '\Im',
		   '&#194;' => '\Re',
		   '&#195;' => '\wp',
		   '&#212;' => '^{TM}',
		   '&#228;' => '^{TM}',
		   '&#240;' => 'EUR',
		   '&#171;' => '\leftrightarrow',
		   '&#172;' => '\leftarrow',
		   '&#173;' => '\uparrow',
		   '&#174;' => '\rightarrow',
		   '&#175;' => '\downarraw',
		   '&#191;' => '\hookleftarrow',
		   '&#219;' => '\Leftrightarrow',
		   '&#220;' => '\Leftarrow',
		   '&#221;' => '\Uparrow',
		   '&#222;' => '\Rightarrow',
		   '&#223;' => '\Downarrow',
		   '&#34;' => '\forall',
		   '&#36;' => '\exists',
		   '&#39;' => '\ni',
		   '&#42;' => '\ast',
		   '&#45;' => '-',
		   '&#64;' => '\cong',
		   '&#92;' => '\therefore',
		   '&#94;' => '\perp',
		   '&#126;' => '\sim',
		   '&#163;' => '\leq',
		   '&#165;' => '\infty',
		   '&#179;' => '\geq',
		   '&#181;' => '\propto',
		   '&#182;' => '\partial',
		   '&#183;' => '\cdot',
		   '&#185;' => '\not=',
		   '&#186;' => '\equiv',
		   '&#187;' => '\approx',
		   '&#196;' => '\otimes',
		   '&#197;' => '\oplus',
		   '&#198;' => '\emptyset',
		   '&#199;' => '\cap',
		   '&#200;' => '\cup',
		   '&#201;' => '\supset',
		   '&#202;' => '\supseteq',
		   '&#203;' => '\not\subset',
		   '&#204;' => '\subset',
		   '&#205;' => '\subseteq',
		   '&#206;' => '\in',
		   '&#207;' => '\not\in',
		   '&#208;' => '\angle',
		   '&#209;' => '\nabla',
		   '&#213;' => '\prod',
		   '&#214;' => '\surd',
		   '&#215;' => '\cdot',
		   '&#217;' => '\wedge',
		   '&#218;' => '\wee',
		   '&#229;' => '\sum',
		   '&#242;' => '\int',
		   '&#225;' => '\langle',
		   '&#241;' => '\rangle',
		   '&#224;' => '\diamondsuit',
		   '&#167;' => '\clubsuit',
		   '&#168;' => '\diamondsuit',
		   '&#169;' => '\heartsuit',
		   '&#170;' => '\spadesuit'
		   );
    my $output='';
    my $char='';
    my $entitymode=0;
    for (my $i=0; $i<length($symbolfont); $i++) {
        my $newchar=substr($symbolfont,$i,1);
        $char.=$newchar;
        if ($newchar eq '&') { $entitymode=1; }
        if (($entitymode) && ($newchar ne ';')) { next; }
        my $latex=$latexsymb{$char};
	if ($latex) {
	    $output.=$latex;
	} else {
	    $output.=$char;
	}
        $char='';
        $entitymode=0;
    }
    return $output;
}

sub insidetrans {
    my @args=@_;
    return '<font'.$args[0].$args[1].'><m>$'.&latextrans($args[2]).'$</m>';
}

sub symbolfontreplace {
    my $text=shift;
    my @fragments=split(/\<\/font\>/si,$text);
    for (my $i=0; $i<=$#fragments;$i++) {
	$fragments[$i]=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
    }
    return join('</font>',@fragments);
}

sub htmlclean {
    my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_;
# Take care of CRLF etc
    unless ($blocklinefeed) {
	$raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
	$raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
	$raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
	$raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
    }
# Generate empty tags, remove wrong end tags
    unless ($blockemptytags) {
	$raw=~s/\<(br|hr|img|meta|embed|allow|basefont)([^\>]*?)\>/\<$1$2 \/\>/gis;
	$raw=~s/\<\/(br|hr|img|meta|embed|allow|basefont)\>//gis;
	$raw=~s/\/ \/\>/\/\>/gs;
	unless ($full) {
	    $raw=~s/\<[\/]*(body|head|html)\>//gis;
	}
    }
# Make standard tags lowercase
    unless ($blocklowercasing) {
	foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
		 'table','tr','td','th','p','br','hr','img','embed','font',
		 'a','strong','center','title','basefont','li','ol','ul',
		 'input','select','form','option','script','pre') {
	    $raw=~s/\<$_\s*\>/\<$_\>/gis;
	    $raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
	    $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
	}
    }
# Replace <font face="symbol">
    unless ($blockdesymboling) {
	$raw=&symbolfontreplace($raw);
    }
    return $raw;
}

sub phaseone {
    my ($r,$fn,$uname,$udom)=@_;
    $r->print(&mt('Select actions to attempt:').
	      '<br /><input type="checkbox" name="linefeed" checked="checked" /> '.
	      &mt('Linefeeds, formfeeds, and carriage returns').
	      '<br /><input type="checkbox" name="empty" checked="checked" /> '.
	      &mt('Empty tags').
	      '<br /><input type="checkbox" name="lower" checked="checked" /> '.
	      &mt('Lower casing').
	      '<br /><input type="checkbox" name="symbol"checked="checked" /> '.
	      &mt('Symbol font').
	      '<input type="hidden" name="phase" value="two" />'.
	      '<p><input type="submit" value="'.&mt('Cleanup').'" /></p>');
}

sub phasetwo {
    my ($r,$fn,$uname,$udom)=@_;
    open(IN,'/home/'.$uname.'/public_html/'.$fn);
    my $text='';
    while (my $line=<IN>) {
	$text.=$line;
    }
    close(IN);
    my $uri='/~'.$uname.$fn;
    my $result=&Apache::lonnet::ssi_body($uri,
					 ('grade_target'=>'web',
					  'return_only_error_and_warning_counts' => 1));
    my ($errorcount,$warningcount)=split(':',$result);
    $r->print(&mt('Original file').': '.
	      $errorcount.' '.&mt('error(s)').', '.
	      $warningcount.' '.&mt('warning(s)'));
    $text=&htmlclean($text,1,
               ($env{'form.linefeed'} ne 'on'),
               ($env{'form.empty'} ne 'on'),
               ($env{'form.lower'} ne 'on'),
               ($env{'form.symbol'} ne 'on'));
    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
    open(OUT,'>/home/'.$uname.'/public_html'.$newfn);
    print OUT $text;
    close(OUT);
    my $newuri='/~'.$uname.$newfn;
    $result=&Apache::lonnet::ssi_body($newuri,
					 ('grade_target'=>'web',
					  'return_only_error_and_warning_counts' => 1));
    ($errorcount,$warningcount)=split(':',$result);
    $r->print('<br />'.&mt('Cleaned up file').': '.
	      $errorcount.' '.&mt('error(s)').', '.
	      $warningcount.' '.&mt('warning(s)').
              '<br /><a href="'.$newuri.'" target="prev">'.
	      &mt('Open (and edit) cleaned up file in new window').'</a>'.
              '<br /><a href="/adm/diff?filename='.&escape($uri).
	      '&versionone=priv&filetwo='.
	      &escape($newuri).'" target="prev">'.
	      &mt('Show diffs in new window').'</a><br />'.
	      '<input type="hidden" name="phase" value="three" />'.
	      '<input type="submit" name="accept" value="'.&mt('Accept Result').'" />'.
	      '<input type="submit" name="reject" value="'.&mt('Reject Result').'" />'
	      );
}

sub phasethree {
    my ($r,$fn,$uname,$udom)=@_;
    my $old='/home/'.$uname.'/public_html/'.$fn;
    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
    my $new='/home/'.$uname.'/public_html'.$newfn;
    if ($env{'form.accept'}) {
	$r->print(&mt('Accepting changes'));
        move($new,$old);
    } else {
	$r->print(&mt('Rejeting changes'));
        unlink($new);
    }
}

# ---------------------------------------------------------------- Main Handler
sub handler {

    my $r=shift;
    my $fn='';

# Get query string for limited number of parameters

    &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
					    ['filename']);

    if ($env{'form.filename'}) {
	$fn=$env{'form.filename'};
	$fn=~s/^https?\:\/\/[^\/]+//;
    } else {
	$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
		       ' unspecified filename for cleanup', $r->filename); 
	return HTTP_NOT_FOUND;
    }

    unless ($fn) { 
	$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
		       ' trying to cleanup non-existing file', $r->filename); 
	return HTTP_NOT_FOUND;
    } 

# ----------------------------------------------------------- Start page output
    my $uname;
    my $udom;

    ($uname,$udom)=
	&Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
    unless (($uname) && ($udom)) {
	$r->log_reason($uname.' at '.$udom.
		       ' trying to cleanup file '.$env{'form.filename'}.
		       ' ('.$fn.') - not authorized', 
		       $r->filename); 
	return HTTP_NOT_ACCEPTABLE;
    }

    $fn=~s{/~($LONCAPA::username_re)}{};

    &Apache::loncommon::content_type($r,'text/html');
    $r->send_http_header;

    $r->print(&Apache::loncommon::start_page('Cleanup XML Document'));
    $r->print('<h2>'.$fn.'</h2>'.
              '<form action="/adm/cleanup" method="post">'.
              '<input type="hidden" name="filename" value="'.$env{'form.filename'}.'" />');
    unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) {
	$r->print(&mt('Cannot cleanup this filetype'));
    } else {
	if ($env{'form.phase'} eq 'three') {
	    &phasethree($r,$fn,$uname,$udom);
	} elsif ($env{'form.phase'} eq 'two') {
	    &phasetwo($r,$fn,$uname,$udom);
	} else {
	    &phaseone($r,$fn,$uname,$udom);
	}
    }
    my $dir=$fn;
    $dir=~s/\/[^\/]+$/\//;
    $r->print('</form>'.
	      '<br /><a href="/priv/'.$uname.'/'.$fn.'">'.&mt('Back to Source File').'</a>'.
              '<br /><a href="/priv/'.$uname.'/'.$dir.'">'.&mt('Back to Source Directory').'</a>'.
	      &Apache::loncommon::end_page());
    return OK;  
}

1;
__END__
