(***************************************************************** *)
(*								   *)
(* Project: CATS						   *)
(* Author: Till Mossakowski, University of Bremen		   *)
(* Date: 29.12.98						   *)
(* Purpose of this file: Mixfix Parser for CASL 		   *)
(*								   *)
(*								   *)
(***************************************************************** *)

(* We use the Isabelle parser, which can handle dynamically constructed
   grammars, for mixfix parsing. The CASL grammar is dynamically
   extended according to the declared mixfix symbols.

   todo
   Future work: implement directly the CYK algorithm for context free
   recognition. This has the advantage that precedence checking
   can be done interleaved with the mixfix parsing, avaoiding
   space and time problems due to explosions of combinations
   when ambiguities arise.

  error messages are collected with global variable  

   Prezedenzen bei Mixfix-Operatoren
     Ungleichungen behandeln
     Gleiche Praezedenzen richtig behandeln
  renaming von mixfix (siehe FinitePowerSet3) 
  Bessere Fehlermeldungen bei mehrdeutigen Eingaben
  Gehen anonyme Operatoren?
  bracket balancing
*)

structure IsabelleMixfixParser :
sig
  val mixfix_parse : bool -> (LocalEnv.sign * AS.ANNO list)
                          -> AS.BASIC_SPEC -> (AS.BASIC_SPEC * string list)
  val parse_axiom  : bool -> (LocalEnv.sign * AS.ANNO list)  -> string -> (AS.FORMULA * string list)
  val parse_term   : (LocalEnv.sign * AS.ANNO list)  -> string -> (AS.TERM * string list)
  val get_prec_ord : AS.ID list -> AS.ANNO list 
                       -> IsabelleParser.precedence_order
end
=
struct

open AS Term Utils IDOrder 
     BasicPrint LocalEnv CASLScanner IsabelleParser
infix 9  $;
infixr 5 -->;
infixr --->;
infix aconv;
infix mem;
infix upto;


val errlist = ref [] : string list ref;

(*** Construction of syntax for Mixfix parsing ***)

fun quote_special "_" = "'_"
  | quote_special "/" = "'/"
  | quote_special "'" = "''"
  | quote_special x = x
val quote_specials = concat o (map quote_special) o explode

fun separate_brackets1 (c:string) = 
  if c mem ["[","]","{","}"] 
  then [" ",c," "]
  else [c]
  
fun separate_brackets s = 
  implode (flat (map separate_brackets1 (explode s)))

fun split_ID (simple_id sid) = (sid,[])
|   split_ID (compound_id (sid,id1list)) = (sid,id1list)


fun make_name nil components = ""
|   make_name (token t::rest) components = 
       print_ID(compound_id (([],t,None),components))
       ^make_name rest components
|   make_name (place::rest) components = 
       "__"^make_name rest components 

fun make_spaced_name nil components = ""
|   make_spaced_name (token t::rest) components = 
       print_ID(compound_id (([],quote_specials (separate_brackets t),None),components))
       ^make_spaced_name rest components
|   make_spaced_name (place::rest) components = 
       " '_'_ "^make_spaced_name rest components 

fun make_mixfix_anno nil components = ""
|   make_mixfix_anno (token t::rest) components = 
       print_ID(compound_id (([],quote_specials (separate_brackets t),None),components))
       ^make_mixfix_anno rest components
|   make_mixfix_anno (place::rest) components = 
       "_"^make_mixfix_anno rest components 

fun get_arity nil = 0
|   get_arity (token t::rest) = get_arity rest
|   get_arity (place::rest) = 1+get_arity rest

fun get_tokens nil = nil
|   get_tokens (token t::rest) = t::get_tokens rest
|   get_tokens (place::rest) = get_tokens rest

fun is_special x = x mem (place :: map token ["{}","<","*","?","/","->","->?","<>"])
    (* special SIGNs from CASL98Pure.thy *)

fun add_mixfix_fun ((syn,toks),(fun_name,_)) =
    let val ((tokenplacelist,str,_),components) = split_ID fun_name
        val stype = get_symbol_type tokenplacelist
    in if stype = ordinary_sym
    then (syn,toks)
    else let
        val newtoks = get_tokens tokenplacelist
        val mixfix_name = make_name tokenplacelist components
        val mixfix_spaced_name = make_spaced_name tokenplacelist components
        val mixfix_anno = make_mixfix_anno tokenplacelist components
        val arity_list = 1 upto (get_arity tokenplacelist)
        val (argprio,resprio) = 
                  case stype of
                      ordinary_sym => (0,1000) (* this cannot be, due to the above "if ..." *)
                      | postfix_sym => (990,990)
                      | prefix_sym => (980,980)
                      | infix_sym => (100,100)
                      | outfix_sym => (0,1000) 
        val argprio_list = 
             map (K argprio) arity_list
        val type1 = map (K TTermType) arity_list ---> TTermType 
        val type2 = MixfixType
        val mix_id_syntax = if forall1 (map is_special tokenplacelist)
                            then []
                            else [("op"^mixfix_name,type2,Mixfix.Delimfix mixfix_spaced_name)]   
        val mix_list = (mixfix_name,type1,Mixfix.Mixfix (mixfix_anno,argprio_list,resprio)) :: mix_id_syntax
      in (Syntax.extend_const_gram syn ("",true) mix_list,
          newtoks@toks)
      end
    end;
(* Check arity ? No, this is done by overload resolution *)

fun add_mixfix_pred ((syn,toks),(pred_name,_)) =
    let val ((tokenplacelist,str,_),components) = split_ID pred_name
    in if place mem tokenplacelist
    then let
        val newtoks = get_tokens tokenplacelist
        val mixfix_name = make_name tokenplacelist components
        val mixfix_spaced_name = make_spaced_name tokenplacelist components
        val mixfix_anno = make_mixfix_anno tokenplacelist components
        val arity_list = 1 upto (get_arity tokenplacelist)
        val (argprio,resprio) = (30,30)
        val argprio_list = map (K argprio) arity_list
        val type1 = map (K TTermType) arity_list ---> AtomType  
        val type2 = MixfixType  
        val mix_id_syntax = if forall1 (map is_special tokenplacelist)
                            then []
                            else [("op"^mixfix_name,type2,Mixfix.Delimfix mixfix_spaced_name)]
        val mix_list = (mixfix_name,type1,Mixfix.Mixfix (mixfix_anno,argprio_list,resprio))
                        :: mix_id_syntax 
      in (Syntax.extend_const_gram syn ("",true) mix_list,
          newtoks@toks)
      end
    else (syn,toks)
    end;

fun add_mixfix_list ((syn,toks),list_anno(brackets,_,_)) =
    let val ((tokenplacelist,str,_),components) = split_ID brackets
        val newtoks = get_tokens tokenplacelist
        val mixfix_name = make_name tokenplacelist components
        val mixfix_spaced_name = make_spaced_name tokenplacelist components
        val mixfix_anno = make_mixfix_anno tokenplacelist components
        val (argprio,resprio) = (0,1000)
        val type1 = [TTermsType] ---> TTermType 
        val mix_list = [(mixfix_name,type1,Mixfix.Mixfix (mixfix_anno,[argprio],resprio))]
      in (Syntax.extend_const_gram syn ("",true) mix_list,
          newtoks@toks)
      end
  | add_mixfix_list ((syn,toks),an) = (syn,toks);


fun add_mixfix syn ((_,_,funenv,predenv),an) =
    let val funlist = Symtab_id.dest funenv;
        val predlist = Symtab_id.dest predenv
        val (syn1,toks1) = foldl add_mixfix_fun ((syn,[]),funlist)
        val (syn2,toks2) = foldl add_mixfix_pred ((syn1,toks1),predlist)
        val (syn3,toks3) = foldl add_mixfix_list ((syn2,toks2),an)
    in (syn3,toks3)
    end;    


(* Mixfix analysis *)


fun convert_mixfix_token mix_tokens (t as (Lexicon.SignSy s)) =
	if s mem mix_tokens
	then Lexicon.Token s
	else t
|  convert_mixfix_token mix_tokens (t as (Lexicon.WordsSy s)) =
	if s mem mix_tokens
	then Lexicon.Token s
	else t
|  convert_mixfix_token mix_tokens (t as (Lexicon.DigitSy s)) =
	if s mem mix_tokens
	then Lexicon.Token s
	else t
|  convert_mixfix_token mix_tokens (t as (Lexicon.CharSy s)) =
	if s mem mix_tokens
	then Lexicon.Token s
	else t
|  convert_mixfix_token mix_tokens t = t;
	
fun conv_tok (Token "__") = ""
|   conv_tok (Token s) = s
|   conv_tok (SignSy s) = s
|   conv_tok (WordsSy s) = s
|   conv_tok (DigitSy s) = s
|   conv_tok (CharSy s) = s
|   conv_tok _ = raise (ERR "Mixfix: conv_tok")

fun separate_token id = map conv_tok (rev ( tl ( rev (hd (scan id)))))
 
fun separate_tokens toks = flat (map separate_token toks)
 
fun convert_mixfix_tokens nil toks = toks
|   convert_mixfix_tokens mix_tokens toks = 
    let val sep_tokens = separate_tokens mix_tokens
    in map (convert_mixfix_token sep_tokens) toks
    end



fun mixfix_parse_TERM1 r (Sigma,vl,spec_syn,mix_tokens,an,prec_ord,html,_) t =
    let val tokens = hd (scan t)
        val tokens1 = convert_mixfix_tokens mix_tokens tokens
        val IsabelleTerm = read_term r (Sigma,vl,prec_ord,an,true) html spec_syn TTermType tokens1
        val t1 = Convert.convert_TTerm an IsabelleTerm
    in t1
    end
    
fun mixfix_parse_TERM r mix_syn (unparsed_term t) =
    (mixfix_parse_TERM1 r mix_syn t
       handle exn => if !Global.test then raise exn 
          else
          (errlist:= !errlist @ (case exn of
             ERR s => [perr r^"(mixfix) "^s]
             | AMBIGUOUS _ => []
             | _ => []);
           unparsed_term t))
  | mixfix_parse_TERM _ mix_syn (pos_TERM(r,b,t)) = 
    pos_TERM(r,b,mixfix_parse_TERM r mix_syn t)
  | mixfix_parse_TERM _ mix_syn t = t


fun mixfix_parse_FORMULA1 r (Sigma,vl,spec_syn,mix_tokens,an,prec_ord,html,is_goal) phi =
    let val tokens = hd (scan phi)
        val tokens1 = convert_mixfix_tokens mix_tokens tokens
        val IsabelleTerm = read_formula r (Sigma,vl,prec_ord,an,is_goal) html spec_syn FormulaType tokens1
        val phi1 = Convert.convert_Formula an IsabelleTerm
    in phi1
    end

fun mixfix_parse_FORMULA r mix_syn (unparsed_formula phi) =
    (mixfix_parse_FORMULA1 r mix_syn phi
       handle exn => if !Global.test then raise exn
           else 
           ((errlist := !errlist @ 
            (case exn of
              ERR s => [perr r^"(mixfix) "^s]
              | AMBIGUOUS _ => []
              | _ => []));
             unparsed_formula phi
            ))
  | mixfix_parse_FORMULA _ mix_syn (pos_FORMULA(r,b,phi)) = 
    pos_FORMULA(r,b,mixfix_parse_FORMULA r mix_syn phi)
  | mixfix_parse_FORMULA _ mix_syn phi = phi

fun mixfix_parse_attr r mix_syn (unit_op_attr t)
    = unit_op_attr (mixfix_parse_TERM r mix_syn t)
  | mixfix_parse_attr _ mix_syn (pos_OP_ATTR (r,a)) =
    pos_OP_ATTR(r,mixfix_parse_attr r mix_syn a)
  | mixfix_parse_attr _ _ x = x


fun get_vars (arg_decl vlist) = map fst (fst vlist)
  | get_vars (pos_ARG_DECL (_,ad)) = get_vars ad

fun get_vars_OP_HEAD (total_op_head (vlist,_)) = vlist
  | get_vars_OP_HEAD (partial_op_head (vlist,_)) = vlist
  | get_vars_OP_HEAD (pos_OP_HEAD (_,h)) = 
    get_vars_OP_HEAD h

fun mixfix_parse_op_item (Sigma,vdl,syn,toks,an,prec_ord,html,is_goal)
                         (op_defn (f,op_head,t,ans),ans1):OP_ITEM*ANNO list =
    let val vlist = get_vars_OP_HEAD op_head
    in
      (op_defn (f,op_head,
          mixfix_parse_TERM null_region (Sigma,flat(map get_vars vlist)@vdl,syn,toks,an,prec_ord,html,is_goal) t,ans),ans1)
    end
  | mixfix_parse_op_item mix_syn (op_decl (f,t,att_list),ans) = 
     (op_decl (f,t,map (mixfix_parse_attr null_region mix_syn) att_list),ans)
  | mixfix_parse_op_item mix_syn (pos_OP_ITEM (r,oi),ans) =
    let val (oi1,ans1) = mixfix_parse_op_item mix_syn (oi,ans)
    in (pos_OP_ITEM(r,oi1),ans1)
    end
 
fun mixfix_parse_sortitems (Sigma,vdl,syn,toks,an,prec_ord,html,is_goal) 
                           (subsort_defn (s1,v,s,phi,ans),l) =
   (subsort_defn (s1,v,s,
                  mixfix_parse_FORMULA null_region
                      (Sigma,fst v::vdl,syn,toks,an,prec_ord,html,is_goal) phi,
                  ans),l)
  | mixfix_parse_sortitems mix_syn (pos_SORT_ITEM (r,si),ans) =
    let val (si1,ans1) = mixfix_parse_sortitems mix_syn (si,ans)
    in (pos_SORT_ITEM (r,si1),ans1)
    end
  | mixfix_parse_sortitems mix_syn x = x

fun get_vars_PRED_HEAD (pred_head vlist) = vlist
  | get_vars_PRED_HEAD (pos_PRED_HEAD(_,ph)) = 
    get_vars_PRED_HEAD ph

fun mixfix_parse_pred_item (Sigma,vdl,syn,toks,an,prec_ord,html,is_goal)
                           (pred_defn (p,pred_hd,(phi,l),ans),ans1) =
	(pred_defn (p,pred_hd,
           (mixfix_parse_FORMULA null_region
                                 (Sigma,flat(map get_vars (get_vars_PRED_HEAD pred_hd))@vdl,
                                  syn,toks,an,prec_ord,html,is_goal) phi,l),ans),ans1)
   | mixfix_parse_pred_item mix_syn (pos_PRED_ITEM (r,pi),ans) =
     let val (pi1,ans1) = mixfix_parse_pred_item mix_syn (pi,ans)
     in (pos_PRED_ITEM(r,pi1),ans1)
     end
   | mixfix_parse_pred_item mix_syn x = x

and mixfix_parse_sig mix_syn  (sort_items (ls,ans)) = 
    sort_items (map (mixfix_parse_sortitems mix_syn) ls,ans)
  | mixfix_parse_sig mix_syn  (datatype_items (decl,ans)) = 
    datatype_items (decl,ans)
  | mixfix_parse_sig mix_syn  (op_items (decl,ans)) = 
    op_items (map (mixfix_parse_op_item mix_syn) decl,ans)
  | mixfix_parse_sig mix_syn  (pred_items (decl,ans)) = 
    pred_items (map (mixfix_parse_pred_item mix_syn) decl,ans)
  | mixfix_parse_sig mix_syn (pos_SIG_ITEMS (r,s)) =
    pos_SIG_ITEMS (r, mixfix_parse_sig mix_syn s)
  
  
and mixfix_parse_lfor mix_syn (f,l) = 
    (mixfix_parse_FORMULA null_region mix_syn f,l)

  
and mixfix_parse_bit mix_syn (axiom_items (fl,ans)) =  
    axiom_items (map (mixfix_parse_lfor mix_syn) fl,ans)
  | mixfix_parse_bit  (Sigma,vdl,syn,toks,an,prec_ord,html,is_goal)
                      (local_var_axioms (vl,fl,ans)) = 
    local_var_axioms (vl, map (mixfix_parse_lfor  
           (Sigma,map fst (flat(map fst vl))@vdl,syn,toks,an,prec_ord,html,is_goal) ) fl,ans)
  | mixfix_parse_bit mix_syn (sig_items decl) = 
    sig_items (mixfix_parse_sig mix_syn decl)
  | mixfix_parse_bit mix_syn (pos_BASIC_ITEMS (r,bit)) =
    pos_BASIC_ITEMS(r,mixfix_parse_bit mix_syn bit)
  | mixfix_parse_bit mix_syn bit = bit 


(*** Precedence orders ***)

fun cons_entry_list entry_list (tab,key) =
    Symtab_id.update ((key, entry_list @ Symtab_id.lookup_multi (tab, key)), tab);

fun get_prec_rel1 (rel,prec_anno (true,ids1,ids2)) = 
    foldl (cons_entry_list ids1) (rel,ids2)
  | get_prec_rel1 (rel,pos_ANNO(_,an)) =
    get_prec_rel1 (rel,an)
  | get_prec_rel1 (rel,_) = rel

fun get_prec_rel rel ans =
    foldl get_prec_rel1 (rel,ans)

fun get_prec_ord (flist : ID list) (ans : ANNO list) : precedence_order =
    let val prec_rel = Symtab_id.dest (get_prec_rel (Symtab_id.make (identity flist)) ans)
        handle DUP => raise ERR "Internal error: precedence order incorrect"
        val prec_ord = reflexive_closure (transitive_closure ID_eq prec_rel)
    in prec_ord
    end



(*** Mixfix parsing functions ***)

fun mixfix_parse html (anno_sig as (Sigma,an)) (basic_spec bit_list) =
    let 
       val _ = errlist := [];
       val an1 = map remove_pos_ANNO an
       val (syn,toks) = add_mixfix CASL98 anno_sig
       val (subsortenv,varenv,funenv,predenv) = Sigma
       val prec_ord = get_prec_ord (map fst (Symtab_id.dest funenv)) an1
       val mix_syn = (Sigma,[],syn,toks,an1,prec_ord,html,false)
     in (basic_spec (map (mixfix_parse_bit mix_syn) bit_list),!errlist)
     end
  | mixfix_parse html anno_sig (pos_BASIC_SPEC (r,b)) =
    let val (bspec,errs) = mixfix_parse html anno_sig b
    in (pos_BASIC_SPEC (r,bspec),errs)
    end

fun parse_axiom is_goal (anno_sig as (Sigma,an)) ax =
     let val _ = errlist := [];
         val an1 = map remove_pos_ANNO an
         val (syn,toks) = add_mixfix CASL98 anno_sig
         val (subsortenv,varenv,funenv,predenv) = Sigma
         val prec_ord = get_prec_ord (map fst (Symtab_id.dest funenv)) an1
         val phi = mixfix_parse_FORMULA null_region
                                        (Sigma,[],syn,toks,an1,prec_ord,false,is_goal) 
                                        (unparsed_formula ax)
     in (phi,!errlist)
     end

fun parse_term (anno_sig as (Sigma,an)) t =
     let val _ = errlist := [];
         val an1 = map remove_pos_ANNO an
         val (syn,toks) = add_mixfix CASL98 anno_sig
         val (subsortenv,varenv,funenv,predenv) = Sigma
         val prec_ord = get_prec_ord (map fst (Symtab_id.dest funenv)) an1
         val parse_t = mixfix_parse_TERM null_region
                                        (Sigma,[],syn,toks,an1,prec_ord,false,false) 
                                        (unparsed_term t)
     in (parse_t,!errlist)
     end
  
end




