(* *********************************************************************** *)
(*									   *)
(* Project: CATS 							   *)
(* Author: Kolyang & Till Mossakowski, University of Bremen		   *)
(* Date: 29.12.98				 			   *)
(* Purpose of this file: Lexical analysis for mixfix parsing		   *)
(*			 						   *)	
(*									   *)
(* *********************************************************************** *)

(* This is an adaption of the Isabelle lexer for CASL
*)

structure CASLScanner : 
sig
include LEXICON
  val scan : string -> Lexicon.token list list
  val scan_aterm : string -> Lexicon.token list 
  val CASL_lexicon : Lexicon.lexicon
end
 =
struct

open Library Term Lexicon 
infix 5 -- ^^;
infix 3 >>;
infix 0 ||;

(** functions from LEXICON that are not available for CASL lexical syntax **)

fun scan_id x = raise ERROR
fun scan_longid x = raise ERROR
fun scan_tid  x = raise ERROR
  
fun string_of_vname x = raise ERROR
fun string_of_vname' x = raise ERROR
fun scan_varname x = raise ERROR
fun scan_var x = raise ERROR
fun const x = raise ERROR
fun free x = raise ERROR
fun var x = raise ERROR


(** is_identifier etc. **)

val is_blank : string -> bool =
  fn " " => true | "\t" => true | "\n" => true | "\^L" => true | "\160" => true | "\r" => true
    | _ => false;

val SIGNs =
  ["+","-","*","/","\\","&","=","<",">","|","^","~","!","?",":",".","$","@",
   "#",
   (* "","","","","","","","","","","","",*)
   "\161","\191","\215","\247","\163","\169",
   "\177","\182","\167","\185","\178","\179",
  (* "","","","",""*)
   "\183","\162","\176","\172","\181",
   "%"  (* only needed for lambda-abstraction in HOL-CASL *)];
  
fun is_SIGN c = c mem SIGNs;

fun is_opening c = c mem ["[","{"]

fun is_closing c = c mem ["]","}"]

val accented_letters = 
    (*["","","","","","","","","","","","","","","","","",
     "","","","","","","","","","","","","","","","","",
     "","","","","","","","","","","","","","","","","",
     "","","","","","","","","",""]*)
    ["\192","\193","\194","\195","\196","\197","\198","\199","\200","\201",
     "\202","\203","\204","\205","\206","\207","\208","\209","\210","\211",
     "\212","\213","\214","\216","\217","\218","\219","\220","\221","\223",
     "\224","\225","\226","\227","\228","\229","\230","\231","\232","\233",
     "\234","\235","\236","\237","\238","\239","\240","\241","\242","\243",
     "\244","\245","\246","\248","\249","\250","\251","\252","\253","\254",
     "\255"]

fun is_extended_letter c =
	is_letter c orelse c mem accented_letters;

fun is_underscore c = 
	case explode c of
	("_"::cs) => true
	| _ => false;

fun is_prime c = 
	case explode c of
	("'"::cs) => true
	| _ => false;

fun is_extended_let c =
	is_extended_letter c orelse  is_prime c;
	
fun is_extended_letdig c =
	is_extended_letter c orelse is_digit c orelse is_prime c;
		
fun is_extended_letdig_underscore c =
	is_extended_letdig c orelse is_underscore c;
	
fun is_ident [] = false
  | is_ident (c :: cs) = is_extended_letter c andalso forall is_extended_letdig_underscore cs;

val is_identifier = is_ident o explode;

fun is_oct_digit x = x >="0" andalso x<= "7";
fun is_hex_digit x = (x >="0" andalso x<= "9") orelse (x >="A" andalso x<= "F");

fun is_printable c = (c >= " ");

fun is_no_percent c = not (c="%")

fun optional1 scan = scan || scan_empty >> K "";

fun scan_one_of _ [] = raise LEXICAL_ERROR
  | scan_one_of l (c :: cs) =
      if c mem l then (c, cs) else raise LEXICAL_ERROR;

(** scanners **)

val scan_letter_WORD = scan_one is_extended_let ^^ 
                (scan_any is_extended_letdig >> implode);

val scan_WORD = scan_any1 is_extended_letdig >> implode;

val scan_WORDS = scan_letter_WORD ^^ (repeat ($$ "_" ^^ scan_WORD) >> implode);

val scan_DOT_WORDS = $$ "." ^^ scan_WORDS;

val scan_DIGIT = scan_one is_digit
val scan_oct_DIGIT = scan_one is_oct_digit
val scan_hex_DIGIT = scan_one is_hex_digit

val scan_NUMBER = scan_any1 is_digit >> implode;
val scan_NNUMBER = scan_DIGIT ^^ scan_NUMBER;

val scan_FRACTION = scan_NUMBER ^^ $$ "." ^^ scan_NUMBER
val scan_EXP = scan_one_of ["E","e"] ^^ 
               optional1 (scan_one_of ["+","-"]) ^^
               scan_NUMBER
val scan_FLOAT = scan_FRACTION ^^ scan_EXP ||
                 scan_FRACTION ||
                 scan_NUMBER ^^ scan_EXP
(*val scan_PATH = scan_WORDS ^^ (repeat1 ($$ "/" ^^ scan_WORDS) >> implode);*)
val scan_PATH = scan_WORDS;

fun scan_URL_prefix ("h"::"t"::"t"::"p"::":"::"/"::"/"::cs) = ("http://",cs)
|   scan_URL_prefix ("f"::"i"::"l"::"e"::":"::"/"::"/"::cs) = ("file://",cs)
|   scan_URL_prefix ("f"::"t"::"p"::":"::"/"::"/"::cs) = ("ftp://",cs)
|   scan_URL_prefix _ = raise LEXICAL_ERROR;

val scan_URL =  scan_URL_prefix ^^ (scan_any (fn x => not (is_blank x)) >> implode);

val scan_SIGN =  scan_any1 is_SIGN >> implode; 

val scan_escapeCHAR =   ( $$ "\\" ^^ scan_one_of ["t","n","v","r","b","f","a","?","'","\"","\\"]) ||
                        ( $$ "\\" ^^ scan_DIGIT ^^ scan_DIGIT ^^ scan_DIGIT ) ||
                        ( $$ "\\" ^^ $$ "o" ^^ scan_oct_DIGIT ^^ scan_oct_DIGIT ^^ scan_oct_DIGIT ) ||
                        ( $$ "\\" ^^ $$ "x" ^^ scan_hex_DIGIT ^^ scan_hex_DIGIT  ) 

val scan_rawCHAR =   scan_escapeCHAR || (scan_one is_printable);
                     
val scan_rawCHAR1 =  scan_escapeCHAR  ||
                     (scan_one (fn c => is_printable c andalso not (c mem ["\"","\\"])));

val scan_CHAR = $$ "'" ^^ scan_rawCHAR ^^ $$ "'"

val scan_STRING = $$ "\"" ^^ (repeat scan_rawCHAR1 >> implode) ^^ $$ "\""

val scan_AFUN = (scan_one is_extended_letdig) ^^
                (scan_any (fn c => is_extended_letdig c orelse is_SIGN c) >> implode)

val scan_DISPLAY = $$ "%" ^^ $$ "!" ^^ (scan_any1 is_no_percent >> implode) ^^ $$ "%" ^^ $$ "!"

(** tokenize **)

fun tokenize  lex is_xid chs =
  let
    val scan_lit = scan_literal lex >> pair Token;

    val scan_val =
      scan_DISPLAY >> pair StringSy ||
      scan_URL >> pair UrlSy ||
(*      scan_PATH >> pair PathSy ||*)
      scan_CHAR >> pair CharSy ||
      scan_WORDS >> pair WordsSy ||
      scan_DOT_WORDS >> pair DotwordsSy ||
      scan_FLOAT >> pair FloatSy ||
      scan_NNUMBER >> pair NNumberSy ||
      scan_DIGIT >> pair DigitSy ||
      scan_STRING >> pair StringSy ||
      scan_SIGN >> pair SignSy;

    fun scan_comment1 ("%" :: "%" :: cs) = cs
      | scan_comment1 ("\n" :: cs) = cs
      | scan_comment1 (c :: cs) = scan_comment1 cs
      | scan_comment1 [] = [];

    fun scan_comment2 ("%" :: ")" :: cs) = cs
      | scan_comment2 (c :: cs) = scan_comment2 cs
      | scan_comment2 [] = [];

    fun scan (rev_toks, []) = rev (EndToken :: rev_toks)
      | scan (rev_toks, chs as "%" :: "%" :: cs) =
          scan (rev_toks, scan_comment1 cs)
      | scan (rev_toks, chs as "%" :: "(" :: cs) =
          scan (rev_toks, scan_comment2 cs)
      | scan (rev_toks, chs as c :: cs) =
          if is_blank c then scan (rev_toks, cs)
          else
            (case max_of scan_lit scan_val chs of
              (None, _) => error ("Lexical error at: " ^ quote (implode chs))
            | (Some (tk, s), chs') => scan (tk s :: rev_toks, chs'));
  in
    scan ([], chs)
  end;

fun tokenize_aterm lex is_xid chs =
  let
    val scan_lit = scan_literal lex >> pair Token;

    val scan_val =
      scan_AFUN >> pair WordsSy ||
      scan_NNUMBER >> pair NNumberSy ||
      scan_DIGIT >> pair DigitSy ||
      scan_STRING >> pair StringSy ||
      scan_SIGN >> pair SignSy;


    fun scan (rev_toks, []) = rev (EndToken :: rev_toks)
      | scan (rev_toks, chs as c :: cs) =
          if is_blank c then scan (rev_toks, cs)
          else
            (case max_of scan_lit scan_val chs of
              (None, _) => error ("Lexical error at: " ^ quote (implode chs))
            | (Some (tk, s), chs') => scan (tk s :: rev_toks, chs'));
  in
    scan ([], chs)
  end;


val CASL_lexicon =
make_lexicon
[":",":?","::=","=","=>","<=>",".","\183","|",
 "|->","\\/","/\\","\172","{","}","[","]",  (* those on p. C-10, first item *)
 "<","*","?","/","\215",
 "->","->?","=e=","{}", "<>",(* those on p. C-10 which are nevertheless
                           also TOKENS, which must be dealt with
                           at the parsing level *)
 "%!","%[","(",")",")?",";",",", (* those which are not SIGN anyway *)
 "__",  (* PLACE *)
 "as","assoc","axiom","axioms","comm","def",
  "else","end","exists","exists!","false","forall","free","generated",
  "hide","idem","if","in","not","op","ops","pred","preds","reveal",
  "sort","sorts",
  "spec","to","true","type","types","var","vars","version","when",
  "and","then","local","within","closed","with","given",
  "fit","view","arch","unit","units","result","lambda",
  "library","from","get",
  "%number", "%list", "%floating", "%string",
  "%prec", "%left", "%right",
  "%def", "%cons", "%implies"
  (* and the letter keywords *)]

val aterm_lexicon =
make_lexicon
["{","}","[","]","(",")",","]

fun trans_token (Token ("\215")) = Token ("*")
|   trans_token (Token ("\172")) = Token ("not")
|   trans_token (Token ("\183")) = Token (".")
|   trans_token t = t

val trans_tokens = map trans_token;

fun split tokens =
    let fun split1 tl ts nil = rev ts::tl
        |   split1 tl ts ((t as (Token s))::ts1) =
               if s mem ["spec","library","from","view"] 
               then split1 (rev (EndToken::ts)::tl) [t] ts1
               else 
               if s = "unit" 
               then (case ts1 of
                     ((t' as Token "spec")::ts2) 
                          => split1 (rev (EndToken::ts)::tl) [t',t] ts2
                     | _ => split1 tl (t::ts) ts1)
               else
               if s = "arch"
               then (case ts1 of
                     ((t' as Token "spec")::ts2) 
                          => split1 (rev (EndToken::ts)::tl) [t',t] ts2
                     | _ => split1 (rev (EndToken::ts)::tl) [t] ts1)
               else 
               if s = "["
               then (case ts1 of
                     ((t' as Token "view")::ts2) 
                          => split1 tl (t'::t::ts) ts2
                     | _ => split1 tl (t::ts) ts1)
               else split1 tl (t::ts) ts1
        |   split1 tl ts (t::ts1) =
               split1 tl (t::ts) ts1;
        val res = rev(split1 nil nil tokens)
    in
       if hd res = nil orelse hd res = [EndToken] then tl res else res
    end;


val scan  = split  o trans_tokens o
           (tokenize  CASL_lexicon false) o explode;

val scan_aterm = (tokenize_aterm aterm_lexicon false) o explode;


end;

