(* ------------------------------------------------------------------------ *)
(* xml_share.sml                           Pascal Schmidt <pharao90@tzi.de> *)
(* ------------------------------------------------------------------------ *)
(* sharing for XML using internal entities                                  *)
(* ------------------------------------------------------------------------ *)

(* $Id: xml_share.sml,v 1.1 2004/02/13 11:29:21 7till Exp $ *)

signature xml_share_sig
=
sig
  val share_all : xml_types.xmldata -> xml_types.xmldata list *
                                       xml_types.xmldata

  val str_ord : string * string -> Library.order (* used in xml_dtd.sml *)
end

structure xml_share : xml_share_sig
=
struct

  exception CannotShare
  exception NotIndexed

  open xml_types

  (* ********************************************************************** *)
  (* Order and tables                                                       *)
  (* ********************************************************************** *)

  (* ---------------------------------------------------------------------- *)
  (* Order functions for xmldata datatype                                   *)
  (* ---------------------------------------------------------------------- *)

  fun int_ord (x:int,y:int) = if (x=y) then Library.EQUAL
                              else if (x>y) then Library.GREATER
                              else Library.LESS
  
  fun str_ord (x:string,y:string) = if (x=y) then Library.EQUAL else if (x>y)
                                    then Library.GREATER else Library.LESS

  fun tag_decide ([],x,[],y)     = attr_ord (x,y)
    | tag_decide ([],x,h::t,y)   = Library.LESS
    | tag_decide (h::t,x,[],y)   = Library.GREATER
    | tag_decide (h::t,x,i::u,y) = let
                                     val z = xmldata_ord (h,i)
                                   in
                                     if (z=Library.EQUAL) then
                                       tag_decide (t,x,u,y)
                                     else
                                       z
                                   end

  and pair_ord ((a,b),(c,d)) = if (a<c) then Library.LESS else
                               if (a>c) then Library.GREATER else
                               if (b<d) then Library.LESS else
                               if (b>d) then Library.GREATER else
                               Library.EQUAL

  and attr_ord ([],[])     = Library.EQUAL
    | attr_ord ([],h::t)   = Library.LESS
    | attr_ord (h::t,[])   = Library.GREATER
    | attr_ord (h::t,i::u) = let
                               val x = pair_ord (h,i)
                             in
                               if (x=Library.EQUAL) then
                                 attr_ord (t,u)
                               else
                                 x
                             end
  
  and tag_ord (x,a,s,y,b,t) = if (x<y) then Library.LESS else
                              if (x>y) then Library.GREATER else
                              tag_decide (a,s,b,t)

  and def_ord (x,a,y,b) = let
                            val t = str_ord (x,y)
                          in
                            if (t=Library.EQUAL) then
                              xmldata_ord (a,b)
                            else
                              t
                          end

  and empty_ord (x,a,y,b) = let
                              val t = str_ord (x,y)
                            in
                              if (t=Library.EQUAL) then
                                attr_ord (a,b)
                              else
                                t
                            end

  and xmldata_ord (pcdata x,pcdata y) = str_ord (x,y)
    | xmldata_ord (pcdata _,_) = Library.LESS
    | xmldata_ord (_,pcdata _) = Library.GREATER
    | xmldata_ord (entity_def (x,a),entity_def (y,b)) = def_ord (x,a,y,b)
    | xmldata_ord (entity_def _,_) = Library.LESS
    | xmldata_ord (_,entity_def _) = Library.GREATER
    | xmldata_ord (entity_ref x,entity_ref y) = str_ord (x,y)
    | xmldata_ord (entity_ref _,_) = Library.LESS
    | xmldata_ord (_,entity_ref _) = Library.GREATER
    | xmldata_ord (share_ref x,share_ref y) = int_ord (x,y)
    | xmldata_ord (share_ref _,_) = Library.LESS
    | xmldata_ord (_,share_ref _) = Library.GREATER
    | xmldata_ord (empty_tag (x,a),empty_tag (y,b)) = empty_ord (x,a,y,b)
    | xmldata_ord (empty_tag _,_) = Library.LESS
    | xmldata_ord (_,empty_tag _) = Library.GREATER
    | xmldata_ord (tag (x,s,a),tag (y,t,b)) = tag_ord (x,a,s,y,b,t)

  (* ---------------------------------------------------------------------- *)
  (* New symtab structures used by the sharing code                         *)
  (* ---------------------------------------------------------------------- *)

  structure symtab_xmldata = TableFun(type key = xmldata
                                      val ord = xmldata_ord);

  structure symtab_int     = TableFun(type key = int
                                      val ord = int_ord);

  (* ---------------------------------------------------------------------- *)
  (* Reference count handling                                               *)
  (* ---------------------------------------------------------------------- *)

  (* ---------------------------------------------------------------------- *)
  (* ref is used to contain sharing tables                                  *)
  (* (a,b,c,d,e): a holds the last sharing index used by add_ref, either a  *)
  (*                new index for new terms or an old one                   *)
  (*              b is the maximum sharing index in use at the moment       *)
  (*              c is a table with xmldata as key and sharing index data   *)
  (*              d is a table with index as key and xmldata as data, the   *)
  (*                reverse from c                                          *)
  (*              e is a table with index as key and refcount as data       *)
  (* ---------------------------------------------------------------------- *)

  type ref = int * int * int symtab_xmldata.table * xmldata symtab_int.table
             * int symtab_int.table

  (* ---------------------------------------------------------------------- *)
  (* Utility functions for Option datatype                                  *)
  (* ---------------------------------------------------------------------- *)

  fun is_some (Library.Some x) = true
    | is_some _        = false
    
  fun get_some (Library.Some x) = x
    | get_some _ = raise NotIndexed

  (* ********************************************************************** *)
  (* Functions on the ref type                                              *)
  (* ********************************************************************** *)

  (* ---------------------------------------------------------------------- *)
  (* lref returns the last sharing index used, which is needed for building *)
  (*   share_ref xmldata constructors                                       *)
  (* ---------------------------------------------------------------------- *)

  fun lref (r,_,_,_,_) = r

  (* ---------------------------------------------------------------------- *)
  (* add_ref adds or updates a term in the sharing tables, new terms are    *)
  (*   added, old terms get their reference count updated                   *)
  (*   the sharing index associated with the term is returned alongside the *)
  (*   updated tables                                                       *)
  (* ---------------------------------------------------------------------- *)

  fun add_ref (_,max,x,r,c) y =
                         let
                           val i = symtab_xmldata.lookup (x,y)
                         in
                           if (is_some i) then  (* already in table *)
                             let                (* update refcount only *)
                               val j = get_some i
                               val k = symtab_int.lookup (c,j)
                               val l = get_some k
                             in
                               (j,max,x,r,symtab_int.update ((j,l+1),c))
                             end
                           else   (* new xml term *)             
                             let  (* create new entries in all tables *)
                               val j = symtab_xmldata.update_new ((y,max+1),x)
                               val k = symtab_int.update_new ((max+1,1),c)
                               val l = symtab_int.update_new ((max+1,y),r)
                             in
                               (max+1,max+1,j,l,k)
                             end
                         end 

  (* ---------------------------------------------------------------------- *)
  (* get_ref returns the xml term and refcount for a specific index         *)
  (* ---------------------------------------------------------------------- *)

  fun get_ref (_,_,x,r,c) y = let
                                val i = symtab_int.lookup (c,y)
                                val j = get_some i
                                val k = symtab_int.lookup (r,y)
                                val l = get_some k
                              in
                                (j,l)
                              end

  (* ---------------------------------------------------------------------- *)
  (* dec_ref decreases the refcount of a specific index if it is above 1    *)
  (*   this is needed to handle sharing in nested constructs                *)
  (* ---------------------------------------------------------------------- *)

  fun dec_ref (n,m,x,r,c) (share_ref j) =
                              let
                                val k = symtab_int.lookup (c,j)
                                val l = get_some k
                              in
                                if (l>1) then
                                  (n,m,x,r,symtab_int.update ((j,l-1),c))
                                else
                                  (n,m,x,r,c)
                              end
    | dec_ref _ _ = raise CannotShare

  (* ---------------------------------------------------------------------- *)
  (* dec_refs is just a written out (map dec_ref)                           *)
  (* ---------------------------------------------------------------------- *)

  fun dec_refs rl [] = rl
    | dec_refs rl (h::t) = dec_refs (dec_ref rl h) t

  (* ********************************************************************** *)
  (* Sharing functions                                                      *)
  (* ********************************************************************** *)

  (* ---------------------------------------------------------------------- *)
  (* Share converts a complex xmldata tree to just one share_ref            *)
  (* ---------------------------------------------------------------------- *) 
 
  fun share rl (pcdata (s)) = let
                                val r = add_ref rl (pcdata (s))
                              in
                                (r,share_ref (lref r))
                              end
    | share rl (tag (s,l,m)) = let
                                 val (l,x,xml) = share_list rl (s,l,m) []
                                 val (nr,_) = get_ref l (lref l)
                               in
                                 if (nr>1) then
                                   (dec_refs l x,xml)
                                 else
                                   (l,xml)
                               end
    | share rl (empty_tag (s,l)) = let
                                     val r = add_ref rl (empty_tag (s,l))
                                   in
                                     (r,share_ref (lref r))
                                   end
    | share rl (entity_def (s,t)) = raise CannotShare
    | share rl (entity_ref (s)) = raise CannotShare
    | share rl (share_ref (i)) = raise CannotShare

  (* ---------------------------------------------------------------------- *)
  (* share_list handles sharing for the argument terms of a tag             *)
  (* ---------------------------------------------------------------------- *)

  and share_list rl (s,l,[]) x = let
                                   val r = add_ref rl (tag (s,l,x))
                                 in
                                   (r,x,share_ref (lref r))
                                 end
    | share_list rl (s,l,(h::t)) x = let
                                       val (r,y) = share rl h
                                     in
                                       share_list r (s,l,t) (x @ [y]) 
                                     end

  (* ---------------------------------------------------------------------- *)
  (* init_share is the first step of sharing, where the whole xml term is   *)
  (*   reduced to just one share_ref xmldata item                           *)
  (* ---------------------------------------------------------------------- *)

  fun init_share xml = share (0,0,symtab_xmldata.empty,symtab_int.empty,
                              symtab_int.empty) xml

  (* ---------------------------------------------------------------------- *)
  (* conv_sref translates xmldata with refcount>1 to entity references and  *)
  (*   those with refcount=1 to their normal representation                 *)
  (*   calls itself recursively if the reconstructed term is a tag, which   *)
  (*   has its own subterms which need to be handled                        *)
  (* ---------------------------------------------------------------------- *)

  fun conv_sref rl (share_ref i) = let
                                     val (rc,xml) = get_ref rl i
                                   in
                                     if rc>1 then
                                       entity_ref ("r"^(Int.toString i))
                                     else
                                       conv_sref rl xml
                                   end
    | conv_sref rl (tag (s,l,m)) = tag (s,l,(map (conv_sref rl)) m)
    | conv_sref rl (x)           = x

  (* ---------------------------------------------------------------------- *)
  (* conv_rlsref applies conv_sref recursively to the xmldata entries in    *)
  (*   the sharing table                                                    *)
  (*   it updates only the reverse (int -> xmldata) table, as that is used  *)
  (*   later to generate the xmldata output                                 *)
  (* ---------------------------------------------------------------------- *)

  fun conv_rlsref (l,0,x,r,c) (_,_,_,d,_) = (l,0,x,d,c)
    | conv_rlsref (l,m,x,r,c) (_,_,_,d,_)
      = let
          val i = symtab_int.lookup (r,m)
          val j = get_some i
          val k = conv_sref (l,m,x,r,c) j
        in
          conv_rlsref (l,m-1,x,r,c) (0,0,x,symtab_int.update ((m,k),d),c)
        end

  (* ---------------------------------------------------------------------- *)
  (* resolve_share is the second step of sharing, where all terms are made  *)
  (*   either entity references or original terms depending on refcount     *)
  (* ---------------------------------------------------------------------- *)

  fun resolve_share (rl,xml) = (conv_rlsref rl rl,conv_sref rl xml)

  (* ---------------------------------------------------------------------- *)
  (* ref_to_xml creates entity definitions for items with refcount>1        *)
  (* ---------------------------------------------------------------------- *)

  fun ref_to_xml (n,r,e) = if (r>1) then
                             SOME (entity_def ("r"^(Int.toString n),e))
                           else
                             NONE


  (* ---------------------------------------------------------------------- *)
  (* decomp and decompose create a single list from the three sharing       *)
  (*   tables                                                               *)
  (* ---------------------------------------------------------------------- *)

  fun decomp c (r,n) = let
                         val d = symtab_int.lookup (c,r)
                         val e = get_some d
                       in
                         (r,n,e)
                       end
  
  fun decompose (_,_,x,r,c) = let
                                val l = symtab_int.dest c
                              in
                                (map (decomp r)) l
                              end

  (* ---------------------------------------------------------------------- *)
  (* final_share is the last step of sharing, where entity definitions are  *)
  (*   created for all entity references                                    *)
  (* ---------------------------------------------------------------------- *)

  fun final_share (rl,xml) = ((List.map Option.valOf
                             (List.filter Option.isSome ((map ref_to_xml)
                             (decompose rl)))),xml)

  (* ********************************************************************** *)
  (* User interface                                                         *)
  (* ********************************************************************** *)

  (* ---------------------------------------------------------------------- *)
  (* share_all takes an xmldata item and converts it to a list of xmldata   *)
  (*   items which need to be added to the internal DTD subset and an       *)
  (*   xmldata item for the now-shared original xmldata                     *)
  (* ---------------------------------------------------------------------- *)

  fun share_all xml = (final_share o resolve_share o init_share) xml

end
(* ------------------------------------------------------------------------ *)
