proc() = plotmosaic(inX, inY)
; ---------------------------------------------------------------------
; Library     plot
; ---------------------------------------------------------------------
; See_also    
; ---------------------------------------------------------------------
; Macro       plotmosaic
; ---------------------------------------------------------------------
; Description Mosaic plot: a graphical display of a contingency table
; ---------------------------------------------------------------------
; Usage       plotmosaic(inX {,inY})
; Input       
; Parameter   inX
; Definition  n x 1 Vector, discrete.
; Parameter   inY
; Definition  n x 1 Vector, discrete (optional).
; Output      
; ---------------------------------------------------------------------
; Notes       Draws a p x q contingency table, where p and q are the
;             numbers of levels of x and y, respectively.
;             The areas of the boxes are proportional to the cell
;             frequencies: the width of a column is proportional to the
;             frequency of its x level, and the heights of the boxes
;             inside a column are proportional to the frequencies of y
;             conditioned on that x level.
;
;             Neighbouring columns and neighbouring boxes within a column
;             are separated by a fixed gap of 0.05.
;
;             The labels are placed according to the unconditioned
;             (marginal) relative frequencies of the groups of x resp. y.
;
;             If y is omitted, a so-called "spineplot" of x is drawn:
;             a bar chart in which every bar has the same height (here 1)
;             and the width of each bar is proportional to the frequency
;             of the corresponding group of x.
;             
;             Requires the library "stats".
; ---------------------------------------------------------------------
; Example     library("plot")
;             library("stats")
;             x1 = #("one", "one", "one", "one", "one", "one", "one", "one")
;             x1 = x1 | #("two", "two", "two", "two", "two", "two", "three")
;             x1 = x1 | #("three", "three", "three")
;             y1 = #("one", "one", "two", "two", "three", "three", "four")
;             y1 = y1 | #("four", "one", "one", "one", "two", "two", "two")
;             y1 = y1 | #("one", "four", "four", "four")
;             plotmosaic(x1, y1)
;
;             x2 = #(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3)
;             y2 = #(1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 1, 2, 2, 2, 1, 4, 4, 4)
;             plotmosaic(x2, y2)
; ---------------------------------------------------------------------
; Result      Both calls of plotmosaic() generate a mosaic plot of x and y.
;             Note how the different orderings (alphabetical) influence
;             the appearance of the plot.
; ---------------------------------------------------------------------
; Keywords    Mosaic Plot, Contingency Table, high level graphics
; ---------------------------------------------------------------------
; Author      Stephan R. W. Lauer, 990302
; ---------------------------------------------------------------------

   // ---- argument checks -------------------------------------------
   // Only objects for which exist() reports 1 or 2 are accepted;
   // kind 1 is formatted as a number when the labels are built below.
   xKind = exist("inX")
   error(xKind <> 1 && xKind <> 2, "plotmosaic: both variables must be discrete vectors")

   yKind = exist("inY")
   if (yKind < 1)
      // no usable inY supplied: a constant vector of ones gives the spineplot of inX
      inY = matrix(rows(inX))
   else
      error(yKind <> 1 && yKind <> 2, "plotmosaic: both variables must be discrete vectors")
   endif

   // both vectors have to describe the same cases
   error(rows(inX) <> rows(inY), "plotmosaic: both variables must have the same number of cases")

   // ---- marginal distributions ------------------------------------
   {xLevels, xCounts} = discrete(inX)
   {yLevels, yCounts} = discrete(inY)

   nXGroups = rows(xLevels)
   nYGroups = rows(yLevels)

   xTotal = sum(xCounts)
   yTotal = sum(yCounts)

   // ---- geometry of the boxes -------------------------------------
   gap = 0.05                // space between adjacent columns / boxes

   thePoints = ( 0 ~ 0 )     // dummy first point ...
   theLines = ( 1 ~ 1 )      // ... joined to itself ...
   edgeStyles = 0            // ... by an invisible line

   condCounts = 0 * yCounts  // y counts within the current x group
   boxRight = 0.0

   colIdx = 1
   while (colIdx <= nXGroups)

      // horizontal extent of this column; the first one starts at 0
      if (colIdx == 1)
         boxLeft = 0.0
      else
         boxLeft = boxRight + gap
      endif
      boxRight = boxLeft + xCounts[colIdx] / xTotal

      // frequencies of y among the cases belonging to this x level
      yGivenX = paf(inY, inX == xLevels[colIdx])
      rowIdx = 1
      while (rowIdx <= nYGroups)
         condCounts[rowIdx] = sum(yGivenX == yLevels[rowIdx])
         rowIdx = rowIdx + 1
      endo

      condTotal = sum(condCounts)
      if (condTotal < 0.5)
         condTotal = 1       // empty group: avoid dividing by zero
      endif

      // stack one box per y level from the bottom of the column upwards
      boxTop = 0.0
      rowIdx = 1
      while (rowIdx <= nYGroups)

         if (rowIdx == 1)
            boxBottom = 0.0
         else
            boxBottom = boxTop + gap
         endif
         boxTop = boxBottom + condCounts[rowIdx] / condTotal

         // four corners (counter-clockwise from lower left) and the
         // four edges connecting them, all drawn solid
         offset = rows(thePoints)
         thePoints = thePoints | ( boxLeft ~ boxBottom ) | ( boxRight ~ boxBottom ) | ( boxRight ~ boxTop ) | ( boxLeft ~ boxTop )
         theLines = theLines | ( (offset + #(1, 2, 3, 4)) ~ (offset + #(2, 3, 4, 1)) )
         edgeStyles = edgeStyles | matrix(4)

         rowIdx = rowIdx + 1
      endo

      colIdx = colIdx + 1
   endo

   // ---- labels -----------------------------------------------------
   // two hidden dummy labels enlarge the plotted area to the left and below
   theLabels = ( -0.2 ~ 0) | (0 ~ -0.2)
   labelText = "Hallo" | "Hallo"
   labelPlace = -1 | -1      // -1: do not show
   yAxisLabelX = -0.1        // x coordinate of the labels for the y groups
   xAxisLabelY = -0.1        // y coordinate of the labels for the x groups

   // labels of the x groups, centred below their columns
   xIsNumeric = (xKind == 1)
   labelAt = ( xCounts[1] / xTotal ) / 2
   colIdx = 1
   while (colIdx <= nXGroups)
      theLabels = theLabels | (labelAt ~ xAxisLabelY)
      if (xIsNumeric)
         labelText = labelText | string("%1.0f", xLevels[colIdx])
      else
         labelText = labelText | xLevels[colIdx]
      endif
      labelPlace = labelPlace | 6   // below
      if (colIdx < nXGroups)
         // half of this column + gap + half of the next column
         labelAt = labelAt + (xCounts[colIdx] / xTotal) / 2 + gap + (xCounts[colIdx+1] / xTotal) / 2
      endif
      colIdx = colIdx + 1
   endo

   // labels of the y groups, positioned by the marginal y frequencies
   yIsNumeric = (exist("inY") == 1)
   labelAt = ( yCounts[1] / yTotal ) / 2
   rowIdx = 1
   while (rowIdx <= nYGroups)
      theLabels = theLabels | (yAxisLabelX ~ labelAt)
      if (yIsNumeric)
         labelText = labelText | string("%1.0f", yLevels[rowIdx])
      else
         labelText = labelText | yLevels[rowIdx]
      endif
      labelPlace = labelPlace | 9   // to the left
      if (rowIdx < nYGroups)
         labelAt = labelAt + (yCounts[rowIdx] / yTotal) / 2 + gap + (yCounts[rowIdx+1] / yTotal) / 2
      endif
      rowIdx = rowIdx + 1
   endo

   // ---- masks and display -----------------------------------------
   // box corners and edges
   setmaskp(thePoints, 0, 0, 8)
   setmaskl(thePoints, theLines, 0, edgeStyles, 1)

   // label anchors and label texts
   setmaskp(theLabels, 0, 0, 8)
   setmaskt(theLabels, labelText, 0, labelPlace, 16)

   // open a 1 x 1 display and draw boxes and labels into it
   contingency = createdisplay(1, 1)
   show(contingency, 1, 1, thePoints, theLabels)

endp