proc(bp)=dispbox(inX, inFactor)
; -----------------------------------------------------------------------
; Library      graphic
; -----------------------------------------------------------------------
; See_also     dispdot plotbox
; -----------------------------------------------------------------------
; Macro        dispbox
; -----------------------------------------------------------------------
; Description  Generates a standard boxplot. An optional factor allows 
;              multiple boxplots by group.
; -----------------------------------------------------------------------
; Usage        bp = dispbox(x {, factor})
; Input
;   Parameter  x
;   Definition n x 1      vector (continuous variable)
;   Parameter  factor
;   Definition n x 1      vector (discrete variable)
; Output
;   Parameter  bp
;   Definition composed graphical object
; ---------------------------------------------------------------------
; Notes        Generates a boxplot with median line and upper and lower 
;              edge of the box being the upper and lower quartile.
;
;              The whiskers designate the farest non-outliers above resp.
;              below the box.
;
;              Outliers are all data at least 1.5 * (height of box) away
;              from the upper resp. lower edge of the box. Outliers are 
;              plotted as circles.
;
;              If this distance exceeds 3 * (height of box), a star symbol
;              is used ("gross outlier")
;
;              Requires the library "stats" to be present.;
;
;              IMPORTANT: The factor variable might be alphabetically reordered!
; -----------------------------------------------------------------------
; Example      library("graphic")
;              library("stats")
;              x = normal(4000)
;              m = matrix(1000)
;              factor = m | 2*m | 3*m | 4*m
;              bp1 = dispbox(x)
;              bp2 = dispbox(x, factor)
;              dd = createdisplay(1, 2)
;              show(dd, 1, 1, bp1)
;              show(dd, 1, 2, bp2)
; -----------------------------------------------------------------------
; Result       Shows a boxplot of x on the left. On the right four
;              boxplots of the four groups within x defined by factor are
;              shown.
; -----------------------------------------------------------------------
; Keywords     graphic primitives, boxplot
; -----------------------------------------------------------------------
; Author       Stephan Lauer 990224
; -----------------------------------------------------------------------
  
  // check if inX has multiple columns (forbidden)
  error(cols(inX).<>1, "dispbox : only one column of continuos data allowed")
  
  if (exist("inFactor") <> 1 && exist("inFactor") <> 2) // not numeric, not text
  // no factor present. Build simple boxplot
      theSortedX = sort(inX)
      theNumberOfRows = rows(inX)

      // calc median
      theMedianIndex = floor(theNumberOfRows / 2)
      if (abs(theNumberOfRows / 2 - theMedianIndex) < 0.1)
                  // theNumberOfRows is even. Average both middle data values
          theQ50 = (theSortedX[theMedianIndex] + theSortedX[theMedianIndex + 1]) .* 0.5
      else        // theNumberOfRows is odd. Median is the middle data value
          theQ50 = theSortedX[theMedianIndex + 1]
      endif

      // calc upper and lower quartile
      theLowerQIndex = floor(theNumberOfRows / 4)
      theUpperQIndex = theNumberOfRows - theLowerQIndex
      if (abs(theNumberOfRows / 4 - theLowerQIndex) < 0.1)
                  // theLowerQIndex contains an exact quarter
                  // this is the value for the quartile
           theQ25 = (theSortedX[theLowerQIndex]) 
           theQ75 = (theSortedX[theUpperQIndex])
      else        // average the boundary values
           theQ25 = (theSortedX[theLowerQIndex] + theSortedX[theLowerQIndex + 1]) .* 0.5 
           theQ75 = (theSortedX[theUpperQIndex - 1] + theSortedX[theUpperQIndex]) .* 0.5
      endif

      // calc lower and upper fence

      theFenceDistance = 1.5 .* (theQ75 - theQ25)
      theLowerFence = theSortedX[1 + sum(theSortedX < (theQ25 - theFenceDistance))]
      theUpperFence = theSortedX[sum(theSortedX <= (theQ75 + theFenceDistance))]


      // now build data for setmaskp,t,l

      thePoints = (0.25 ~ theLowerFence)                  // 1
      thePoints = thePoints | (0.75 ~ theLowerFence)      // 2
      thePoints = thePoints | (0 ~ theQ25)                // 3
      thePoints = thePoints | (1 ~ theQ25)                //
      thePoints = thePoints | (0 ~ theQ50 )               // 5
      thePoints = thePoints | (1 ~ theQ50 )               //
      thePoints = thePoints | (0 ~ theQ75 )               // 7
      thePoints = thePoints | (1 ~ theQ75 )               //
      thePoints = thePoints | (0.25 ~ theUpperFence )     // 9
      thePoints = thePoints | (0.75 ~ theUpperFence )     // 
      thePoints = thePoints | (0.5 ~ theLowerFence )      // 11
      thePoints = thePoints | (0.5 ~ theQ25 )             //
      thePoints = thePoints | (0.5 ~ theQ75 )             // 13
      thePoints = thePoints | (0.5 ~ theUpperFence )      //
      
     theLines = (1 ~ 2) 
     theLines = theLines | (3 ~ 4) 
     theLines = theLines | (5 ~ 6) 
     theLines = theLines | (7 ~ 8) 
     theLines = theLines | (9 ~ 10) 
     theLines = theLines | (11 ~ 12) 
     theLines = theLines | (13 ~ 14) 
     theLines = theLines | (3 ~ 7) 
     theLines = theLines | (4 ~ 8)

     theLineStyle = 1 .* matrix(9)    // solid
     thePointStyle = 0 .* matrix(14)  // don't show
     thePointSize = 8 .* matrix(14)

     // add outliers
     if (sum(theSortedX < (theQ25 - theFenceDistance)) > 0)
         theOutliers = paf(theSortedX, theSortedX < (theQ25 - theFenceDistance))
         thePoints = thePoints | (matrix(rows(theOutliers)).*0.5 ~ theOutliers)
         thePointStyle = thePointStyle | matrix(rows(theOutliers)) .* (3 + 9 .* (theOutliers < (theQ25 - 2.* theFenceDistance)))  
         thePointSize = thePointSize | matrix(rows(theOutliers)) .* 8
     endif

     if (sum(theSortedX > (theQ75 + theFenceDistance)) > 0)
         theOutliers = paf(theSortedX, theSortedX > (theQ75 + theFenceDistance))
         thePoints = thePoints | (matrix(rows(theOutliers)).*0.5 ~ theOutliers)
         thePointStyle = thePointStyle | matrix(rows(theOutliers)) .* (3 + 9 .* (theOutliers > (theQ75 + 2.* theFenceDistance)))  
         thePointSize = thePointSize | matrix(rows(theOutliers)) .* 8
     endif

     
   else //  factor present
        // split inX by inFactor and plot by group
  
      error(cols(inFactor).<>1, "dispbox : only one column of discrete data allowed")
      error(rows(inX).<>rows(inFactor), "dispbox : factor must have same length as data")
  
  
      theGroups = discrete(inFactor)

      theNumberOfGroups = rows(theGroups)

      // some inits
      thePoints = (0 ~ 0)
      theLines = (0 ~ 0)
      thePointStyle = 0
      thePointSize = 8
      theLineStyle = 0
      
      theCount = 1

      while (theCount <= theNumberOfGroups)

           // extract one group
           theData = paf(inX, inFactor==theGroups[theCount,1])

           // sort group
           theSortedX = sort(theData)
           theNumberOfRows = rows(theSortedX)

           // calc median
           theMedianIndex = floor(theNumberOfRows / 2)
           if (abs(theNumberOfRows / 2 - theMedianIndex) < 0.1)
                  // theNumberOfRows is even. Average both middle data values
               theQ50 = (theSortedX[theMedianIndex] + theSortedX[theMedianIndex + 1]) .* 0.5
           else        // theNumberOfRows is odd. Median is the middle data value
               theQ50 = theSortedX[theMedianIndex + 1]
           endif

           // calc upper and lower quartile
           theLowerQIndex = floor(theNumberOfRows / 4)
           theUpperQIndex = theNumberOfRows - theLowerQIndex
           if (abs(theNumberOfRows / 4 - theLowerQIndex) < 0.1)
                       // theLowerQIndex contains an exact quarter
                       // this is the value for the quartile
               theQ25 = (theSortedX[theLowerQIndex]) 
               theQ75 = (theSortedX[theUpperQIndex])
           else        // average the boundary values
               theQ25 = (theSortedX[theLowerQIndex] + theSortedX[theLowerQIndex + 1]) .* 0.5 
               theQ75 = (theSortedX[theUpperQIndex - 1] + theSortedX[theUpperQIndex]) .* 0.5
           endif
           
           // calc lower and upper fence

           theFenceDistance = 1.5 .* (theQ75 - theQ25)
           theLowerFence = theSortedX[1 + sum(theSortedX < (theQ25 - theFenceDistance))]
           theUpperFence = theSortedX[sum(theSortedX <= (theQ75 + theFenceDistance))]


           // now build data for setmaskp,t,l
           theCurrentIndex = rows(thePoints)

           thePoints = thePoints | ((theCount-1).*1.5 + 0.25 ~ theLowerFence)      // 1
           thePoints = thePoints | ((theCount-1).*1.5 + 0.75 ~ theLowerFence)      // 2
           thePoints = thePoints | ((theCount-1).*1.5 + 0 ~ theQ25)                // 3
           thePoints = thePoints | ((theCount-1).*1.5 + 1 ~ theQ25)                //
           thePoints = thePoints | ((theCount-1).*1.5 + 0 ~ theQ50 )               // 5
           thePoints = thePoints | ((theCount-1).*1.5 + 1 ~ theQ50 )               //
           thePoints = thePoints | ((theCount-1).*1.5 + 0 ~ theQ75 )               // 7
           thePoints = thePoints | ((theCount-1).*1.5 + 1 ~ theQ75 )               //
           thePoints = thePoints | ((theCount-1).*1.5 + 0.25 ~ theUpperFence )     // 9
           thePoints = thePoints | ((theCount-1).*1.5 + 0.75 ~ theUpperFence )     // 
           thePoints = thePoints | ((theCount-1).*1.5 + 0.5 ~ theLowerFence )      // 11
           thePoints = thePoints | ((theCount-1).*1.5 + 0.5 ~ theQ25 )             //
           thePoints = thePoints | ((theCount-1).*1.5 + 0.5 ~ theQ75 )             // 13
           thePoints = thePoints | ((theCount-1).*1.5 + 0.5 ~ theUpperFence )      //
      
           theLines = theLines | (theCurrentIndex + 1  ~ theCurrentIndex + 2) 
           theLines = theLines | (theCurrentIndex + 3  ~ theCurrentIndex + 4) 
           theLines = theLines | (theCurrentIndex + 5  ~ theCurrentIndex + 6) 
           theLines = theLines | (theCurrentIndex + 7  ~ theCurrentIndex + 8) 
           theLines = theLines | (theCurrentIndex + 9  ~ theCurrentIndex + 10) 
           theLines = theLines | (theCurrentIndex + 11 ~ theCurrentIndex + 12) 
           theLines = theLines | (theCurrentIndex + 13 ~ theCurrentIndex + 14) 
           theLines = theLines | (theCurrentIndex + 3  ~ theCurrentIndex + 7) 
           theLines = theLines | (theCurrentIndex + 4  ~ theCurrentIndex + 8)

           theLineStyle  = theLineStyle  | 1 .* matrix(9)    // solid
           thePointStyle = thePointStyle | 0 .* matrix(14)   // don't show
           thePointSize  = thePointSize  | 8 .* matrix(14)   // medium size

           
           // add outliers
           if (sum(theSortedX < (theQ25 - theFenceDistance)) > 0)
              theOutliers = paf(theSortedX, theSortedX < (theQ25 - theFenceDistance))
              thePoints = thePoints | ((theCount-1).*1.5 + matrix(rows(theOutliers)).*0.5 ~ theOutliers)
              thePointStyle = thePointStyle | matrix(rows(theOutliers)) .* (3 + 9 .* (theOutliers < (theQ25 - 2.* theFenceDistance)))  
              thePointSize = thePointSize | matrix(rows(theOutliers)) .* 8
           endif

           if (sum(theSortedX > (theQ75 + theFenceDistance)) > 0)
              theOutliers = paf(theSortedX, theSortedX > (theQ75 + theFenceDistance))
              thePoints = thePoints | ((theCount-1).*1.5 + matrix(rows(theOutliers)).*0.5 ~ theOutliers)
              thePointStyle = thePointStyle | matrix(rows(theOutliers)) .* (3 + 9 .* (theOutliers > (theQ75 + 2.* theFenceDistance)))  
              thePointSize = thePointSize | matrix(rows(theOutliers)) .* 8
           endif
           
            
           theCount = theCount + 1
      endo // while more groups left  
   endif // factor present or not

   // set the setmask? values

   setmaskp(thePoints, 0, thePointStyle, thePointSize)
   setmaskl(thePoints, theLines, 0, theLineStyle, 1) 

   // set return value
   bp = thePoints
endp