Implement a binary search tree by using lists

/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: LstBST.sas                                          */
/*   TITLE: Implement a binary search tree by using lists       */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*                                                              */
/* SUPPORT: Rick Wicklin                UPDATE: July 2016       */
/*     REF:                                                     */
/*    MISC:                                                     */
/* Modules: BSTNewNode, BSTCreate, BSTInsert, BSTLookup,        */
/*          BSTGetKeyDepth, BSTDepth, BSTGetEdges, BSTPlot,     */
/*          BSTNewNode, BSTLookup, BSTCreate, BSTInsert         */
/****************************************************************/
%include sampsrc(LstQueue.sas);
proc iml;
load module=(QueueCreate QueuePush QueuePop);

/* A node is a three-item list:
   node[1] contains the KEY   value
   node[2] contains the LEFT  value (or empty if null)
   node[3] contains the RIGHT value (or empty if null) */
start BSTNewNode(value);
   node = ListCreate(3);     /* create list with 3 null items */
   call ListSetItem(node, 1, value);   /* set KEY value */
   return node;
finish;

/* pass a vector of key values to this routine to create a BST
   that has those values as keys */
start BSTCreate(x);
   bst =  BSTNewNode( x[1] );
   do i = 2 to nrow(colvec(x));
      run BSTInsert(bst, x[i]);
   end;
   return bst;
finish;

/* Insert a new branch for a key value in a BST. If the value
   already exists, do nothing (so there are never duplicates) */
start BSTInsert(root, value);
   if ListLen(root)=0 then do;   /* List empty. Set root node */
      root = BSTNewNode(value);
      return;
   end;
   /* otherwise, search tree to find value */
   found = BSTLookup(path, root, value);  /* if found, return */
   if ^found then                         /* else add to sub-path */
      call ListSetSubItem(root, path, BSTNewNode(value));
finish;


/* Search for a target value in a binary search tree.
   Input: root is the root node of a BST
          value is the target value
   Output: path contains the path to the node that contains the target
          value or the node where the target value can be inserted.
   Return: 1 if the target value is in the tree; 0 otherwise */
start BSTLookup(path, root, value);
   KEY = 1; LEFT = 2; RIGHT = 3;
   path = {};
   T = root;
   do while (1);
      if value = T$KEY then
         return 1;                  /* found it: return path to subitem */
      else if value < T$KEY then do;
         path = path || LEFT;       /* add to path */
         T = T$LEFT;                /* new root is left child */
      end;
      else do;
         path = path || RIGHT;      /* add to path */
         T = T$RIGHT;               /* new root is right child */
      end;
      if type(T)='U' then
         return 0;                  /* not found: return path to subitem */
   end;
finish;

/******************************************************/
/* iterative algorithm to find depth of a binary tree */
/******************************************************/
start BSTGetKeyDepth(root);
   KEY = 1; LEFT = 2; RIGHT = 3;
   v = {};        /* vector of key values (assume conformal matrices) */
   if ListLen(root)=0 then return v;
   Q = QueueCreate(root);
   depth = 0;

   do while (1);
      /* nodeCount (queue size) = number of nodes at current level */
      nodeCount = ListLen(Q);
      if nodeCount = 0 then return v;
      depth = depth + 1;
      /* pop all nodes of current level and push all nodes of next level */
      do while (nodeCount > 0);
         node = QueuePop(Q);
         v = v // (node$KEY || depth);
         child = node$LEFT;
         if type(child)^='U' then
            call QueuePush(Q, child);
         child = node$RIGHT;
         if type(child)^='U' then
            call QueuePush(Q, child);
         nodeCount = nodeCount - 1;
      end;
   end;
finish;

/* find depth of a binary tree */
start BSTDepth(root);
   v = BSTGetKeyDepth(root); /* (key, depth) */
   return max(v[,2]);
finish;

/* iterative algorithm to find edges that connect the nodes of a binary tree.
   Return edges as a two-column matrix (from, to) of nodes.
   The nodes are enumerated from top to bottom and from left to right. */
start BSTGetEdges(root);
   KEY = 1; LEFT = 2; RIGHT = 3;
   v = {};        /* vector of key values (assume conformal matrices) */
   if ListLen(root)=0 then return v;
   Q = QueueCreate(root);
   do while (1);
      /* nodeCount (queue size) = number of nodes at current level */
      nodeCount = ListLen(Q);
      if nodeCount = 0 then return v;
      /* pop all nodes of current level and push all nodes of next level */
      do while (nodeCount > 0);
         node = QueuePop(Q);
         parentID = node$KEY;
         child = node$LEFT;
         if type(child)^='U' then do;
            call QueuePush(Q, child);
            childID = child$KEY;
            v = v // (parentID || childID);
         end;
         child = node$RIGHT;
         if type(child)^='U' then do;
            call QueuePush(Q, child);
            childID = child$KEY;
            v = v // (parentID || childID);
         end;
         nodeCount = nodeCount - 1;
      end;
   end;
finish;

/* plot a binary search tree */
start BSTPlot(bst);
   v = BSTGetKeyDepth(bst);
   NodeID = v[,1];
   x = rank(NodeID);
   y = v[,2];
   create nodes var {"NodeID" "x" "y"}; append; close;

   edges = BSTGetEdges(bst);
   FromNode = edges[,1];
   ToNode = edges[,2];
   LinkID = T(1:nrow(FromNode));
   N = 2*nrow(FromNode);
   LX = j(N,1,.);
   LY = j(N,1,.);
   do i = 1 to nrow(FromNode);
      j = loc(NodeID = FromNode[i]);  /* index into NodeID */
      k = 2*i - 1;
      LX[k,] = X[j];
      LY[k,] = Y[j];
      j = loc(NodeId = ToNode[i]);
      LX[k+1,] = X[j];
      LY[k+1,] = Y[j];
      *print i (LX || LY);
   end;
   LinkId = LinkId || LinkID;
   create LinkCoords var {"LinkID" "LX" "LY"}; append; close;

   submit;
   data All;
   set nodes LinkCoords;
   run;

   proc sgplot data=All noautolegend nocycleattrs;
   format nodeID 3.;
   series x=LX y=LY / group=LinkID lineattrs=(color=black pattern=solid);
   scatter x=x y=y /  markerattrs=(symbol=circlefilled size=32)
                      filledoutlinedmarkers markerfillattrs=(color=white)
                      markeroutlineattrs=(thickness=2);
   scatter x=x y=y / markerchar=nodeID;
   yaxis reverse display=none;
   xaxis display=none;
   run;
   endsubmit;

   call delete({Nodes LinkCoords All});
finish;

store module=_all_;
quit;