The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/******************************************************************************
AUTHORS

Dotan Tsadok
Instructor: Shlomo Yona, University of Haifa, Israel. December 2002.
Current maintainer: Shlomo Yona	<shlomo@cs.haifa.ac.il>

COPYRIGHT

Copyright (c) 2002, 2003 Shlomo Yona. All rights reserved.

This library is free software. 
You can redistribute it and/or modify it under the same terms as Perl itself.  

DESCRIPTION OF THIS FILE:
This is the decleration file suffix_tree.h and it contains declarations of the
interface functions for constructing, searching and deleting a suffix tree, and
to data structures for describing the tree.

*******************************************************************************/

/* A type definition for a 32 bits variable - a double word. */
#define     DBL_WORD      unsigned long   

/* Error return value for some functions. Initialized  in ST_CreateTree. */
DBL_WORD    ST_ERROR;

/******************************************************************************/
/*                           DATA STRUCTURES                                  */
/******************************************************************************/
/* This structure describes a node and its incoming edge */
typedef struct SUFFIXTREENODE
{
   /* A linked list of sons of that node */
   struct SUFFIXTREENODE*   sons;
   /* A linked list of right siblings of that node */
   struct SUFFIXTREENODE*   right_sibling;
   /* A linked list of left siblings of that node */
   struct SUFFIXTREENODE*   left_sibling;
   /* A pointer to that node's father */
   struct SUFFIXTREENODE*   father;
   /* A pointer to the node that represents the largest 
   suffix of the current node */
   struct SUFFIXTREENODE*   suffix_link;
   /* Index of the start position of the node's path */
   DBL_WORD                 path_position;
   /* Start index of the incoming edge */
   DBL_WORD                 edge_label_start;
   /* End index of the incoming edge */
   DBL_WORD                 edge_label_end;
} NODE;

/* This structure describes a suffix tree */
typedef struct SUFFIXTREE
{
   /* The virtual end of all leaves */
   DBL_WORD                 e;
   /* The one and only real source string of the tree. All edge-labels
      contain only indices to this string and do not contain the characters
      themselves */
   char*           tree_string;
   /* The length of the source string */
   DBL_WORD                 length;
   /* The node that is the head of all others. It has no siblings nor a
      father */
   NODE*                    root;
} SUFFIX_TREE;


/******************************************************************************/
/*                         INTERFACE FUNCTIONS                                */
/******************************************************************************/
/* 
   ST_CreateTree :
   Allocates memory for the tree and starts Ukkonen's construction algorithm by
   calling SPA n times, where n is the length of the source string.

   Input : The source string and its length. The string is a sequence of
           characters (maximum of 256 different symbols) and not
           null-terminated. The only symbol that must not appear in the string
           is $ (the dollar sign). It is used as a unique symbol by the
           algorithm and is appended automatically at the end of the string (by
           the program, not by the user!). The meaning of the $ sign is
           connected to the implicit/explicit suffix tree transformation,
           detailed in Ukkonen's algorithm.

   Output: A pointer to the newly created tree. Keep this pointer in order to
           perform operations like search and delete on that tree. Obviously,
           no de-allocating of the tree space could be done if this pointer is
           lost, as the tree is allocated dynamically on the heap.
*/

SUFFIX_TREE* ST_CreateTree(const char*   str, DBL_WORD length);

/******************************************************************************/
/*
   ST_FindSubstring :
   Traces for a string in the tree. This function is used for substring search
   after tree construction is done. It simply traverses down the tree starting
   from the root until either the searched string is fully found ot one
   non-matching character is found. In this function skipping is not enabled
   because we don't know wether the string is in the tree or not (see function
   trace_string above).

   Input : The tree, the string W, and the length of W.

   Output: If the substring is found - returns the index of the starting
           position of the substring in the tree source string. If the substring
           is not found - returns ST_ERROR.
*/

DBL_WORD ST_FindSubstring(SUFFIX_TREE*      tree,   /* The suffix array */
                          char*    W,      /* The substring to find */
                          DBL_WORD          P);     /* The length of W */

/******************************************************************************/
/*
   ST_PrintTree :
   This function prints the tree. It simply starts the recoursive function
   ST_PrintNode from the root

   Input : The tree to be printed.
  
   Output: A print out of the tree to the screen.
*/

void ST_PrintTree(SUFFIX_TREE* tree);

/******************************************************************************/
/*
   ST_DeleteTree
   Deletes a whole suffix tree by starting a recoursive call to ST_DeleteSubTree
   from the root. After all of the nodes have been deleted, the function deletes
   the structure that represents the tree.

   Input : The tree to be deleted.

   Output: None.
*/

void ST_DeleteTree(SUFFIX_TREE* tree);

/******************************************************************************/
/*
   ST_SelfTest
   Self test of the tree - search for all substrings of the main string. See
   testing paragraph in the c-implementation_notes.txt file.

   Input : The tree to test.

   Output: 1 for success and 0 for failure. Prints a result message to the
           screen.
*/

DBL_WORD ST_SelfTest(SUFFIX_TREE* tree);