Current File : //usr/include/datrie/trie.h
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * libdatrie - Double-Array Trie Library
 * Copyright (C) 2006  Theppitak Karoonboonyanan <theppitak@gmail.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

/*
 * trie.h - Trie data type and functions
 * Created: 2006-08-11
 * Author:  Theppitak Karoonboonyanan <theppitak@gmail.com>
 */

#ifndef __TRIE_H
#define __TRIE_H

#include <datrie/triedefs.h>
#include <datrie/alpha-map.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @file trie.h
 * @brief Trie data type and functions
 *
 * Trie is a kind of digital search tree, an efficient indexing method with
 * O(1) time complexity for searching. Comparably as efficient as hashing,
 * trie also provides flexibility on incremental matching and key spelling
 * manipulation. This makes it ideal for lexical analyzers, as well as
 * spelling dictionaries.
 *
 * This library is an implementation of double-array structure for representing
 * trie, as proposed by Junichi Aoe. The details of the implementation can be
 * found at http://linux.thai.net/~thep/datrie/datrie.html
 *
 * A Trie is associated with an AlphaMap, a map between actual alphabet
 * characters and the raw characters used to walk through trie.
 * You can define the alphabet set by adding ranges of character codes
 * to it before associating it to a trie. And the keys to be added to the trie
 * must comprise only characters in such ranges. Note that the size of the
 * alphabet set is limited to 256 (TRIE_CHAR_MAX + 1), and the AlphaMap
 * will map the alphabet characters to raw codes in the range 0..255
 * (0..TRIE_CHAR_MAX). The alphabet character ranges need not be continuous,
 * but the mapped raw codes will be continuous, for the sake of compactness
 * of the trie.
 *
 * A new Trie can be created in memory using trie_new(), saved to file using
 * trie_save(), and loaded later with trie_new_from_file().
 * It can even be embeded in another file using trie_fwrite() and read back
 * using trie_fread().
 * After use, Trie objects must be freed using trie_free().
 *
 * Operations on trie include:
 *
 * - Add/delete entries with trie_store() and trie_delete()
 * - Retrieve entries with trie_retrieve()
 * - Walk through trie stepwise with TrieState and its functions
 *   (trie_root(), trie_state_walk(), trie_state_rewind(),
 *   trie_state_clone(), trie_state_copy(),
 *   trie_state_is_walkable(), trie_state_walkable_chars(),
 *   trie_state_is_single(), trie_state_get_data().
 *   And do not forget to free TrieState objects with trie_state_free()
 *   after use.)
 * - Enumerate all keys using trie_enumerate()
 * - Iterate entries using TrieIterator and its functions
 *   (trie_iterator_new(), trie_iterator_next(), trie_iterator_get_key(),
 *   trie_iterator_get_data().
 *   And do not forget to free TrieIterator objects with trie_iterator_free()
 *   after use.)
 */

/**
 * @brief Trie data type
 */
typedef struct _Trie   Trie;

/**
 * @brief Trie enumeration function
 *
 * @param key  : the key of the entry
 * @param data : the data of the entry
 * @param user_data : the user-supplied data on enumerate call
 *
 * @return TRUE to continue enumeration, FALSE to stop
 */
typedef Bool (*TrieEnumFunc) (const AlphaChar  *key,
                              TrieData          key_data,
                              void             *user_data);

/**
 * @brief Trie walking state
 */
typedef struct _TrieState TrieState;


/**
 * @brief Trie iteration state
 */
typedef struct _TrieIterator TrieIterator;

/*-----------------------*
 *   GENERAL FUNCTIONS   *
 *-----------------------*/

Trie *  trie_new (const AlphaMap *alpha_map);

Trie *  trie_new_from_file (const char *path);

Trie *  trie_fread (FILE *file);

void    trie_free (Trie *trie);

size_t  trie_get_serialized_size (Trie *trie);

void    trie_serialize (Trie *trie, uint8 *ptr);

int     trie_save (Trie *trie, const char *path);

int     trie_fwrite (Trie *trie, FILE *file);

Bool    trie_is_dirty (const Trie *trie);


/*------------------------------*
 *   GENERAL QUERY OPERATIONS   *
 *------------------------------*/

Bool    trie_retrieve (const Trie      *trie,
                       const AlphaChar *key,
                       TrieData        *o_data);

Bool    trie_store (Trie *trie, const AlphaChar *key, TrieData data);

Bool    trie_store_if_absent (Trie *trie, const AlphaChar *key, TrieData data);

Bool    trie_delete (Trie *trie, const AlphaChar *key);

Bool    trie_enumerate (const Trie     *trie,
                        TrieEnumFunc    enum_func,
                        void           *user_data);


/*-------------------------------*
 *   STEPWISE QUERY OPERATIONS   *
 *-------------------------------*/

TrieState * trie_root (const Trie *trie);


/*----------------*
 *   TRIE STATE   *
 *----------------*/

TrieState * trie_state_clone (const TrieState *s);

void        trie_state_copy (TrieState *dst, const TrieState *src);

void      trie_state_free (TrieState *s);

void      trie_state_rewind (TrieState *s);

Bool      trie_state_walk (TrieState *s, AlphaChar c);

Bool      trie_state_is_walkable (const TrieState *s, AlphaChar c);

int       trie_state_walkable_chars (const TrieState  *s,
                                     AlphaChar         chars[],
                                     int               chars_nelm);

/**
 * @brief Check for terminal state
 *
 * @param s    : the state to check
 *
 * @return boolean value indicating whether it is a terminal state
 *
 * Check if the given state is a terminal state. A terminal state is a trie
 * state that terminates a key, and stores a value associated with it.
 */
#define   trie_state_is_terminal(s) trie_state_is_walkable((s),0)

Bool      trie_state_is_single (const TrieState *s);

/**
 * @brief Check for leaf state
 *
 * @param s    : the state to check
 *
 * @return boolean value indicating whether it is a leaf state
 *
 * Check if the given state is a leaf state. A leaf state is a terminal state
 * that has no other branch.
 */
#define   trie_state_is_leaf(s) \
    (trie_state_is_single(s) && trie_state_is_terminal(s))

TrieData trie_state_get_data (const TrieState *s);


/*----------------------*
 *    ENTRY ITERATION   *
 *----------------------*/

TrieIterator *  trie_iterator_new (TrieState *s);

void            trie_iterator_free (TrieIterator *iter);

Bool            trie_iterator_next (TrieIterator *iter);

AlphaChar *     trie_iterator_get_key (const TrieIterator *iter);

TrieData        trie_iterator_get_data (const TrieIterator *iter);


#ifdef __cplusplus
}
#endif

#endif  /* __TRIE_H */

/*
vi:ts=4:ai:expandtab
*/