module RMMSeg::Dictionary

Manage dictionaries used by rmmseg.

Attributes

dictionaries[RW]

An array of dictionaries used by RMMSeg. Each entry is of the following form:

[type, path]

where type can be either :chars or :words, and path is the path to the dictionary file.

The format of :chars dictionary is a collection of lines of the following form:

freq char

Where freq is the frequency, a number less than 65535, and char is the character. They are separated by exactly one space.

The format of :words dictionary is similar:

length word

except the first number is not the frequency, but the number of characters (not number of bytes) in the word.

There's a script (convert.rb) in the tools directory that can be used to convert and normalize dictionaries.

Public Class Methods

add(word, length, freq) click to toggle source

Add a word to the in-memory dictionary.

  • word is a String.

  • length is number of characters (not number of bytes) of the word to be added.

  • freq is the frequency of the word. This is only used when it is a one-character word.

static VALUE dic_add(VALUE mod, VALUE word, VALUE len, VALUE freq)
    {
        // Validate/coerce the arguments before touching them: StringValue
        // raises TypeError for objects that cannot be converted to String,
        // and NUM2INT raises for non-numeric or out-of-range values.  The
        // unchecked RSTRING_PTR/FIX2INT macros would instead read garbage
        // (or crash) when handed an unexpected object.
        StringValue(word);
        const int nchars = NUM2INT(len);
        const int frequency = NUM2INT(freq);

        const char *str = RSTRING_PTR(word);
        int nbytes = RSTRING_LEN(word);
        rmmseg::Word *w = rmmseg::make_word(str, nchars, frequency, nbytes);
        rmmseg::dict::add(w);
        return Qnil;
    }

    /*
     * Check whether one word is included in the dictionary.
     *
     * call-seq:
     *   has_word?(word)    -> result
     *
     * Return +true+ if the word is included in the dictionary,
     * +false+ otherwise.
     *
     * Raises +TypeError+ if +word+ cannot be converted to a String.
     */
    static VALUE dic_has_word(VALUE mod, VALUE word)
    {
        // Reject (or convert) non-String arguments before reading the
        // string buffer; RSTRING_PTR on any other object is invalid.
        StringValue(word);

        const char *str = RSTRING_PTR(word);
        int nbytes = RSTRING_LEN(word);
        return rmmseg::dict::get(str, nbytes) != NULL ? Qtrue : Qfalse;
    }


    /**********************
     * Token Class
     **********************/
    // Native payload wrapped by each RMMSeg::Token Ruby object.
    struct Token
    {
        VALUE text;     // token text (Ruby String created in tk_create)
        VALUE start;    // byte offset where the token begins, as a Fixnum
        VALUE end;      // start + token length in bytes, as a Fixnum
    };

    static void tk_mark(Token *t)
    {
        // Only the text needs marking; start and end are Fixnums.
        VALUE held_text = t->text;
        rb_gc_mark(held_text);
    }
    static void tk_free(Token *t)
    {
        // The struct is obtained with ALLOC() in tk_create, so it has to
        // go back through Ruby's allocator (xfree) rather than plain free().
        xfree(t);
    }

    /*
     * Return the String covered by this token.
     *
     * call-seq:
     *   text()    -> text
     *
     */
    static VALUE tk_text(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->text;
    }

    /*
     * Return the position at which this token begins.
     *
     * call-seq:
     *   start()    -> start_pos
     *
     */
    static VALUE tk_start(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->start;
    }

    /*
     * Return the position at which this token ends.
     *
     * call-seq:
     *   end()    -> end_pos
     *
     */
    static VALUE tk_end(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->end;
    }

    // Ruby class object for RMMSeg::Token; assigned in Init_rmmseg.
    static VALUE cToken;
    // Build a Ruby Token object from a native rmmseg::Token.
    //   base - start of the buffer that t.text points into; used to turn
    //          the token's pointer into an offset.
    //   t    - native token (text pointer plus length in bytes).
    // Returns the wrapped Token; its start/end are byte offsets from base.
    static VALUE tk_create(const char* base, const rmmseg::Token &t)
    {
        Token *tk = ALLOC(Token);
        int start = t.text-base;    // byte offset of the token within the buffer

        // This is necessary, see
        // http://lifegoo.pluskid.org/?p=348
        // NOTE(review): the volatile local presumably keeps the freshly
        // created String reachable from the stack while tk is not yet
        // wrapped (and therefore not yet marked) -- confirm against the link.
        volatile VALUE text = rb_str_new(t.text, t.length);
        tk->text = text;

        tk->start = INT2FIX(start);
        tk->end = INT2FIX(start + t.length);
        // From here on tk is owned by the Ruby object: tk_mark keeps the
        // text alive and tk_free releases the struct.
        volatile VALUE tok = Data_Wrap_Struct(cToken,
                                (RUBY_DATA_FUNC)tk_mark,
                                (RUBY_DATA_FUNC)tk_free,
                                tk);
        return tok;
    }

    /*********************
     * Algorithm Class
     *********************/
    // Native payload wrapped by each RMMSeg::Algorithm Ruby object.
    struct Algorithm
    {
        VALUE text;             // hold to avoid being garbage collected
        rmmseg::Algorithm *algor;   // native segmenter constructed over text's buffer in algor_create
    };

    static void algor_mark(Algorithm *a)
    {
        // Keep the source String alive for as long as this object is.
        VALUE source = a->text;
        rb_gc_mark(source);
    }
    static void algor_free(Algorithm *a)
    {
        if (a->algor != NULL)
        {
            // The native object is placement-constructed into malloc'ed
            // storage in algor_create: run its destructor explicitly, then
            // release the storage with the matching free().
            typedef rmmseg::Algorithm Segmenter;
            a->algor->~Segmenter();
            free(a->algor);
            a->algor = NULL;
        }
        // The wrapper struct comes from ALLOC(); without this call one
        // struct was leaked for every collected Algorithm object.
        xfree(a);
    }

    static VALUE cAlgorithm;

    /*
     * Create an Algorithm object to do segmenting on +text+.
     *
     * call-seq:
     *   new(text)    -> algorithm
     *
     * Raises +TypeError+ if +text+ cannot be converted to a String and
     * +NoMemoryError+ if the native segmenter cannot be allocated.
     */
    static VALUE algor_create(VALUE klass, VALUE text)
    {
        // Validate first, while nothing has been allocated yet, so a bad
        // argument can neither leak memory nor hand a bogus pointer to
        // the segmenter.
        StringValue(text);

        Algorithm *algor = ALLOC(Algorithm);
        algor->text = text;

        void *mem = malloc(sizeof(rmmseg::Algorithm));
        if (mem == NULL)
        {
            // Placement new on a null pointer is undefined behaviour;
            // release the wrapper and report the failure to Ruby instead.
            xfree(algor);
            rb_raise(rb_eNoMemError, "failed to allocate rmmseg::Algorithm");
        }

        // NOTE(review): the segmenter receives the String's raw buffer.
        // The String is kept alive via algor->text, but mutating it from
        // Ruby afterwards may move that buffer -- confirm callers do not.
        algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text),
                                                  RSTRING_LEN(text));

        return Data_Wrap_Struct(klass,
                                (RUBY_DATA_FUNC)algor_mark,
                                (RUBY_DATA_FUNC)algor_free,
                                algor);
    }

    /*
     * Fetch the next token from the text being segmented.
     *
     * call-seq:
     *   next_token()   -> token
     *
     * Return +nil+ if no more token available.
     */
    static VALUE algor_next_token(VALUE self)
    {
        Algorithm *wrapper = static_cast<Algorithm *>(DATA_PTR(self));
        rmmseg::Token native = wrapper->algor->next_token();

        // A zero-length token means there is nothing left to return.
        if (native.length != 0)
        {
            volatile VALUE rtk = tk_create(RSTRING_PTR(wrapper->text), native);
            return rtk;
        }
        return Qnil;
    }


    void Init_rmmseg()
    {
        mRMMSeg = rb_define_module("RMMSeg");

        /* Manage dictionaries used by rmmseg. */
        mDictionary = rb_define_module_under(mRMMSeg, "Dictionary");
        rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1);
        rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1);
        rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3);
        rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1);

        /* A Token hold the text and related position information. */
        cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject);
        rb_undef_method(rb_singleton_class(cToken), "new");
        rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0);
        rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0);
        rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0);

        /* An Algorithm object use the MMSEG algorithm to do segmenting. */
        cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject);
        rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1);
        rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0);
    }
}
add_dictionary(path, type) click to toggle source

Add a user-defined dictionary. type can be :chars or :words. See the documentation of dictionaries.

# File lib/rmmseg/dictionary.rb, line 41
def add_dictionary(path, type)
  # Record the dictionary as a [type, path] pair; nothing is loaded here.
  entry = [type, path]
  @dictionaries.push(entry)
end
has_word?(word) → result click to toggle source

Check whether one word is included in the dictionary.

Return true if the word is included in the dictionary, false otherwise.

static VALUE dic_has_word(VALUE mod, VALUE word)
    {
        // Reject (or convert) non-String arguments before reading the
        // string buffer; RSTRING_PTR on any other object is invalid.
        // StringValue raises TypeError when no conversion is possible.
        StringValue(word);

        const char *str = RSTRING_PTR(word);
        int nbytes = RSTRING_LEN(word);
        return rmmseg::dict::get(str, nbytes) != NULL ? Qtrue : Qfalse;
    }


    /**********************
     * Token Class
     **********************/
    // Native payload wrapped by each RMMSeg::Token Ruby object.
    struct Token
    {
        VALUE text;     // token text (Ruby String created in tk_create)
        VALUE start;    // byte offset where the token begins, as a Fixnum
        VALUE end;      // start + token length in bytes, as a Fixnum
    };

    static void tk_mark(Token *t)
    {
        // Only the text needs marking; start and end are Fixnums.
        VALUE held_text = t->text;
        rb_gc_mark(held_text);
    }
    static void tk_free(Token *t)
    {
        // The struct is obtained with ALLOC() in tk_create, so it has to
        // go back through Ruby's allocator (xfree) rather than plain free().
        xfree(t);
    }

    /*
     * Return the String covered by this token.
     *
     * call-seq:
     *   text()    -> text
     *
     */
    static VALUE tk_text(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->text;
    }

    /*
     * Return the position at which this token begins.
     *
     * call-seq:
     *   start()    -> start_pos
     *
     */
    static VALUE tk_start(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->start;
    }

    /*
     * Return the position at which this token ends.
     *
     * call-seq:
     *   end()    -> end_pos
     *
     */
    static VALUE tk_end(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->end;
    }

    // Ruby class object for RMMSeg::Token; assigned in Init_rmmseg.
    static VALUE cToken;
    // Build a Ruby Token object from a native rmmseg::Token.
    //   base - start of the buffer that t.text points into; used to turn
    //          the token's pointer into an offset.
    //   t    - native token (text pointer plus length in bytes).
    // Returns the wrapped Token; its start/end are byte offsets from base.
    static VALUE tk_create(const char* base, const rmmseg::Token &t)
    {
        Token *tk = ALLOC(Token);
        int start = t.text-base;    // byte offset of the token within the buffer

        // This is necessary, see
        // http://lifegoo.pluskid.org/?p=348
        // NOTE(review): the volatile local presumably keeps the freshly
        // created String reachable from the stack while tk is not yet
        // wrapped (and therefore not yet marked) -- confirm against the link.
        volatile VALUE text = rb_str_new(t.text, t.length);
        tk->text = text;

        tk->start = INT2FIX(start);
        tk->end = INT2FIX(start + t.length);
        // From here on tk is owned by the Ruby object: tk_mark keeps the
        // text alive and tk_free releases the struct.
        volatile VALUE tok = Data_Wrap_Struct(cToken,
                                (RUBY_DATA_FUNC)tk_mark,
                                (RUBY_DATA_FUNC)tk_free,
                                tk);
        return tok;
    }

    /*********************
     * Algorithm Class
     *********************/
    // Native payload wrapped by each RMMSeg::Algorithm Ruby object.
    struct Algorithm
    {
        VALUE text;             // hold to avoid being garbage collected
        rmmseg::Algorithm *algor;   // native segmenter constructed over text's buffer in algor_create
    };

    static void algor_mark(Algorithm *a)
    {
        // Keep the source String alive for as long as this object is.
        VALUE source = a->text;
        rb_gc_mark(source);
    }
    static void algor_free(Algorithm *a)
    {
        if (a->algor != NULL)
        {
            // The native object is placement-constructed into malloc'ed
            // storage in algor_create: run its destructor explicitly, then
            // release the storage with the matching free().
            typedef rmmseg::Algorithm Segmenter;
            a->algor->~Segmenter();
            free(a->algor);
            a->algor = NULL;
        }
        // The wrapper struct comes from ALLOC(); without this call one
        // struct was leaked for every collected Algorithm object.
        xfree(a);
    }

    static VALUE cAlgorithm;

    /*
     * Create an Algorithm object to do segmenting on +text+.
     *
     * call-seq:
     *   new(text)    -> algorithm
     *
     * Raises +TypeError+ if +text+ cannot be converted to a String and
     * +NoMemoryError+ if the native segmenter cannot be allocated.
     */
    static VALUE algor_create(VALUE klass, VALUE text)
    {
        // Validate first, while nothing has been allocated yet, so a bad
        // argument can neither leak memory nor hand a bogus pointer to
        // the segmenter.
        StringValue(text);

        Algorithm *algor = ALLOC(Algorithm);
        algor->text = text;

        void *mem = malloc(sizeof(rmmseg::Algorithm));
        if (mem == NULL)
        {
            // Placement new on a null pointer is undefined behaviour;
            // release the wrapper and report the failure to Ruby instead.
            xfree(algor);
            rb_raise(rb_eNoMemError, "failed to allocate rmmseg::Algorithm");
        }

        // NOTE(review): the segmenter receives the String's raw buffer.
        // The String is kept alive via algor->text, but mutating it from
        // Ruby afterwards may move that buffer -- confirm callers do not.
        algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text),
                                                  RSTRING_LEN(text));

        return Data_Wrap_Struct(klass,
                                (RUBY_DATA_FUNC)algor_mark,
                                (RUBY_DATA_FUNC)algor_free,
                                algor);
    }

    /*
     * Fetch the next token from the text being segmented.
     *
     * call-seq:
     *   next_token()   -> token
     *
     * Return +nil+ if no more token available.
     */
    static VALUE algor_next_token(VALUE self)
    {
        Algorithm *wrapper = static_cast<Algorithm *>(DATA_PTR(self));
        rmmseg::Token native = wrapper->algor->next_token();

        // A zero-length token means there is nothing left to return.
        if (native.length != 0)
        {
            volatile VALUE rtk = tk_create(RSTRING_PTR(wrapper->text), native);
            return rtk;
        }
        return Qnil;
    }


    void Init_rmmseg()
    {
        mRMMSeg = rb_define_module("RMMSeg");

        /* Manage dictionaries used by rmmseg. */
        mDictionary = rb_define_module_under(mRMMSeg, "Dictionary");
        rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1);
        rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1);
        rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3);
        rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1);

        /* A Token hold the text and related position information. */
        cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject);
        rb_undef_method(rb_singleton_class(cToken), "new");
        rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0);
        rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0);
        rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0);

        /* An Algorithm object use the MMSEG algorithm to do segmenting. */
        cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject);
        rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1);
        rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0);
    }
}
load_chars(path) → status click to toggle source

Load a character dictionary.

Return true if loaded successfully, false otherwise.

static VALUE dic_load_chars(VALUE mod, VALUE path)
    {
        // StringValueCStr checks that +path+ is a String without embedded
        // NUL bytes (raising TypeError / ArgumentError otherwise) and
        // returns a NUL-terminated buffer, which a bare RSTRING_PTR does
        // not promise.
        const char *filename = StringValueCStr(path);
        return rmmseg::dict::load_chars(filename) ? Qtrue : Qfalse;
    }

    /*
     * Load a word dictionary.
     *
     * call-seq:
     *   load_words(path)    -> status
     *
     * Return +true+ if loaded successfully, +false+ otherwise.
     *
     * Raises +TypeError+ if +path+ cannot be converted to a String and
     * +ArgumentError+ if it contains a NUL byte.
     */
    static VALUE dic_load_words(VALUE mod, VALUE path)
    {
        // StringValueCStr validates the argument and returns a
        // NUL-terminated buffer, which a bare RSTRING_PTR does not promise.
        const char *filename = StringValueCStr(path);
        return rmmseg::dict::load_words(filename) ? Qtrue : Qfalse;
    }

    /*
     * Add a word to the in-memory dictionary.
     *
     * call-seq:
     *   add(word, length, freq)
     *
     * - +word+ is a String.
     * - +length+ is number of characters (not number of bytes) of the
     *   word to be added.
     * - +freq+ is the frequency of the word. This is only used when
     *   it is a one-character word.
     *
     * Raises +TypeError+ if +word+ cannot be converted to a String or
     * if +length+ / +freq+ are not numeric, and +RangeError+ if they do
     * not fit in a C int.
     */
    static VALUE dic_add(VALUE mod, VALUE word, VALUE len, VALUE freq)
    {
        // Validate/coerce the arguments before touching them; the
        // unchecked RSTRING_PTR/FIX2INT macros would read garbage (or
        // crash) when handed an unexpected object.
        StringValue(word);
        const int nchars = NUM2INT(len);
        const int frequency = NUM2INT(freq);

        const char *str = RSTRING_PTR(word);
        int nbytes = RSTRING_LEN(word);
        rmmseg::Word *w = rmmseg::make_word(str, nchars, frequency, nbytes);
        rmmseg::dict::add(w);
        return Qnil;
    }

    /*
     * Check whether one word is included in the dictionary.
     *
     * call-seq:
     *   has_word?(word)    -> result
     *
     * Return +true+ if the word is included in the dictionary,
     * +false+ otherwise.
     *
     * Raises +TypeError+ if +word+ cannot be converted to a String.
     */
    static VALUE dic_has_word(VALUE mod, VALUE word)
    {
        // Reject (or convert) non-String arguments before reading the
        // string buffer; RSTRING_PTR on any other object is invalid.
        StringValue(word);

        const char *str = RSTRING_PTR(word);
        int nbytes = RSTRING_LEN(word);
        return rmmseg::dict::get(str, nbytes) != NULL ? Qtrue : Qfalse;
    }


    /**********************
     * Token Class
     **********************/
    // Native payload wrapped by each RMMSeg::Token Ruby object.
    struct Token
    {
        VALUE text;     // token text (Ruby String created in tk_create)
        VALUE start;    // byte offset where the token begins, as a Fixnum
        VALUE end;      // start + token length in bytes, as a Fixnum
    };

    static void tk_mark(Token *t)
    {
        // Only the text needs marking; start and end are Fixnums.
        VALUE held_text = t->text;
        rb_gc_mark(held_text);
    }
    static void tk_free(Token *t)
    {
        // The struct is obtained with ALLOC() in tk_create, so it has to
        // go back through Ruby's allocator (xfree) rather than plain free().
        xfree(t);
    }

    /*
     * Return the String covered by this token.
     *
     * call-seq:
     *   text()    -> text
     *
     */
    static VALUE tk_text(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->text;
    }

    /*
     * Return the position at which this token begins.
     *
     * call-seq:
     *   start()    -> start_pos
     *
     */
    static VALUE tk_start(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->start;
    }

    /*
     * Return the position at which this token ends.
     *
     * call-seq:
     *   end()    -> end_pos
     *
     */
    static VALUE tk_end(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->end;
    }

    // Ruby class object for RMMSeg::Token; assigned in Init_rmmseg.
    static VALUE cToken;
    // Build a Ruby Token object from a native rmmseg::Token.
    //   base - start of the buffer that t.text points into; used to turn
    //          the token's pointer into an offset.
    //   t    - native token (text pointer plus length in bytes).
    // Returns the wrapped Token; its start/end are byte offsets from base.
    static VALUE tk_create(const char* base, const rmmseg::Token &t)
    {
        Token *tk = ALLOC(Token);
        int start = t.text-base;    // byte offset of the token within the buffer

        // This is necessary, see
        // http://lifegoo.pluskid.org/?p=348
        // NOTE(review): the volatile local presumably keeps the freshly
        // created String reachable from the stack while tk is not yet
        // wrapped (and therefore not yet marked) -- confirm against the link.
        volatile VALUE text = rb_str_new(t.text, t.length);
        tk->text = text;

        tk->start = INT2FIX(start);
        tk->end = INT2FIX(start + t.length);
        // From here on tk is owned by the Ruby object: tk_mark keeps the
        // text alive and tk_free releases the struct.
        volatile VALUE tok = Data_Wrap_Struct(cToken,
                                (RUBY_DATA_FUNC)tk_mark,
                                (RUBY_DATA_FUNC)tk_free,
                                tk);
        return tok;
    }

    /*********************
     * Algorithm Class
     *********************/
    // Native payload wrapped by each RMMSeg::Algorithm Ruby object.
    struct Algorithm
    {
        VALUE text;             // hold to avoid being garbage collected
        rmmseg::Algorithm *algor;   // native segmenter constructed over text's buffer in algor_create
    };

    static void algor_mark(Algorithm *a)
    {
        // Keep the source String alive for as long as this object is.
        VALUE source = a->text;
        rb_gc_mark(source);
    }
    static void algor_free(Algorithm *a)
    {
        if (a->algor != NULL)
        {
            // The native object is placement-constructed into malloc'ed
            // storage in algor_create: run its destructor explicitly, then
            // release the storage with the matching free().
            typedef rmmseg::Algorithm Segmenter;
            a->algor->~Segmenter();
            free(a->algor);
            a->algor = NULL;
        }
        // The wrapper struct comes from ALLOC(); without this call one
        // struct was leaked for every collected Algorithm object.
        xfree(a);
    }

    static VALUE cAlgorithm;

    /*
     * Create an Algorithm object to do segmenting on +text+.
     *
     * call-seq:
     *   new(text)    -> algorithm
     *
     * Raises +TypeError+ if +text+ cannot be converted to a String and
     * +NoMemoryError+ if the native segmenter cannot be allocated.
     */
    static VALUE algor_create(VALUE klass, VALUE text)
    {
        // Validate first, while nothing has been allocated yet, so a bad
        // argument can neither leak memory nor hand a bogus pointer to
        // the segmenter.
        StringValue(text);

        Algorithm *algor = ALLOC(Algorithm);
        algor->text = text;

        void *mem = malloc(sizeof(rmmseg::Algorithm));
        if (mem == NULL)
        {
            // Placement new on a null pointer is undefined behaviour;
            // release the wrapper and report the failure to Ruby instead.
            xfree(algor);
            rb_raise(rb_eNoMemError, "failed to allocate rmmseg::Algorithm");
        }

        // NOTE(review): the segmenter receives the String's raw buffer.
        // The String is kept alive via algor->text, but mutating it from
        // Ruby afterwards may move that buffer -- confirm callers do not.
        algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text),
                                                  RSTRING_LEN(text));

        return Data_Wrap_Struct(klass,
                                (RUBY_DATA_FUNC)algor_mark,
                                (RUBY_DATA_FUNC)algor_free,
                                algor);
    }

    /*
     * Fetch the next token from the text being segmented.
     *
     * call-seq:
     *   next_token()   -> token
     *
     * Return +nil+ if no more token available.
     */
    static VALUE algor_next_token(VALUE self)
    {
        Algorithm *wrapper = static_cast<Algorithm *>(DATA_PTR(self));
        rmmseg::Token native = wrapper->algor->next_token();

        // A zero-length token means there is nothing left to return.
        if (native.length != 0)
        {
            volatile VALUE rtk = tk_create(RSTRING_PTR(wrapper->text), native);
            return rtk;
        }
        return Qnil;
    }


    void Init_rmmseg()
    {
        mRMMSeg = rb_define_module("RMMSeg");

        /* Manage dictionaries used by rmmseg. */
        mDictionary = rb_define_module_under(mRMMSeg, "Dictionary");
        rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1);
        rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1);
        rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3);
        rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1);

        /* A Token hold the text and related position information. */
        cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject);
        rb_undef_method(rb_singleton_class(cToken), "new");
        rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0);
        rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0);
        rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0);

        /* An Algorithm object use the MMSEG algorithm to do segmenting. */
        cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject);
        rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1);
        rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0);
    }
}
load_dictionaries() click to toggle source

Load dictionaries. Call this method after setting up the paths of the dictionaries to load and before any Algorithm object is created.

# File lib/rmmseg/dictionary.rb, line 48
def load_dictionaries()
  # Walk the registered [type, path] pairs in order and hand each path to
  # the loader matching its type; entries of any other type are skipped.
  @dictionaries.each do |entry|
    kind, file = entry
    next load_chars(file) if kind == :chars
    load_words(file) if kind == :words
  end
end
load_words(path) → status click to toggle source

Load a word dictionary.

Return true if loaded successfully, false otherwise.

static VALUE dic_load_words(VALUE mod, VALUE path)
    {
        // StringValueCStr checks that +path+ is a String without embedded
        // NUL bytes (raising TypeError / ArgumentError otherwise) and
        // returns a NUL-terminated buffer, which a bare RSTRING_PTR does
        // not promise.
        const char *filename = StringValueCStr(path);
        return rmmseg::dict::load_words(filename) ? Qtrue : Qfalse;
    }

    /*
     * Add a word to the in-memory dictionary.
     *
     * call-seq:
     *   add(word, length, freq)
     *
     * - +word+ is a String.
     * - +length+ is number of characters (not number of bytes) of the
     *   word to be added.
     * - +freq+ is the frequency of the word. This is only used when
     *   it is a one-character word.
     *
     * Raises +TypeError+ if +word+ cannot be converted to a String or
     * if +length+ / +freq+ are not numeric, and +RangeError+ if they do
     * not fit in a C int.
     */
    static VALUE dic_add(VALUE mod, VALUE word, VALUE len, VALUE freq)
    {
        // Validate/coerce the arguments before touching them; the
        // unchecked RSTRING_PTR/FIX2INT macros would read garbage (or
        // crash) when handed an unexpected object.
        StringValue(word);
        const int nchars = NUM2INT(len);
        const int frequency = NUM2INT(freq);

        const char *str = RSTRING_PTR(word);
        int nbytes = RSTRING_LEN(word);
        rmmseg::Word *w = rmmseg::make_word(str, nchars, frequency, nbytes);
        rmmseg::dict::add(w);
        return Qnil;
    }

    /*
     * Check whether one word is included in the dictionary.
     *
     * call-seq:
     *   has_word?(word)    -> result
     *
     * Return +true+ if the word is included in the dictionary,
     * +false+ otherwise.
     *
     * Raises +TypeError+ if +word+ cannot be converted to a String.
     */
    static VALUE dic_has_word(VALUE mod, VALUE word)
    {
        // Reject (or convert) non-String arguments before reading the
        // string buffer; RSTRING_PTR on any other object is invalid.
        StringValue(word);

        const char *str = RSTRING_PTR(word);
        int nbytes = RSTRING_LEN(word);
        return rmmseg::dict::get(str, nbytes) != NULL ? Qtrue : Qfalse;
    }


    /**********************
     * Token Class
     **********************/
    // Native payload wrapped by each RMMSeg::Token Ruby object.
    struct Token
    {
        VALUE text;     // token text (Ruby String created in tk_create)
        VALUE start;    // byte offset where the token begins, as a Fixnum
        VALUE end;      // start + token length in bytes, as a Fixnum
    };

    static void tk_mark(Token *t)
    {
        // Only the text needs marking; start and end are Fixnums.
        VALUE held_text = t->text;
        rb_gc_mark(held_text);
    }
    static void tk_free(Token *t)
    {
        // The struct is obtained with ALLOC() in tk_create, so it has to
        // go back through Ruby's allocator (xfree) rather than plain free().
        xfree(t);
    }

    /*
     * Return the String covered by this token.
     *
     * call-seq:
     *   text()    -> text
     *
     */
    static VALUE tk_text(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->text;
    }

    /*
     * Return the position at which this token begins.
     *
     * call-seq:
     *   start()    -> start_pos
     *
     */
    static VALUE tk_start(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->start;
    }

    /*
     * Return the position at which this token ends.
     *
     * call-seq:
     *   end()    -> end_pos
     *
     */
    static VALUE tk_end(VALUE self)
    {
        const Token *token = static_cast<Token *>(DATA_PTR(self));
        return token->end;
    }

    // Ruby class object for RMMSeg::Token; assigned in Init_rmmseg.
    static VALUE cToken;
    // Build a Ruby Token object from a native rmmseg::Token.
    //   base - start of the buffer that t.text points into; used to turn
    //          the token's pointer into an offset.
    //   t    - native token (text pointer plus length in bytes).
    // Returns the wrapped Token; its start/end are byte offsets from base.
    static VALUE tk_create(const char* base, const rmmseg::Token &t)
    {
        Token *tk = ALLOC(Token);
        int start = t.text-base;    // byte offset of the token within the buffer

        // This is necessary, see
        // http://lifegoo.pluskid.org/?p=348
        // NOTE(review): the volatile local presumably keeps the freshly
        // created String reachable from the stack while tk is not yet
        // wrapped (and therefore not yet marked) -- confirm against the link.
        volatile VALUE text = rb_str_new(t.text, t.length);
        tk->text = text;

        tk->start = INT2FIX(start);
        tk->end = INT2FIX(start + t.length);
        // From here on tk is owned by the Ruby object: tk_mark keeps the
        // text alive and tk_free releases the struct.
        volatile VALUE tok = Data_Wrap_Struct(cToken,
                                (RUBY_DATA_FUNC)tk_mark,
                                (RUBY_DATA_FUNC)tk_free,
                                tk);
        return tok;
    }

    /*********************
     * Algorithm Class
     *********************/
    // Native payload wrapped by each RMMSeg::Algorithm Ruby object.
    struct Algorithm
    {
        VALUE text;             // hold to avoid being garbage collected
        rmmseg::Algorithm *algor;   // native segmenter constructed over text's buffer in algor_create
    };

    static void algor_mark(Algorithm *a)
    {
        // Keep the source String alive for as long as this object is.
        VALUE source = a->text;
        rb_gc_mark(source);
    }
    static void algor_free(Algorithm *a)
    {
        if (a->algor != NULL)
        {
            // The native object is placement-constructed into malloc'ed
            // storage in algor_create: run its destructor explicitly, then
            // release the storage with the matching free().
            typedef rmmseg::Algorithm Segmenter;
            a->algor->~Segmenter();
            free(a->algor);
            a->algor = NULL;
        }
        // The wrapper struct comes from ALLOC(); without this call one
        // struct was leaked for every collected Algorithm object.
        xfree(a);
    }

    static VALUE cAlgorithm;

    /*
     * Create an Algorithm object to do segmenting on +text+.
     *
     * call-seq:
     *   new(text)    -> algorithm
     *
     * Raises +TypeError+ if +text+ cannot be converted to a String and
     * +NoMemoryError+ if the native segmenter cannot be allocated.
     */
    static VALUE algor_create(VALUE klass, VALUE text)
    {
        // Validate first, while nothing has been allocated yet, so a bad
        // argument can neither leak memory nor hand a bogus pointer to
        // the segmenter.
        StringValue(text);

        Algorithm *algor = ALLOC(Algorithm);
        algor->text = text;

        void *mem = malloc(sizeof(rmmseg::Algorithm));
        if (mem == NULL)
        {
            // Placement new on a null pointer is undefined behaviour;
            // release the wrapper and report the failure to Ruby instead.
            xfree(algor);
            rb_raise(rb_eNoMemError, "failed to allocate rmmseg::Algorithm");
        }

        // NOTE(review): the segmenter receives the String's raw buffer.
        // The String is kept alive via algor->text, but mutating it from
        // Ruby afterwards may move that buffer -- confirm callers do not.
        algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text),
                                                  RSTRING_LEN(text));

        return Data_Wrap_Struct(klass,
                                (RUBY_DATA_FUNC)algor_mark,
                                (RUBY_DATA_FUNC)algor_free,
                                algor);
    }

    /*
     * Fetch the next token from the text being segmented.
     *
     * call-seq:
     *   next_token()   -> token
     *
     * Return +nil+ if no more token available.
     */
    static VALUE algor_next_token(VALUE self)
    {
        Algorithm *wrapper = static_cast<Algorithm *>(DATA_PTR(self));
        rmmseg::Token native = wrapper->algor->next_token();

        // A zero-length token means there is nothing left to return.
        if (native.length != 0)
        {
            volatile VALUE rtk = tk_create(RSTRING_PTR(wrapper->text), native);
            return rtk;
        }
        return Qnil;
    }


    void Init_rmmseg()
    {
        mRMMSeg = rb_define_module("RMMSeg");

        /* Manage dictionaries used by rmmseg. */
        mDictionary = rb_define_module_under(mRMMSeg, "Dictionary");
        rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1);
        rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1);
        rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3);
        rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1);

        /* A Token hold the text and related position information. */
        cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject);
        rb_undef_method(rb_singleton_class(cToken), "new");
        rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0);
        rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0);
        rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0);

        /* An Algorithm object use the MMSEG algorithm to do segmenting. */
        cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject);
        rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1);
        rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0);
    }
}