class RMMSeg::Token
A Token holds a piece of text together with its start and end positions.
Public Instance Methods
end() → end_pos
click to toggle source
Get the end position of this token.
static VALUE tk_end(VALUE self) { Token *tk = (Token *)DATA_PTR(self); return tk->end; } static VALUE cToken; static VALUE tk_create(const char* base, const rmmseg::Token &t) { Token *tk = ALLOC(Token); int start = t.text-base; // This is necessary, see // http://lifegoo.pluskid.org/?p=348 volatile VALUE text = rb_str_new(t.text, t.length); tk->text = text; tk->start = INT2FIX(start); tk->end = INT2FIX(start + t.length); volatile VALUE tok = Data_Wrap_Struct(cToken, (RUBY_DATA_FUNC)tk_mark, (RUBY_DATA_FUNC)tk_free, tk); return tok; } /********************* * Algorithm Class *********************/ struct Algorithm { VALUE text; // hold to avoid being garbage collected rmmseg::Algorithm *algor; }; static void algor_mark(Algorithm *a) { rb_gc_mark(a->text); } static void algor_free(Algorithm *a) { free(a->algor); } static VALUE cAlgorithm; /* * Create an Algorithm object to do segmenting on +text+. * * call-seq: * new(text) -> algorithm * */ static VALUE algor_create(VALUE klass, VALUE text) { Algorithm *algor = ALLOC(Algorithm); void *mem; algor->text = text; mem = malloc(sizeof(rmmseg::Algorithm)); algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text), RSTRING_LEN(text)); return Data_Wrap_Struct(klass, (RUBY_DATA_FUNC)algor_mark, (RUBY_DATA_FUNC)algor_free, algor); } /* * Get next token. * * call-seq: * next_token() -> token * * Return +nil+ if no more token available. */ static VALUE algor_next_token(VALUE self) { Algorithm *algor = (Algorithm *)DATA_PTR(self); rmmseg::Token tk = algor->algor->next_token(); if (tk.length == 0) return Qnil; volatile VALUE rtk = tk_create(RSTRING_PTR(algor->text), tk); return rtk; } void Init_rmmseg() { mRMMSeg = rb_define_module("RMMSeg"); /* Manage dictionaries used by rmmseg. 
*/ mDictionary = rb_define_module_under(mRMMSeg, "Dictionary"); rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1); rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1); rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3); rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1); /* A Token hold the text and related position information. */ cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject); rb_undef_method(rb_singleton_class(cToken), "new"); rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0); rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0); rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0); /* An Algorithm object use the MMSEG algorithm to do segmenting. */ cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject); rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1); rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0); } }
start() → start_pos
click to toggle source
Get the start position of this token.
static VALUE tk_start(VALUE self) { Token *tk = (Token *)DATA_PTR(self); return tk->start; } /* * Get the end position of this token. * * call-seq: * end() -> end_pos * */ static VALUE tk_end(VALUE self) { Token *tk = (Token *)DATA_PTR(self); return tk->end; } static VALUE cToken; static VALUE tk_create(const char* base, const rmmseg::Token &t) { Token *tk = ALLOC(Token); int start = t.text-base; // This is necessary, see // http://lifegoo.pluskid.org/?p=348 volatile VALUE text = rb_str_new(t.text, t.length); tk->text = text; tk->start = INT2FIX(start); tk->end = INT2FIX(start + t.length); volatile VALUE tok = Data_Wrap_Struct(cToken, (RUBY_DATA_FUNC)tk_mark, (RUBY_DATA_FUNC)tk_free, tk); return tok; } /********************* * Algorithm Class *********************/ struct Algorithm { VALUE text; // hold to avoid being garbage collected rmmseg::Algorithm *algor; }; static void algor_mark(Algorithm *a) { rb_gc_mark(a->text); } static void algor_free(Algorithm *a) { free(a->algor); } static VALUE cAlgorithm; /* * Create an Algorithm object to do segmenting on +text+. * * call-seq: * new(text) -> algorithm * */ static VALUE algor_create(VALUE klass, VALUE text) { Algorithm *algor = ALLOC(Algorithm); void *mem; algor->text = text; mem = malloc(sizeof(rmmseg::Algorithm)); algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text), RSTRING_LEN(text)); return Data_Wrap_Struct(klass, (RUBY_DATA_FUNC)algor_mark, (RUBY_DATA_FUNC)algor_free, algor); } /* * Get next token. * * call-seq: * next_token() -> token * * Return +nil+ if no more token available. */ static VALUE algor_next_token(VALUE self) { Algorithm *algor = (Algorithm *)DATA_PTR(self); rmmseg::Token tk = algor->algor->next_token(); if (tk.length == 0) return Qnil; volatile VALUE rtk = tk_create(RSTRING_PTR(algor->text), tk); return rtk; } void Init_rmmseg() { mRMMSeg = rb_define_module("RMMSeg"); /* Manage dictionaries used by rmmseg. 
*/ mDictionary = rb_define_module_under(mRMMSeg, "Dictionary"); rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1); rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1); rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3); rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1); /* A Token hold the text and related position information. */ cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject); rb_undef_method(rb_singleton_class(cToken), "new"); rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0); rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0); rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0); /* An Algorithm object use the MMSEG algorithm to do segmenting. */ cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject); rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1); rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0); } }
text() → text
click to toggle source
Get the text held by this token.
static VALUE tk_text(VALUE self) { Token *tk = (Token *)DATA_PTR(self); return tk->text; } /* * Get the start position of this token. * * call-seq: * start() -> start_pos * */ static VALUE tk_start(VALUE self) { Token *tk = (Token *)DATA_PTR(self); return tk->start; } /* * Get the end position of this token. * * call-seq: * end() -> end_pos * */ static VALUE tk_end(VALUE self) { Token *tk = (Token *)DATA_PTR(self); return tk->end; } static VALUE cToken; static VALUE tk_create(const char* base, const rmmseg::Token &t) { Token *tk = ALLOC(Token); int start = t.text-base; // This is necessary, see // http://lifegoo.pluskid.org/?p=348 volatile VALUE text = rb_str_new(t.text, t.length); tk->text = text; tk->start = INT2FIX(start); tk->end = INT2FIX(start + t.length); volatile VALUE tok = Data_Wrap_Struct(cToken, (RUBY_DATA_FUNC)tk_mark, (RUBY_DATA_FUNC)tk_free, tk); return tok; } /********************* * Algorithm Class *********************/ struct Algorithm { VALUE text; // hold to avoid being garbage collected rmmseg::Algorithm *algor; }; static void algor_mark(Algorithm *a) { rb_gc_mark(a->text); } static void algor_free(Algorithm *a) { free(a->algor); } static VALUE cAlgorithm; /* * Create an Algorithm object to do segmenting on +text+. * * call-seq: * new(text) -> algorithm * */ static VALUE algor_create(VALUE klass, VALUE text) { Algorithm *algor = ALLOC(Algorithm); void *mem; algor->text = text; mem = malloc(sizeof(rmmseg::Algorithm)); algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text), RSTRING_LEN(text)); return Data_Wrap_Struct(klass, (RUBY_DATA_FUNC)algor_mark, (RUBY_DATA_FUNC)algor_free, algor); } /* * Get next token. * * call-seq: * next_token() -> token * * Return +nil+ if no more token available. 
*/ static VALUE algor_next_token(VALUE self) { Algorithm *algor = (Algorithm *)DATA_PTR(self); rmmseg::Token tk = algor->algor->next_token(); if (tk.length == 0) return Qnil; volatile VALUE rtk = tk_create(RSTRING_PTR(algor->text), tk); return rtk; } void Init_rmmseg() { mRMMSeg = rb_define_module("RMMSeg"); /* Manage dictionaries used by rmmseg. */ mDictionary = rb_define_module_under(mRMMSeg, "Dictionary"); rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1); rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1); rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3); rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1); /* A Token hold the text and related position information. */ cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject); rb_undef_method(rb_singleton_class(cToken), "new"); rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0); rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0); rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0); /* An Algorithm object use the MMSEG algorithm to do segmenting. */ cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject); rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1); rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0); } }