Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

include/xapian/enquire.h

Go to the documentation of this file.
00001 00004 /* ----START-LICENCE---- 00005 * Copyright 1999,2000,2001 BrightStation PLC 00006 * Copyright 2001,2002 Ananova Ltd 00007 * Copyright 2002,2003,2004,2005 Olly Betts 00008 * 00009 * This program is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU General Public License as 00011 * published by the Free Software Foundation; either version 2 of the 00012 * License, or (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00022 * USA 00023 * -----END-LICENCE----- 00024 */ 00025 00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H 00027 #define XAPIAN_INCLUDED_ENQUIRE_H 00028 00029 #include <string> 00030 #include <time.h> // for time_t 00031 00032 #include <xapian/base.h> 00033 #include <xapian/error.h> 00034 #include <xapian/types.h> 00035 00036 namespace Xapian { 00037 00038 class Database; 00039 class Document; 00040 class ErrorHandler; 00041 class MSetIterator; 00042 class Query; 00043 class TermIterator; 00044 class Weight; 00045 00049 class MSet { 00050 public: 00051 class Internal; 00053 Xapian::Internal::RefCntPtr<Internal> internal; 00054 00056 explicit MSet(MSet::Internal * internal_); 00057 00059 MSet(); 00060 00062 ~MSet(); 00063 00065 MSet(const MSet & other); 00066 00068 void operator=(const MSet &other); 00069 00085 void fetch(const MSetIterator &begin, const MSetIterator &end) const; 00086 00089 void fetch(const MSetIterator &item) const; 00090 00093 void fetch() const; 00094 00099 Xapian::percent convert_to_percent(Xapian::weight wt) const; 00100 00102 Xapian::percent convert_to_percent(const MSetIterator &it) const; 00103 00111 Xapian::doccount get_termfreq(const std::string &tname) const; 00112 00120 Xapian::weight get_termweight(const std::string &tname) const; 00121 00129 Xapian::doccount get_firstitem() const; 00130 00140 Xapian::doccount get_matches_lower_bound() const; 00141 00154 Xapian::doccount get_matches_estimated() const; 00155 00165 Xapian::doccount get_matches_upper_bound() const; 00166 00172 Xapian::weight get_max_possible() const; 00173 00187 Xapian::weight get_max_attained() const; 00188 00190 Xapian::doccount size() const; 00191 00193 Xapian::doccount max_size() const { return size(); } 00194 00196 bool empty() const; 00197 00199 void swap(MSet & other); 00200 00202 MSetIterator begin() const; 00203 00205 MSetIterator end() const; 00206 00208 MSetIterator back() const; 00209 00219 MSetIterator operator[](Xapian::doccount i) const; 00220 00222 00223 typedef MSetIterator value_type; // FIXME: not assignable... 00224 typedef MSetIterator iterator; 00225 typedef MSetIterator const_iterator; 00226 typedef MSetIterator & reference; // Hmm 00227 typedef MSetIterator & const_reference; 00228 typedef MSetIterator * pointer; // Hmm 00229 typedef Xapian::doccount_diff difference_type; 00230 typedef Xapian::doccount size_type; 00232 00236 std::string get_description() const; 00237 }; 00238 00242 class MSetIterator { 00243 private: 00244 friend class MSet; 00245 friend bool operator==(const MSetIterator &a, const MSetIterator &b); 00246 friend bool operator!=(const MSetIterator &a, const MSetIterator &b); 00247 00248 MSetIterator(Xapian::doccount index_, const MSet & mset_) 00249 : index(index_), mset(mset_) { } 00250 00251 Xapian::doccount index; 00252 MSet mset; 00253 00254 public: 00258 MSetIterator() : index(0), mset() { } 00259 00260 ~MSetIterator() { } 00261 00263 MSetIterator(const MSetIterator &other) { 00264 index = other.index; 00265 mset = other.mset; 00266 } 00267 00269 void operator=(const MSetIterator &other) { 00270 index = other.index; 00271 mset = other.mset; 00272 } 00273 00275 MSetIterator & operator++() { 00276 ++index; 00277 return *this; 00278 } 00279 00281 MSetIterator operator++(int) { 00282 MSetIterator tmp = *this; 00283 ++index; 00284 return tmp; 00285 } 00286 00288 MSetIterator & operator--() { 00289 --index; 00290 return *this; 00291 } 00292 00294 MSetIterator operator--(int) { 00295 MSetIterator tmp = *this; 00296 --index; 00297 return tmp; 00298 } 00299 00301 Xapian::docid operator*() const; 00302 00321 Xapian::Document get_document() const; 00322 00329 Xapian::doccount get_rank() const { 00330 return mset.get_firstitem() + index; 00331 } 00332 00334 Xapian::weight get_weight() const; 00335 00352 Xapian::doccount get_collapse_count() const; 00353 00359 Xapian::percent get_percent() const; 00360 00364 std::string get_description() const; 00365 00367 00368 typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator 00369 typedef Xapian::docid value_type; 00370 typedef Xapian::doccount_diff difference_type; 00371 typedef Xapian::docid * pointer; 00372 typedef Xapian::docid & reference; 00374 }; 00375 00376 inline bool operator==(const MSetIterator &a, const MSetIterator &b) 00377 { 00378 return (a.index == b.index); 00379 } 00380 00381 inline bool operator!=(const MSetIterator &a, const MSetIterator &b) 00382 { 00383 return (a.index != b.index); 00384 } 00385 00386 class ESetIterator; 00387 00392 class ESet { 00393 public: 00394 class Internal; 00396 Xapian::Internal::RefCntPtr<Internal> internal; 00397 00399 ESet(); 00400 00402 ~ESet(); 00403 00405 ESet(const ESet & other); 00406 00408 void operator=(const ESet &other); 00409 00414 Xapian::termcount get_ebound() const; 00415 00417 Xapian::termcount size() const; 00418 00420 Xapian::termcount max_size() const { return size(); } 00421 00423 bool empty() const; 00424 00426 void swap(ESet & other); 00427 00429 ESetIterator begin() const; 00430 00432 ESetIterator end() const; 00433 00435 ESetIterator back() const; 00436 00438 ESetIterator operator[](Xapian::doccount i) const; 00439 00444 std::string get_description() const; 00445 }; 00446 00448 class ESetIterator { 00449 private: 00450 friend class ESet; 00451 friend bool operator==(const ESetIterator &a, const ESetIterator &b); 00452 friend bool operator!=(const ESetIterator &a, const ESetIterator &b); 00453 00454 ESetIterator(Xapian::termcount index_, const ESet & eset_) 00455 : index(index_), eset(eset_) { } 00456 00457 Xapian::termcount index; 00458 ESet eset; 00459 00460 public: 00464 ESetIterator() : index(0), eset() { } 00465 00466 ~ESetIterator() { } 00467 00469 ESetIterator(const ESetIterator &other) { 00470 index = other.index; 00471 eset = other.eset; 00472 } 00473 00475 void operator=(const ESetIterator &other) { 00476 index = other.index; 00477 eset = other.eset; 00478 } 00479 00481 ESetIterator & operator++() { 00482 ++index; 00483 return *this; 00484 } 00485 00487 ESetIterator operator++(int) { 00488 ESetIterator tmp = *this; 00489 ++index; 00490 return tmp; 00491 } 00492 00494 ESetIterator & operator--() { 00495 --index; 00496 return *this; 00497 } 00498 00500 ESetIterator operator--(int) { 00501 ESetIterator tmp = *this; 00502 --index; 00503 return tmp; 00504 } 00505 00507 const std::string & operator *() const; 00508 00510 Xapian::weight get_weight() const; 00511 00515 std::string get_description() const; 00516 00518 00519 typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator! 00520 typedef std::string value_type; 00521 typedef Xapian::termcount_diff difference_type; 00522 typedef std::string * pointer; 00523 typedef std::string & reference; 00525 }; 00526 00527 inline bool operator==(const ESetIterator &a, const ESetIterator &b) 00528 { 00529 return (a.index == b.index); 00530 } 00531 00532 inline bool operator!=(const ESetIterator &a, const ESetIterator &b) 00533 { 00534 return (a.index != b.index); 00535 } 00536 00541 class RSet { 00542 public: 00544 class Internal; 00545 00547 Xapian::Internal::RefCntPtr<Internal> internal; 00548 00550 RSet(const RSet &rset); 00551 00553 void operator=(const RSet &rset); 00554 00556 RSet(); 00557 00559 ~RSet(); 00560 00562 Xapian::doccount size() const; 00563 00565 bool empty() const; 00566 00568 void add_document(Xapian::docid did); 00569 00571 void add_document(const Xapian::MSetIterator & i) { add_document(*i); } 00572 00574 void remove_document(Xapian::docid did); 00575 00577 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); } 00578 00580 bool contains(Xapian::docid did) const; 00581 00583 bool contains(const Xapian::MSetIterator & i) { return contains(*i); } 00584 00589 std::string get_description() const; 00590 }; 00591 00594 class MatchDecider { 00595 public: 00598 virtual int operator()(const Xapian::Document &doc) const = 0; 00599 00601 virtual ~MatchDecider() {} 00602 }; 00603 00606 class ExpandDecider { 00607 public: 00610 virtual int operator()(const std::string & tname) const = 0; 00611 00613 virtual ~ExpandDecider() {} 00614 }; 00615 00626 class Enquire { 00627 private: 00629 Enquire(const Enquire &); 00630 00632 void operator=(const Enquire &); 00633 00634 public: 00635 class Internal; 00637 Xapian::Internal::RefCntPtr<Internal> internal; 00638 00654 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0); 00655 00658 ~Enquire(); 00659 00666 void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0); 00667 00674 const Xapian::Query & get_query(); 00675 00682 void set_weighting_scheme(const Weight &weight_); 00683 00710 void set_collapse_key(Xapian::valueno collapse_key); 00711 00712 typedef enum { 00713 ASCENDING = 1, 00714 DESCENDING = 0, 00715 DONT_CARE = 2 00716 } docid_order; 00717 00741 void set_docid_order(docid_order order); 00742 00744 void set_sort_forward(bool sort_forward) { 00745 set_docid_order(sort_forward ? ASCENDING : DESCENDING); 00746 } 00747 00766 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0); 00767 00769 void set_sorting(Xapian::valueno sort_key, int sort_bands, 00770 bool sort_by_relevance = false) { 00771 if (sort_bands > 1) { 00772 throw Xapian::UnimplementedError("sort bands are no longer supported"); 00773 } 00774 if (sort_bands == 0 || sort_key == Xapian::valueno(-1)) { 00775 set_sort_by_relevance(); 00776 } else if (!sort_by_relevance) { 00777 set_sort_by_value(sort_key); 00778 } else { 00779 set_sort_by_value_then_relevance(sort_key); 00780 } 00781 } 00782 00785 void set_sort_by_relevance(); 00786 00797 void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true); 00798 void set_sort_by_value_then_relevance(Xapian::valueno sort_key, 00799 bool ascending = true); 00800 // FIXME: consider implementing this: 00801 // void set_sort_by_relevance_then_value(Xapian::valueno sort_key, 00802 // bool ascending); 00803 00815 void set_bias(Xapian::weight bias_weight, time_t bias_halflife); 00816 00842 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, 00843 Xapian::doccount checkatleast = 0, 00844 const RSet * omrset = 0, 00845 const MatchDecider * mdecider = 0) const; 00846 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, 00847 const RSet * omrset, 00848 const MatchDecider * mdecider = 0) const { 00849 return get_mset(first, maxitems, 0, omrset, mdecider); 00850 } 00851 00852 static const int include_query_terms = 1; 00853 static const int use_exact_termfreq = 2; 00876 ESet get_eset(Xapian::termcount maxitems, 00877 const RSet & omrset, 00878 int flags = 0, 00879 double k = 1.0, 00880 const Xapian::ExpandDecider * edecider = 0) const; 00881 00895 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, 00896 const Xapian::ExpandDecider * edecider) const { 00897 return get_eset(maxitems, omrset, 0, 1.0, edecider); 00898 } 00899 00928 TermIterator get_matching_terms_begin(Xapian::docid did) const; 00929 00931 TermIterator get_matching_terms_end(Xapian::docid did) const; 00932 00955 TermIterator get_matching_terms_begin(const MSetIterator &it) const; 00956 00958 TermIterator get_matching_terms_end(const MSetIterator &it) const; 00959 00966 void register_match_decider(const std::string &name, 00967 const MatchDecider *mdecider = NULL); 00968 00972 std::string get_description() const; 00973 }; 00974 00975 } 00976 00977 class SocketServer; 00978 00979 namespace Xapian { 00980 00982 class Weight { 00983 friend class Enquire; // So Enquire can clone us 00984 friend class ::SocketServer; // So SocketServer can clone us - FIXME 00985 public: 00986 class Internal; 00987 protected: 00988 Weight(const Weight &); 00989 private: 00990 void operator=(Weight &); 00991 01001 virtual Weight * clone() const = 0; 01002 01003 protected: 01004 const Internal * internal; // Weight::Internal == StatsSource 01005 Xapian::doclength querysize; 01006 Xapian::termcount wqf; 01007 std::string tname; 01008 01009 public: 01010 Weight() { } 01011 virtual ~Weight() { } 01012 01025 Weight * create(const Internal * internal_, Xapian::doclength querysize_, 01026 Xapian::termcount wqf_, std::string tname_) const { 01027 Weight * wt = clone(); 01028 wt->internal = internal_; 01029 wt->querysize = querysize_; 01030 wt->wqf = wqf_; 01031 wt->tname = tname_; 01032 return wt; 01033 } 01034 01039 virtual std::string name() const = 0; 01040 01042 virtual std::string serialise() const = 0; 01043 01045 virtual Weight * unserialise(const std::string &s) const = 0; 01046 01054 virtual Xapian::weight get_sumpart(Xapian::termcount wdf, 01055 Xapian::doclength len) const = 0; 01056 01062 virtual Xapian::weight get_maxpart() const = 0; 01063 01072 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0; 01073 01077 virtual Xapian::weight get_maxextra() const = 0; 01078 01080 virtual bool get_sumpart_needs_doclength() const { return true; } 01081 }; 01082 01084 class BoolWeight : public Weight { 01085 public: 01086 BoolWeight * clone() const { 01087 return new BoolWeight; 01088 } 01089 BoolWeight() { } 01090 ~BoolWeight() { } 01091 std::string name() const { return "Bool"; } 01092 std::string serialise() const { return ""; } 01093 BoolWeight * unserialise(const std::string & /*s*/) const { 01094 return new BoolWeight; 01095 } 01096 Xapian::weight get_sumpart(Xapian::termcount /*wdf*/, Xapian::doclength /*len*/) const { return 0; } 01097 Xapian::weight get_maxpart() const { return 0; } 01098 01099 Xapian::weight get_sumextra(Xapian::doclength /*len*/) const { return 0; } 01100 Xapian::weight get_maxextra() const { return 0; } 01101 01102 bool get_sumpart_needs_doclength() const { return false; } 01103 }; 01104 01117 class BM25Weight : public Weight { 01118 private: 01119 mutable Xapian::weight termweight; 01120 mutable Xapian::doclength lenpart; 01121 01122 double k1, k2, k3, b; 01123 Xapian::doclength min_normlen; 01124 01125 mutable bool weight_calculated; 01126 01127 void calc_termweight() const; 01128 01129 public: 01148 BM25Weight(double k1_, double k2_, double k3_, double b_, 01149 double min_normlen_) 01150 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_), 01151 weight_calculated(false) 01152 { 01153 if (k1 < 0) k1 = 0; 01154 if (k2 < 0) k2 = 0; 01155 if (k3 < 0) k3 = 0; 01156 if (b < 0) b = 0; else if (b > 1) b = 1; 01157 } 01158 BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5), 01159 weight_calculated(false) { } 01160 01161 BM25Weight * clone() const; 01162 ~BM25Weight() { } 01163 std::string name() const; 01164 std::string serialise() const; 01165 BM25Weight * unserialise(const std::string & s) const; 01166 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const; 01167 Xapian::weight get_maxpart() const; 01168 01169 Xapian::weight get_sumextra(Xapian::doclength len) const; 01170 Xapian::weight get_maxextra() const; 01171 01172 bool get_sumpart_needs_doclength() const; 01173 }; 01174 01188 class TradWeight : public Weight { 01189 private: 01190 mutable Xapian::weight termweight; 01191 mutable Xapian::doclength lenpart; 01192 01193 double param_k; 01194 01195 mutable bool weight_calculated; 01196 01197 void calc_termweight() const; 01198 01199 public: 01207 explicit TradWeight(double k) : param_k(k), weight_calculated(false) { 01208 if (param_k < 0) param_k = 0; 01209 } 01210 01211 TradWeight() : param_k(1.0), weight_calculated(false) { } 01212 01213 TradWeight * clone() const; 01214 ~TradWeight() { } 01215 std::string name() const; 01216 std::string serialise() const; 01217 TradWeight * unserialise(const std::string & s) const; 01218 01219 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const; 01220 Xapian::weight get_maxpart() const; 01221 01222 Xapian::weight get_sumextra(Xapian::doclength len) const; 01223 Xapian::weight get_maxextra() const; 01224 01225 bool get_sumpart_needs_doclength() const; 01226 }; 01227 01228 } 01229 01230 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 0.9.2).
Generated on 15 Jul 2005 by Doxygen 1.3.8.