| 1 | |
|---|
| 2 | |
|---|
| 3 | |
|---|
| 4 | |
|---|
| 5 | |
|---|
| 6 | |
|---|
| 7 | |
|---|
| 8 | |
|---|
| 9 | |
|---|
| 10 | |
|---|
| 11 | |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | |
|---|
| 17 | from bisect import bisect_left, bisect_right, insort |
|---|
| 18 | from itertools import izip, repeat |
|---|
| 19 | |
|---|
| 20 | |
|---|
| 21 | """ |
|---|
| 22 | This module contains "matcher" classes. Matchers deal with posting lists. The |
|---|
| 23 | most basic matcher, which reads the list of postings for a term, will be |
|---|
| 24 | provided by the backend implementation (for example, |
|---|
| 25 | ``whoosh.filedb.filepostings.FilePostingReader``). The classes in this module |
|---|
| 26 | provide additional functionality, such as combining the results of two |
|---|
| 27 | matchers, or modifying the results of a matcher. |
|---|
| 28 | |
|---|
| 29 | You do not need to deal with the classes in this module unless you need to |
|---|
| 30 | write your own Matcher implementation to provide some new functionality. These |
|---|
| 31 | classes are not instantiated by the user. They are usually created by a |
|---|
| 32 | :class:`~whoosh.query.Query` object's ``matcher()`` method, which returns the |
|---|
| 33 | appropriate matcher to implement the query (for example, the ``Or`` query's |
|---|
| 34 | ``matcher()`` method returns a ``UnionMatcher`` object). |
|---|
| 35 | |
|---|
| 36 | Certain backends support "quality" optimizations. These backends have the |
|---|
| 37 | ability to skip ahead if it knows the current block of postings can't |
|---|
| 38 | contribute to the top N documents. If the matcher tree and backend support |
|---|
| 39 | these optimizations, the matcher's ``supports_quality()`` method will return |
|---|
| 40 | ``True``. |
|---|
| 41 | """ |
|---|
| 42 | |
|---|
| 43 | |
|---|
| 44 | class ReadTooFar(Exception): |
|---|
| 45 | """Raised when next() or skip_to() is called on an inactive matchers. |
|---|
| 46 | """ |
|---|
| 47 | |
|---|
| 48 | |
|---|
| 49 | class NoQualityAvailable(Exception): |
|---|
| 50 | """Raised when quality methods are called on a matcher that does not |
|---|
| 51 | support quality-based optimizations. |
|---|
| 52 | """ |
|---|
| 53 | |
|---|
| 54 | |
|---|
| 55 | |
|---|
| 56 | |
|---|
| 57 | class Matcher(object): |
|---|
| 58 | """Base class for all matchers. |
|---|
| 59 | """ |
|---|
| 60 | |
|---|
| 61 | def is_active(self): |
|---|
| 62 | """Returns True if this matcher is still "active", that is, it has not |
|---|
| 63 | yet reached the end of the posting list. |
|---|
| 64 | """ |
|---|
| 65 | |
|---|
| 66 | raise NotImplementedError |
|---|
| 67 | |
|---|
| 68 | def replace(self): |
|---|
| 69 | """Returns a possibly-simplified version of this matcher. For example, |
|---|
| 70 | if one of the children of a UnionMatcher is no longer active, calling |
|---|
| 71 | this method on the UnionMatcher will return the other child. |
|---|
| 72 | """ |
|---|
| 73 | |
|---|
| 74 | return self |
|---|
| 75 | |
|---|
| 76 | def copy(self): |
|---|
| 77 | """Returns a copy of this matcher. |
|---|
| 78 | """ |
|---|
| 79 | |
|---|
| 80 | raise NotImplementedError |
|---|
| 81 | |
|---|
| 82 | def depth(self): |
|---|
| 83 | """Returns the depth of the tree under this matcher, or 0 if this |
|---|
| 84 | matcher does not have any children. |
|---|
| 85 | """ |
|---|
| 86 | |
|---|
| 87 | return 0 |
|---|
| 88 | |
|---|
| 89 | def supports_quality(self): |
|---|
| 90 | """Returns True if this matcher supports the use of ``quality`` and |
|---|
| 91 | ``block_quality``. |
|---|
| 92 | """ |
|---|
| 93 | |
|---|
| 94 | return False |
|---|
| 95 | |
|---|
| 96 | def quality(self): |
|---|
| 97 | """Returns a quality measurement of the current posting, according to |
|---|
| 98 | the current weighting algorithm. Raises ``NoQualityAvailable`` if the |
|---|
| 99 | matcher or weighting do not support quality measurements. |
|---|
| 100 | """ |
|---|
| 101 | |
|---|
| 102 | raise NoQualityAvailable |
|---|
| 103 | |
|---|
| 104 | def block_quality(self): |
|---|
| 105 | """Returns a quality measurement of the current block of postings, |
|---|
| 106 | according to the current weighting algorithm. Raises |
|---|
| 107 | ``NoQualityAvailable`` if the matcher or weighting do not support |
|---|
| 108 | quality measurements. |
|---|
| 109 | """ |
|---|
| 110 | |
|---|
| 111 | raise NoQualityAvailable |
|---|
| 112 | |
|---|
| 113 | def id(self): |
|---|
| 114 | """Returns the ID of the current posting. |
|---|
| 115 | """ |
|---|
| 116 | |
|---|
| 117 | raise NotImplementedError |
|---|
| 118 | |
|---|
| 119 | def all_ids(self): |
|---|
| 120 | """Returns a generator of all IDs in the matcher. |
|---|
| 121 | |
|---|
| 122 | What this method returns for a matcher that has already read some |
|---|
| 123 | postings (whether it only yields the remaining postings or all postings |
|---|
| 124 | from the beginning) is undefined, so it's best to only use this method |
|---|
| 125 | on fresh matchers. |
|---|
| 126 | """ |
|---|
| 127 | |
|---|
| 128 | i = 0 |
|---|
| 129 | while self.is_active(): |
|---|
| 130 | yield self.id() |
|---|
| 131 | self.next() |
|---|
| 132 | i += 1 |
|---|
| 133 | if i == 10: |
|---|
| 134 | self = self.replace() |
|---|
| 135 | i = 0 |
|---|
| 136 | |
|---|
| 137 | def all_items(self): |
|---|
| 138 | """Returns a generator of all (ID, encoded value) pairs in the matcher. |
|---|
| 139 | |
|---|
| 140 | What this method returns for a matcher that has already read some |
|---|
| 141 | postings (whether it only yields the remaining postings or all postings |
|---|
| 142 | from the beginning) is undefined, so it's best to only use this method |
|---|
| 143 | on fresh matchers. |
|---|
| 144 | """ |
|---|
| 145 | |
|---|
| 146 | i = 0 |
|---|
| 147 | while self.is_active(): |
|---|
| 148 | yield (self.id(), self.value()) |
|---|
| 149 | self.next() |
|---|
| 150 | i += 1 |
|---|
| 151 | if i == 10: |
|---|
| 152 | self = self.replace() |
|---|
| 153 | i = 0 |
|---|
| 154 | |
|---|
| 155 | def items_as(self, astype): |
|---|
| 156 | """Returns a generator of all (ID, decoded value) pairs in the matcher. |
|---|
| 157 | |
|---|
| 158 | What this method returns for a matcher that has already read some |
|---|
| 159 | postings (whether it only yields the remaining postings or all postings |
|---|
| 160 | from the beginning) is undefined, so it's best to only use this method |
|---|
| 161 | on fresh matchers. |
|---|
| 162 | """ |
|---|
| 163 | |
|---|
| 164 | while self.is_active(): |
|---|
| 165 | yield (self.id(), self.value_as(astype)) |
|---|
| 166 | |
|---|
| 167 | def value(self): |
|---|
| 168 | """Returns the encoded value of the current posting. |
|---|
| 169 | """ |
|---|
| 170 | |
|---|
| 171 | raise NotImplementedError |
|---|
| 172 | |
|---|
| 173 | def supports(self, astype): |
|---|
| 174 | """Returns True if the field's format supports the named data type, |
|---|
| 175 | for example 'frequency' or 'characters'. |
|---|
| 176 | """ |
|---|
| 177 | |
|---|
| 178 | raise NotImplementedError("supports not implemented in %s" % self.__class__) |
|---|
| 179 | |
|---|
| 180 | def value_as(self, astype): |
|---|
| 181 | """Returns the value(s) of the current posting as the given type. |
|---|
| 182 | """ |
|---|
| 183 | |
|---|
| 184 | raise NotImplementedError("value_as not implemented in %s" % self.__class__) |
|---|
| 185 | |
|---|
| 186 | def spans(self): |
|---|
| 187 | """Returns a list of :class:`whoosh.spans.Span` objects for the matches |
|---|
| 188 | in this document. Raises an exception if the field being searched does |
|---|
| 189 | not store positions. |
|---|
| 190 | """ |
|---|
| 191 | |
|---|
| 192 | from whoosh.spans import Span |
|---|
| 193 | if self.supports("characters"): |
|---|
| 194 | return [Span(pos, startchar=startchar, endchar=endchar) |
|---|
| 195 | for pos, startchar, endchar in self.value_as("characters")] |
|---|
| 196 | elif self.supports("positions"): |
|---|
| 197 | return [Span(pos) for pos in self.value_as("positions")] |
|---|
| 198 | else: |
|---|
| 199 | raise Exception("Field does not support spans") |
|---|
| 200 | |
|---|
| 201 | def skip_to(self, id): |
|---|
| 202 | """Moves this matcher to the first posting with an ID equal to or |
|---|
| 203 | greater than the given ID. |
|---|
| 204 | """ |
|---|
| 205 | |
|---|
| 206 | while self.is_active() and self.id() < id: |
|---|
| 207 | self.next() |
|---|
| 208 | |
|---|
| 209 | def skip_to_quality(self, minquality): |
|---|
| 210 | """Moves this matcher to the next block with greater than the given |
|---|
| 211 | minimum quality value. |
|---|
| 212 | """ |
|---|
| 213 | |
|---|
| 214 | raise NotImplementedError |
|---|
| 215 | |
|---|
| 216 | def next(self): |
|---|
| 217 | """Moves this matcher to the next posting. |
|---|
| 218 | """ |
|---|
| 219 | |
|---|
| 220 | raise NotImplementedError(self.__class__.__name__) |
|---|
| 221 | |
|---|
| 222 | def weight(self): |
|---|
| 223 | """Returns the weight of the current posting. |
|---|
| 224 | """ |
|---|
| 225 | |
|---|
| 226 | return self.value_as("weight") |
|---|
| 227 | |
|---|
| 228 | def score(self): |
|---|
| 229 | """Returns the score of the current posting. |
|---|
| 230 | """ |
|---|
| 231 | |
|---|
| 232 | raise NotImplementedError |
|---|
| 233 | |
|---|
| 234 | |
|---|
| 235 | class NullMatcher(Matcher): |
|---|
| 236 | """Matcher with no postings which is never active. |
|---|
| 237 | """ |
|---|
| 238 | |
|---|
| 239 | def is_active(self): |
|---|
| 240 | return False |
|---|
| 241 | |
|---|
| 242 | def all_ids(self): |
|---|
| 243 | return [] |
|---|
| 244 | |
|---|
| 245 | def copy(self): |
|---|
| 246 | return self |
|---|
| 247 | |
|---|
| 248 | |
|---|
| 249 | class ListMatcher(Matcher): |
|---|
| 250 | """Synthetic matcher backed by a list of IDs. |
|---|
| 251 | """ |
|---|
| 252 | |
|---|
| 253 | def __init__(self, ids, weights=None, values=None, format=None, |
|---|
| 254 | scorer=None, position=0): |
|---|
| 255 | """ |
|---|
| 256 | :param ids: a list of doc IDs. |
|---|
| 257 | :param weights: a list of weights corresponding to the list of IDs. |
|---|
| 258 | If this argument is not supplied, a list of 1.0 values is used. |
|---|
| 259 | :param values: a list of encoded values corresponding to the list of |
|---|
| 260 | IDs. |
|---|
| 261 | :param format: a :class:`whoosh.formats.Format` object representing the |
|---|
| 262 | format of the field. |
|---|
| 263 | :param scorer: a :class:`whoosh.scoring.BaseScorer` object for scoring |
|---|
| 264 | the postings. |
|---|
| 265 | """ |
|---|
| 266 | |
|---|
| 267 | self._ids = ids |
|---|
| 268 | self._weights = weights |
|---|
| 269 | self._values = values |
|---|
| 270 | self._i = position |
|---|
| 271 | self._format = format |
|---|
| 272 | self._scorer = scorer |
|---|
| 273 | |
|---|
| 274 | def __repr__(self): |
|---|
| 275 | return "%s(%r, %r, %r, %d)" % (self.__class__.__name__, self._ids, |
|---|
| 276 | self._weights, self._values, self._i) |
|---|
| 277 | |
|---|
| 278 | def is_active(self): |
|---|
| 279 | return self._i < len(self._ids) |
|---|
| 280 | |
|---|
| 281 | def copy(self): |
|---|
| 282 | return self.__class__(self._ids, self._weights, self._values, |
|---|
| 283 | self._format, self._scorer, self._i) |
|---|
| 284 | |
|---|
| 285 | def supports_quality(self): |
|---|
| 286 | return self._scorer is not None |
|---|
| 287 | |
|---|
| 288 | def quality(self): |
|---|
| 289 | return self._scorer.quality(self) |
|---|
| 290 | |
|---|
| 291 | def id(self): |
|---|
| 292 | return self._ids[self._i] |
|---|
| 293 | |
|---|
| 294 | def all_ids(self): |
|---|
| 295 | return iter(self._ids) |
|---|
| 296 | |
|---|
| 297 | def all_items(self): |
|---|
| 298 | values = self._values |
|---|
| 299 | if values is None: |
|---|
| 300 | values = repeat('') |
|---|
| 301 | |
|---|
| 302 | return izip(self._ids, values) |
|---|
| 303 | |
|---|
| 304 | def value(self): |
|---|
| 305 | if self._values: |
|---|
| 306 | return self._values[self._i] |
|---|
| 307 | else: |
|---|
| 308 | return '' |
|---|
| 309 | |
|---|
| 310 | def value_as(self, astype): |
|---|
| 311 | decoder = self._format.decoder(astype) |
|---|
| 312 | return decoder(self.value()) |
|---|
| 313 | |
|---|
| 314 | def supports(self, astype): |
|---|
| 315 | return self._format.supports(astype) |
|---|
| 316 | |
|---|
| 317 | def next(self): |
|---|
| 318 | self._i += 1 |
|---|
| 319 | |
|---|
| 320 | def weight(self): |
|---|
| 321 | if self._weights: |
|---|
| 322 | return self._weights[self._i] |
|---|
| 323 | else: |
|---|
| 324 | return 1.0 |
|---|
| 325 | |
|---|
| 326 | def score(self): |
|---|
| 327 | if self._scorer: |
|---|
| 328 | return self._scorer.score(self) |
|---|
| 329 | else: |
|---|
| 330 | return self.weight() |
|---|
| 331 | |
|---|
| 332 | |
|---|
| 333 | class WrappingMatcher(Matcher): |
|---|
| 334 | """Base class for matchers that wrap sub-matchers. |
|---|
| 335 | """ |
|---|
| 336 | |
|---|
| 337 | def __init__(self, child, boost=1.0): |
|---|
| 338 | self.child = child |
|---|
| 339 | self.boost = boost |
|---|
| 340 | |
|---|
| 341 | def __repr__(self): |
|---|
| 342 | return "%s(%r, boost=%s)" % (self.__class__.__name__, self.child, self.boost) |
|---|
| 343 | |
|---|
| 344 | def copy(self): |
|---|
| 345 | kwargs = {} |
|---|
| 346 | if hasattr(self, "boost"): |
|---|
| 347 | kwargs["boost"] = self.boost |
|---|
| 348 | return self.__class__(self.child.copy(), **kwargs) |
|---|
| 349 | |
|---|
| 350 | def depth(self): |
|---|
| 351 | return 1 + self.child.depth() |
|---|
| 352 | |
|---|
| 353 | def _replacement(self, newchild): |
|---|
| 354 | return self.__class__(newchild, boost=self.boost) |
|---|
| 355 | |
|---|
| 356 | def replace(self): |
|---|
| 357 | r = self.child.replace() |
|---|
| 358 | if not r.is_active(): |
|---|
| 359 | return NullMatcher() |
|---|
| 360 | if r is not self.child: |
|---|
| 361 | try: |
|---|
| 362 | return self._replacement(r) |
|---|
| 363 | except TypeError, e: |
|---|
| 364 | raise TypeError("Class %s got exception %s trying " |
|---|
| 365 | "to replace itself" % (self.__class__, e)) |
|---|
| 366 | else: |
|---|
| 367 | return self |
|---|
| 368 | |
|---|
| 369 | def id(self): |
|---|
| 370 | return self.child.id() |
|---|
| 371 | |
|---|
| 372 | def all_ids(self): |
|---|
| 373 | return self.child.all_ids() |
|---|
| 374 | |
|---|
| 375 | def is_active(self): |
|---|
| 376 | return self.child.is_active() |
|---|
| 377 | |
|---|
| 378 | def supports(self, astype): |
|---|
| 379 | return self.child.supports(astype) |
|---|
| 380 | |
|---|
| 381 | def value(self): |
|---|
| 382 | return self.child.value() |
|---|
| 383 | |
|---|
| 384 | def value_as(self, astype): |
|---|
| 385 | return self.child.value_as(astype) |
|---|
| 386 | |
|---|
| 387 | def spans(self): |
|---|
| 388 | return self.child.spans() |
|---|
| 389 | |
|---|
| 390 | def skip_to(self, id): |
|---|
| 391 | return self.child.skip_to(id) |
|---|
| 392 | |
|---|
| 393 | def next(self): |
|---|
| 394 | self.child.next() |
|---|
| 395 | |
|---|
| 396 | def supports_quality(self): |
|---|
| 397 | return self.child.supports_quality() |
|---|
| 398 | |
|---|
| 399 | def skip_to_quality(self, minquality): |
|---|
| 400 | return self.child.skip_to_quality(minquality/self.boost) |
|---|
| 401 | |
|---|
| 402 | def quality(self): |
|---|
| 403 | return self.child.quality() * self.boost |
|---|
| 404 | |
|---|
| 405 | def block_quality(self): |
|---|
| 406 | return self.child.block_quality() * self.boost |
|---|
| 407 | |
|---|
| 408 | def weight(self): |
|---|
| 409 | return self.child.weight() * self.boost |
|---|
| 410 | |
|---|
| 411 | def score(self): |
|---|
| 412 | return self.child.score() * self.boost |
|---|
| 413 | |
|---|
| 414 | |
|---|
| 415 | class MultiMatcher(Matcher): |
|---|
| 416 | """Serializes the results of a list of sub-matchers. |
|---|
| 417 | """ |
|---|
| 418 | |
|---|
| 419 | def __init__(self, matchers, idoffsets, current=0): |
|---|
| 420 | """ |
|---|
| 421 | :param matchers: a list of Matcher objects. |
|---|
| 422 | :param idoffsets: a list of offsets corresponding to items in the |
|---|
| 423 | ``matchers`` list. |
|---|
| 424 | """ |
|---|
| 425 | |
|---|
| 426 | self.matchers = matchers |
|---|
| 427 | self.offsets = idoffsets |
|---|
| 428 | self.current = current |
|---|
| 429 | self._next_matcher() |
|---|
| 430 | |
|---|
| 431 | def __repr__(self): |
|---|
| 432 | return "%s(%r, %r, current=%s)" % (self.__class__.__name__, |
|---|
| 433 | self.matchers, self.offsets, |
|---|
| 434 | self.current) |
|---|
| 435 | |
|---|
| 436 | def is_active(self): |
|---|
| 437 | return self.current < len(self.matchers) |
|---|
| 438 | |
|---|
| 439 | def _next_matcher(self): |
|---|
| 440 | matchers = self.matchers |
|---|
| 441 | while self.current < len(matchers) and not matchers[self.current].is_active(): |
|---|
| 442 | self.current += 1 |
|---|
| 443 | |
|---|
| 444 | def copy(self): |
|---|
| 445 | return self.__class__([mr.copy() for mr in self.matchers[self.current:]], |
|---|
| 446 | self.offsets[self.current:], current=self.current) |
|---|
| 447 | |
|---|
| 448 | def depth(self): |
|---|
| 449 | if self.is_active(): |
|---|
| 450 | return 1 + max(mr.depth() for mr in self.matchers[self.current:]) |
|---|
| 451 | else: |
|---|
| 452 | return 0 |
|---|
| 453 | |
|---|
| 454 | def replace(self): |
|---|
| 455 | if not self.is_active(): |
|---|
| 456 | return NullMatcher() |
|---|
| 457 | |
|---|
| 458 | |
|---|
| 459 | |
|---|
| 460 | return self |
|---|
| 461 | |
|---|
| 462 | def id(self): |
|---|
| 463 | current = self.current |
|---|
| 464 | return self.matchers[current].id() + self.offsets[current] |
|---|
| 465 | |
|---|
| 466 | def all_ids(self): |
|---|
| 467 | offsets = self.offsets |
|---|
| 468 | for i, mr in enumerate(self.matchers): |
|---|
| 469 | for id in mr.all_ids(): |
|---|
| 470 | yield id + offsets[i] |
|---|
| 471 | |
|---|
| 472 | def spans(self): |
|---|
| 473 | return self.matchers[self.current].spans() |
|---|
| 474 | |
|---|
| 475 | def supports(self, astype): |
|---|
| 476 | return self.matchers[self.current].supports(astype) |
|---|
| 477 | |
|---|
| 478 | def value(self): |
|---|
| 479 | return self.matchers[self.current].value() |
|---|
| 480 | |
|---|
| 481 | def value_as(self, astype): |
|---|
| 482 | return self.matchers[self.current].value_as(astype) |
|---|
| 483 | |
|---|
| 484 | def next(self): |
|---|
| 485 | if not self.is_active(): raise ReadTooFar |
|---|
| 486 | |
|---|
| 487 | self.matchers[self.current].next() |
|---|
| 488 | if not self.matchers[self.current].is_active(): |
|---|
| 489 | self._next_matcher() |
|---|
| 490 | |
|---|
| 491 | def skip_to(self, id): |
|---|
| 492 | if not self.is_active(): raise ReadTooFar |
|---|
| 493 | if id <= self.id(): return |
|---|
| 494 | |
|---|
| 495 | matchers = self.matchers |
|---|
| 496 | offsets = self.offsets |
|---|
| 497 | r = False |
|---|
| 498 | |
|---|
| 499 | while self.current < len(matchers) and id > self.id(): |
|---|
| 500 | mr = matchers[self.current] |
|---|
| 501 | sr = mr.skip_to(id - offsets[self.current]) |
|---|
| 502 | r = sr or r |
|---|
| 503 | if mr.is_active(): |
|---|
| 504 | break |
|---|
| 505 | |
|---|
| 506 | self._next_matcher() |
|---|
| 507 | |
|---|
| 508 | return r |
|---|
| 509 | |
|---|
| 510 | def supports_quality(self): |
|---|
| 511 | return all(mr.supports_quality() for mr in self.matchers[self.current:]) |
|---|
| 512 | |
|---|
| 513 | def quality(self): |
|---|
| 514 | return self.matchers[self.current].quality() |
|---|
| 515 | |
|---|
| 516 | def block_quality(self): |
|---|
| 517 | return self.matchers[self.current].block_quality() |
|---|
| 518 | |
|---|
| 519 | def weight(self): |
|---|
| 520 | return self.matchers[self.current].weight() |
|---|
| 521 | |
|---|
| 522 | def score(self): |
|---|
| 523 | return self.matchers[self.current].score() |
|---|
| 524 | |
|---|
| 525 | |
|---|
| 526 | class ExcludeMatcher(WrappingMatcher): |
|---|
| 527 | """Excludes a list of IDs from the postings returned by the wrapped |
|---|
| 528 | matcher. |
|---|
| 529 | """ |
|---|
| 530 | |
|---|
| 531 | def __init__(self, child, excluded, boost=1.0): |
|---|
| 532 | super(ExcludeMatcher, self).__init__(child) |
|---|
| 533 | self.excluded = excluded |
|---|
| 534 | self.boost = boost |
|---|
| 535 | self._find_next() |
|---|
| 536 | |
|---|
| 537 | def __repr__(self): |
|---|
| 538 | return "%s(%r, %r, boost=%s)" % (self.__class__.__name__, self.child, |
|---|
| 539 | self.excluded, self.boost) |
|---|
| 540 | |
|---|
| 541 | def copy(self): |
|---|
| 542 | return self.__class__(self.child.copy(), self.excluded, boost=self.boost) |
|---|
| 543 | |
|---|
| 544 | def _replacement(self, newchild): |
|---|
| 545 | return self.__class__(newchild, self.excluded, boost=self.boost) |
|---|
| 546 | |
|---|
| 547 | def _find_next(self): |
|---|
| 548 | child = self.child |
|---|
| 549 | excluded = self.excluded |
|---|
| 550 | r = False |
|---|
| 551 | while child.is_active() and child.id() in excluded: |
|---|
| 552 | r = child.next() or r |
|---|
| 553 | return r |
|---|
| 554 | |
|---|
| 555 | def next(self): |
|---|
| 556 | self.child.next() |
|---|
| 557 | self._find_next() |
|---|
| 558 | |
|---|
| 559 | def skip_to(self, id): |
|---|
| 560 | self.child.skip_to(id) |
|---|
| 561 | self._find_next() |
|---|
| 562 | |
|---|
| 563 | def all_ids(self): |
|---|
| 564 | excluded = self.excluded |
|---|
| 565 | return (id for id in self.child.all_ids() if id not in excluded) |
|---|
| 566 | |
|---|
| 567 | def all_items(self): |
|---|
| 568 | excluded = self.excluded |
|---|
| 569 | return (item for item in self.child.all_items() |
|---|
| 570 | if item[0] not in excluded) |
|---|
| 571 | |
|---|
| 572 | |
|---|
| 573 | class BiMatcher(Matcher): |
|---|
| 574 | """Base class for matchers that combine the results of two sub-matchers in |
|---|
| 575 | some way. |
|---|
| 576 | """ |
|---|
| 577 | |
|---|
| 578 | def __init__(self, a, b): |
|---|
| 579 | super(BiMatcher, self).__init__() |
|---|
| 580 | self.a = a |
|---|
| 581 | self.b = b |
|---|
| 582 | |
|---|
| 583 | def __repr__(self): |
|---|
| 584 | return "%s(%r, %r)" % (self.__class__.__name__, self.a, self.b) |
|---|
| 585 | |
|---|
| 586 | def copy(self): |
|---|
| 587 | return self.__class__(self.a.copy(), self.b.copy()) |
|---|
| 588 | |
|---|
| 589 | def depth(self): |
|---|
| 590 | return 1 + max(self.a.depth(), self.b.depth()) |
|---|
| 591 | |
|---|
| 592 | def skip_to(self, id): |
|---|
| 593 | if not self.is_active(): raise ReadTooFar |
|---|
| 594 | ra = self.a.skip_to(id) |
|---|
| 595 | rb = self.b.skip_to(id) |
|---|
| 596 | return ra or rb |
|---|
| 597 | |
|---|
| 598 | def supports_quality(self): |
|---|
| 599 | return self.a.supports_quality() and self.b.supports_quality() |
|---|
| 600 | |
|---|
| 601 | def supports(self, astype): |
|---|
| 602 | return self.a.supports(astype) and self.b.supports(astype) |
|---|
| 603 | |
|---|
| 604 | |
|---|
| 605 | class AdditiveBiMatcher(BiMatcher): |
|---|
| 606 | """Base class for binary matchers where the scores of the sub-matchers are |
|---|
| 607 | added together. |
|---|
| 608 | """ |
|---|
| 609 | |
|---|
| 610 | def quality(self): |
|---|
| 611 | q = 0.0 |
|---|
| 612 | if self.a.is_active(): q += self.a.quality() |
|---|
| 613 | if self.b.is_active(): q += self.b.quality() |
|---|
| 614 | return q |
|---|
| 615 | |
|---|
| 616 | def block_quality(self): |
|---|
| 617 | bq = 0.0 |
|---|
| 618 | if self.a.is_active(): bq += self.a.block_quality() |
|---|
| 619 | if self.b.is_active(): bq += self.b.block_quality() |
|---|
| 620 | return bq |
|---|
| 621 | |
|---|
| 622 | def weight(self): |
|---|
| 623 | return (self.a.weight() + self.b.weight()) |
|---|
| 624 | |
|---|
| 625 | def score(self): |
|---|
| 626 | return (self.a.score() + self.b.score()) |
|---|
| 627 | |
|---|
| 628 | |
|---|
| 629 | class UnionMatcher(AdditiveBiMatcher): |
|---|
| 630 | """Matches the union (OR) of the postings in the two sub-matchers. |
|---|
| 631 | """ |
|---|
| 632 | |
|---|
| 633 | def replace(self): |
|---|
| 634 | a = self.a.replace() |
|---|
| 635 | b = self.b.replace() |
|---|
| 636 | |
|---|
| 637 | a_active = a.is_active() |
|---|
| 638 | b_active = b.is_active() |
|---|
| 639 | if not (a_active or b_active): return NullMatcher() |
|---|
| 640 | if not a_active: |
|---|
| 641 | return b |
|---|
| 642 | if not b_active: |
|---|
| 643 | return a |
|---|
| 644 | |
|---|
| 645 | if a is not self.a or b is not self.b: |
|---|
| 646 | return self.__class__(a, b) |
|---|
| 647 | return self |
|---|
| 648 | |
|---|
| 649 | def is_active(self): |
|---|
| 650 | return self.a.is_active() or self.b.is_active() |
|---|
| 651 | |
|---|
| 652 | def skip_to(self, id): |
|---|
| 653 | ra = rb = False |
|---|
| 654 | if self.a.is_active(): |
|---|
| 655 | ra = self.a.skip_to(id) |
|---|
| 656 | if self.b.is_active(): |
|---|
| 657 | rb = self.b.skip_to(id) |
|---|
| 658 | return ra or rb |
|---|
| 659 | |
|---|
| 660 | def id(self): |
|---|
| 661 | a = self.a |
|---|
| 662 | b = self.b |
|---|
| 663 | if not a.is_active(): return b.id() |
|---|
| 664 | if not b.is_active(): return a.id() |
|---|
| 665 | return min(a.id(), b.id()) |
|---|
| 666 | |
|---|
| 667 | |
|---|
| 668 | |
|---|
| 669 | def all_ids(self): |
|---|
| 670 | return iter(sorted(set(self.a.all_ids()) | set(self.b.all_ids()))) |
|---|
| 671 | |
|---|
| 672 | def next(self): |
|---|
| 673 | a = self.a |
|---|
| 674 | b = self.b |
|---|
| 675 | a_active = a.is_active() |
|---|
| 676 | b_active = b.is_active() |
|---|
| 677 | |
|---|
| 678 | |
|---|
| 679 | if not (a_active or b_active): |
|---|
| 680 | raise ReadTooFar |
|---|
| 681 | elif not a_active: |
|---|
| 682 | return b.next() |
|---|
| 683 | elif not b_active: |
|---|
| 684 | return a.next() |
|---|
| 685 | |
|---|
| 686 | a_id = a.id() |
|---|
| 687 | b_id = b.id() |
|---|
| 688 | ar = br = None |
|---|
| 689 | if a_id <= b_id: ar = a.next() |
|---|
| 690 | if b_id <= a_id: br = b.next() |
|---|
| 691 | return ar or br |
|---|
| 692 | |
|---|
| 693 | def spans(self): |
|---|
| 694 | |
|---|
| 695 | |
|---|
| 696 | if not self.a.is_active(): return self.b.spans() |
|---|
| 697 | if not self.b.is_active(): return self.a.spans() |
|---|
| 698 | id_a = self.a.id() |
|---|
| 699 | id_b = self.b.id() |
|---|
| 700 | if id_a < id_b: |
|---|
| 701 | return self.a.spans() |
|---|
| 702 | elif id_b < id_a: |
|---|
| 703 | return self.b.spans() |
|---|
| 704 | else: |
|---|
| 705 | return sorted(set(self.a.spans()) | set(self.b.spans())) |
|---|
| 706 | |
|---|
| 707 | def score(self): |
|---|
| 708 | a = self.a |
|---|
| 709 | b = self.b |
|---|
| 710 | |
|---|
| 711 | if not a.is_active(): return b.score() |
|---|
| 712 | if not b.is_active(): return a.score() |
|---|
| 713 | |
|---|
| 714 | id_a = a.id() |
|---|
| 715 | id_b = b.id() |
|---|
| 716 | if id_a < id_b: |
|---|
| 717 | return a.score() |
|---|
| 718 | elif id_b < id_a: |
|---|
| 719 | return b.score() |
|---|
| 720 | else: |
|---|
| 721 | return (a.score() + b.score()) |
|---|
| 722 | |
|---|
| 723 | def skip_to_quality(self, minquality): |
|---|
| 724 | a = self.a |
|---|
| 725 | b = self.b |
|---|
| 726 | minquality = minquality |
|---|
| 727 | |
|---|
| 728 | |
|---|
| 729 | if not a.is_active(): |
|---|
| 730 | sk = b.skip_to_quality(minquality) |
|---|
| 731 | return sk |
|---|
| 732 | elif not b.is_active(): |
|---|
| 733 | return a.skip_to_quality(minquality) |
|---|
| 734 | |
|---|
| 735 | skipped = 0 |
|---|
| 736 | aq = a.block_quality() |
|---|
| 737 | bq = b.block_quality() |
|---|
| 738 | while a.is_active() and b.is_active() and aq + bq <= minquality: |
|---|
| 739 | if aq < bq: |
|---|
| 740 | skipped += a.skip_to_quality(minquality - bq) |
|---|
| 741 | aq = a.block_quality() |
|---|
| 742 | else: |
|---|
| 743 | skipped += b.skip_to_quality(minquality - aq) |
|---|
| 744 | bq = b.block_quality() |
|---|
| 745 | |
|---|
| 746 | return skipped |
|---|
| 747 | |
|---|
| 748 | |
|---|
| 749 | class DisjunctionMaxMatcher(UnionMatcher): |
|---|
| 750 | """Matches the union (OR) of two sub-matchers. Where both sub-matchers |
|---|
| 751 | match the same posting, returns the weight/score of the higher-scoring |
|---|
| 752 | posting. |
|---|
| 753 | """ |
|---|
| 754 | |
|---|
| 755 | |
|---|
| 756 | |
|---|
| 757 | |
|---|
| 758 | |
|---|
| 759 | |
|---|
| 760 | def __init__(self, a, b, tiebreak=0.0): |
|---|
| 761 | super(DisjunctionMaxMatcher, self).__init__(a, b) |
|---|
| 762 | self.tiebreak = tiebreak |
|---|
| 763 | |
|---|
| 764 | def copy(self): |
|---|
| 765 | return self.__class__(self.a.copy(), self.b.copy(), |
|---|
| 766 | tiebreak=self.tiebreak) |
|---|
| 767 | |
|---|
| 768 | def score(self): |
|---|
| 769 | return max(self.a.score(), self.b.score()) |
|---|
| 770 | |
|---|
| 771 | def quality(self): |
|---|
| 772 | return max(self.a.quality(), self.b.quality()) |
|---|
| 773 | |
|---|
| 774 | def block_quality(self): |
|---|
| 775 | return max(self.a.block_quality(), self.b.block_quality()) |
|---|
| 776 | |
|---|
| 777 | def skip_to_quality(self, minquality): |
|---|
| 778 | a = self.a |
|---|
| 779 | b = self.b |
|---|
| 780 | minquality = minquality |
|---|
| 781 | |
|---|
| 782 | |
|---|
| 783 | if not a.is_active(): |
|---|
| 784 | sk = b.skip_to_quality(minquality) |
|---|
| 785 | return sk |
|---|
| 786 | elif not b.is_active(): |
|---|
| 787 | return a.skip_to_quality(minquality) |
|---|
| 788 | |
|---|
| 789 | skipped = 0 |
|---|
| 790 | aq = a.block_quality() |
|---|
| 791 | bq = b.block_quality() |
|---|
| 792 | while a.is_active() and b.is_active() and max(aq, bq) <= minquality: |
|---|
| 793 | if aq <= minquality: |
|---|
| 794 | skipped += a.skip_to_quality(minquality) |
|---|
| 795 | aq = a.block_quality() |
|---|
| 796 | if bq <= minquality: |
|---|
| 797 | skipped += b.skip_to_quality(minquality) |
|---|
| 798 | bq = b.block_quality() |
|---|
| 799 | return skipped |
|---|
| 800 | |
|---|
| 801 | |
|---|
| 802 | class IntersectionMatcher(AdditiveBiMatcher): |
|---|
| 803 | """Matches the intersection (AND) of the postings in the two sub-matchers. |
|---|
| 804 | """ |
|---|
| 805 | |
|---|
| 806 | def __init__(self, a, b): |
|---|
| 807 | super(IntersectionMatcher, self).__init__(a, b) |
|---|
| 808 | if (self.a.is_active() |
|---|
| 809 | and self.b.is_active() |
|---|
| 810 | and self.a.id() != self.b.id()): |
|---|
| 811 | self._find_next() |
|---|
| 812 | |
|---|
| 813 | def replace(self): |
|---|
| 814 | a = self.a.replace() |
|---|
| 815 | b = self.b.replace() |
|---|
| 816 | |
|---|
| 817 | a_active = a |
|---|
| 818 | b_active = b.is_active() |
|---|
| 819 | if not (a_active and b_active): return NullMatcher() |
|---|
| 820 | |
|---|
| 821 | if a is not self.a or b is not self.b: |
|---|
| 822 | return self.__class__(a, b) |
|---|
| 823 | return self |
|---|
| 824 | |
|---|
| 825 | def is_active(self): |
|---|
| 826 | return self.a.is_active() and self.b.is_active() |
|---|
| 827 | |
|---|
| 828 | def _find_next(self): |
|---|
| 829 | a = self.a |
|---|
| 830 | b = self.b |
|---|
| 831 | a_id = a.id() |
|---|
| 832 | b_id = b.id() |
|---|
| 833 | assert a_id != b_id |
|---|
| 834 | r = False |
|---|
| 835 | |
|---|
| 836 | while a.is_active() and b.is_active() and a_id != b_id: |
|---|
| 837 | if a_id < b_id: |
|---|
| 838 | ra = a.skip_to(b_id) |
|---|
| 839 | if not a.is_active(): return |
|---|
| 840 | r = r or ra |
|---|
| 841 | a_id = a.id() |
|---|
| 842 | else: |
|---|
| 843 | rb = b.skip_to(a_id) |
|---|
| 844 | if not b.is_active(): return |
|---|
| 845 | r = r or rb |
|---|
| 846 | b_id = b.id() |
|---|
| 847 | return r |
|---|
| 848 | |
|---|
| 849 | def id(self): |
|---|
| 850 | return self.a.id() |
|---|
| 851 | |
|---|
| 852 | |
|---|
| 853 | |
|---|
| 854 | def all_ids(self): |
|---|
| 855 | return iter(sorted(set(self.a.all_ids()) & set(self.b.all_ids()))) |
|---|
| 856 | |
|---|
| 857 | def skip_to(self, id): |
|---|
| 858 | if not self.is_active(): raise ReadTooFar |
|---|
| 859 | ra = self.a.skip_to(id) |
|---|
| 860 | rb = self.b.skip_to(id) |
|---|
| 861 | if self.is_active(): |
|---|
| 862 | rn = False |
|---|
| 863 | if self.a.id() != self.b.id(): |
|---|
| 864 | rn = self._find_next() |
|---|
| 865 | return ra or rb or rn |
|---|
| 866 | |
|---|
| 867 | def skip_to_quality(self, minquality): |
|---|
| 868 | a = self.a |
|---|
| 869 | b = self.b |
|---|
| 870 | minquality = minquality |
|---|
| 871 | |
|---|
| 872 | skipped = 0 |
|---|
| 873 | aq = a.block_quality() |
|---|
| 874 | bq = b.block_quality() |
|---|
| 875 | while a.is_active() and b.is_active() and aq + bq <= minquality: |
|---|
| 876 | if aq < bq: |
|---|
| 877 | skipped += a.skip_to_quality(minquality - bq) |
|---|
| 878 | else: |
|---|
| 879 | skipped += b.skip_to_quality(minquality - aq) |
|---|
| 880 | if a.id() != b.id(): |
|---|
| 881 | self._find_next() |
|---|
| 882 | aq = a.block_quality() |
|---|
| 883 | bq = b.block_quality() |
|---|
| 884 | return skipped |
|---|
| 885 | |
|---|
| 886 | def next(self): |
|---|
| 887 | if not self.is_active(): raise ReadTooFar |
|---|
| 888 | |
|---|
| 889 | |
|---|
| 890 | |
|---|
| 891 | ar = self.a.next() |
|---|
| 892 | if self.is_active(): |
|---|
| 893 | nr = self._find_next() |
|---|
| 894 | return ar or nr |
|---|
| 895 | |
|---|
| 896 | def spans(self): |
|---|
| 897 | return sorted(set(self.a.spans()) | set(self.b.spans())) |
|---|
| 898 | |
|---|
| 899 | |
|---|
| 900 | class AndNotMatcher(BiMatcher): |
|---|
| 901 | """Matches the postings in the first sub-matcher that are NOT present in |
|---|
| 902 | the second sub-matcher. |
|---|
| 903 | """ |
|---|
| 904 | |
|---|
| 905 | def __init__(self, a, b): |
|---|
| 906 | super(AndNotMatcher, self).__init__(a, b) |
|---|
| 907 | if (self.a.is_active() |
|---|
| 908 | and self.b.is_active() |
|---|
| 909 | and self.a.id() != self.b.id()): |
|---|
| 910 | self._find_next() |
|---|
| 911 | |
|---|
| 912 | def is_active(self): |
|---|
| 913 | return self.a.is_active() |
|---|
| 914 | |
|---|
| 915 | def _find_next(self): |
|---|
| 916 | pos = self.a |
|---|
| 917 | neg = self.b |
|---|
| 918 | if not neg.is_active(): return |
|---|
| 919 | pos_id = pos.id() |
|---|
| 920 | r = False |
|---|
| 921 | |
|---|
| 922 | if neg.id() < pos_id: |
|---|
| 923 | neg.skip_to(pos_id) |
|---|
| 924 | |
|---|
| 925 | while neg.is_active() and pos_id == neg.id(): |
|---|
| 926 | nr = pos.next() |
|---|
| 927 | r = r or nr |
|---|
| 928 | pos_id = pos.id() |
|---|
| 929 | neg.skip_to(pos_id) |
|---|
| 930 | |
|---|
| 931 | return r |
|---|
| 932 | |
|---|
| 933 | def replace(self): |
|---|
| 934 | if not self.a.is_active(): return NullMatcher() |
|---|
| 935 | if not self.b.is_active(): return self.a |
|---|
| 936 | return self |
|---|
| 937 | |
|---|
| 938 | def quality(self): |
|---|
| 939 | return self.a.quality() |
|---|
| 940 | |
|---|
| 941 | def block_quality(self): |
|---|
| 942 | return self.a.block_quality() |
|---|
| 943 | |
|---|
| 944 | def skip_to_quality(self, minquality): |
|---|
| 945 | skipped = self.a.skip_to_quality(minquality) |
|---|
| 946 | self._find_next() |
|---|
| 947 | return skipped |
|---|
| 948 | |
|---|
| 949 | def id(self): |
|---|
| 950 | return self.a.id() |
|---|
| 951 | |
|---|
| 952 | def all_ids(self): |
|---|
| 953 | return iter(sorted(set(self.a.all_ids()) - set(self.b.all_ids()))) |
|---|
| 954 | |
|---|
| 955 | def next(self): |
|---|
| 956 | if not self.a.is_active(): raise ReadTooFar |
|---|
| 957 | ar = self.a.next() |
|---|
| 958 | nr = False |
|---|
| 959 | if self.b.is_active(): |
|---|
| 960 | nr = self._find_next() |
|---|
| 961 | return ar or nr |
|---|
| 962 | |
|---|
| 963 | def skip_to(self, id): |
|---|
| 964 | if not self.a.is_active(): raise ReadTooFar |
|---|
| 965 | if id < self.a.id(): return |
|---|
| 966 | |
|---|
| 967 | self.a.skip_to(id) |
|---|
| 968 | if self.b.is_active(): |
|---|
| 969 | self.b.skip_to(id) |
|---|
| 970 | self._find_next() |
|---|
| 971 | |
|---|
| 972 | def weight(self): |
|---|
| 973 | return self.a.weight() |
|---|
| 974 | |
|---|
| 975 | def score(self): |
|---|
| 976 | return self.a.score() |
|---|
| 977 | |
|---|
| 978 | def supports(self, astype): |
|---|
| 979 | return self.a.supports(astype) |
|---|
| 980 | |
|---|
| 981 | def value(self): |
|---|
| 982 | return self.a.value() |
|---|
| 983 | |
|---|
| 984 | def value_as(self, astype): |
|---|
| 985 | return self.a.value_as(astype) |
|---|
| 986 | |
|---|
| 987 | |
|---|
| 988 | class InverseMatcher(WrappingMatcher): |
|---|
| 989 | """Synthetic matcher, generates postings that are NOT present in the |
|---|
| 990 | wrapped matcher. |
|---|
| 991 | """ |
|---|
| 992 | |
|---|
| 993 | def __init__(self, child, limit, missing=None, weight=1.0): |
|---|
| 994 | super(InverseMatcher, self).__init__(child) |
|---|
| 995 | self.limit = limit |
|---|
| 996 | self._weight = weight |
|---|
| 997 | self.missing = missing or (lambda id: False) |
|---|
| 998 | self._id = 0 |
|---|
| 999 | self._find_next() |
|---|
| 1000 | |
|---|
| 1001 | def copy(self): |
|---|
| 1002 | return self.__class__(self.child.copy(), self.limit, |
|---|
| 1003 | weight=self._weight, missing=self.missing) |
|---|
| 1004 | |
|---|
| 1005 | def _replacement(self, newchild): |
|---|
| 1006 | return self.__class__(newchild, self.limit, missing=self.missing, |
|---|
| 1007 | weight=self.weight) |
|---|
| 1008 | |
|---|
| 1009 | def is_active(self): |
|---|
| 1010 | return self._id < self.limit |
|---|
| 1011 | |
|---|
| 1012 | def supports_quality(self): |
|---|
| 1013 | return False |
|---|
| 1014 | |
|---|
| 1015 | def _find_next(self): |
|---|
| 1016 | child = self.child |
|---|
| 1017 | missing = self.missing |
|---|
| 1018 | |
|---|
| 1019 | if not child.is_active() and not missing(self._id): |
|---|
| 1020 | return |
|---|
| 1021 | |
|---|
| 1022 | if child.is_active() and child.id() < self._id: |
|---|
| 1023 | child.skip_to(self._id) |
|---|
| 1024 | |
|---|
| 1025 | while (self._id < self.limit |
|---|
| 1026 | and ((child.is_active() and self._id == child.id()) |
|---|
| 1027 | or missing(self._id))): |
|---|
| 1028 | self._id += 1 |
|---|
| 1029 | if child.is_active(): |
|---|
| 1030 | child.next() |
|---|
| 1031 | |
|---|
| 1032 | def id(self): |
|---|
| 1033 | return self._id |
|---|
| 1034 | |
|---|
| 1035 | def all_ids(self): |
|---|
| 1036 | missing = self.missing |
|---|
| 1037 | negs = set(self.child.all_ids()) |
|---|
| 1038 | return (id for id in xrange(self.limit) |
|---|
| 1039 | if id not in negs and not missing(id)) |
|---|
| 1040 | |
|---|
| 1041 | def next(self): |
|---|
| 1042 | if self._id >= self.limit: raise ReadTooFar |
|---|
| 1043 | self._id += 1 |
|---|
| 1044 | self._find_next() |
|---|
| 1045 | |
|---|
| 1046 | def skip_to(self, id): |
|---|
| 1047 | if self._id >= self.limit: raise ReadTooFar |
|---|
| 1048 | if id < self._id: return |
|---|
| 1049 | self._id = id |
|---|
| 1050 | self._find_next() |
|---|
| 1051 | |
|---|
| 1052 | def weight(self): |
|---|
| 1053 | return self._weight |
|---|
| 1054 | |
|---|
| 1055 | def score(self): |
|---|
| 1056 | return self._weight |
|---|
| 1057 | |
|---|
| 1058 | |
|---|
| 1059 | class RequireMatcher(WrappingMatcher): |
|---|
| 1060 | """Matches postings that are in both sub-matchers, but only uses scores |
|---|
| 1061 | from the first. |
|---|
| 1062 | """ |
|---|
| 1063 | |
|---|
| 1064 | def __init__(self, a, b): |
|---|
| 1065 | self.a = a |
|---|
| 1066 | self.b = b |
|---|
| 1067 | self.child = IntersectionMatcher(a, b) |
|---|
| 1068 | |
|---|
| 1069 | def copy(self): |
|---|
| 1070 | return self.__class__(self.a.copy(), self.b.copy()) |
|---|
| 1071 | |
|---|
| 1072 | def replace(self): |
|---|
| 1073 | if not self.child.is_active(): return NullMatcher() |
|---|
| 1074 | return self |
|---|
| 1075 | |
|---|
| 1076 | def quality(self): |
|---|
| 1077 | return self.a.quality() |
|---|
| 1078 | |
|---|
| 1079 | def block_quality(self): |
|---|
| 1080 | return self.a.block_quality() |
|---|
| 1081 | |
|---|
| 1082 | def skip_to_quality(self, minquality): |
|---|
| 1083 | skipped = self.a.skip_to_quality(minquality) |
|---|
| 1084 | self.child._find_next() |
|---|
| 1085 | return skipped |
|---|
| 1086 | |
|---|
| 1087 | def weight(self): |
|---|
| 1088 | return self.a.weight() |
|---|
| 1089 | |
|---|
| 1090 | def score(self): |
|---|
| 1091 | return self.a.score() |
|---|
| 1092 | |
|---|
| 1093 | def supports(self, astype): |
|---|
| 1094 | return self.a.supports(astype) |
|---|
| 1095 | |
|---|
| 1096 | def value(self): |
|---|
| 1097 | return self.a.value() |
|---|
| 1098 | |
|---|
| 1099 | def value_as(self, astype): |
|---|
| 1100 | return self.a.value_as(astype) |
|---|
| 1101 | |
|---|
| 1102 | |
|---|
| 1103 | class AndMaybeMatcher(AdditiveBiMatcher): |
|---|
| 1104 | """Matches postings in the first sub-matcher, and if the same posting is |
|---|
| 1105 | in the second sub-matcher, adds their scores. |
|---|
| 1106 | """ |
|---|
| 1107 | |
|---|
| 1108 | def __init__(self, a, b): |
|---|
| 1109 | self.a = a |
|---|
| 1110 | self.b = b |
|---|
| 1111 | |
|---|
| 1112 | if a.is_active() and b.is_active() and a.id() != b.id(): |
|---|
| 1113 | b.skip_to(a.id()) |
|---|
| 1114 | |
|---|
| 1115 | def is_active(self): |
|---|
| 1116 | return self.a.is_active() |
|---|
| 1117 | |
|---|
| 1118 | def id(self): |
|---|
| 1119 | return self.a.id() |
|---|
| 1120 | |
|---|
| 1121 | def next(self): |
|---|
| 1122 | if not self.a.is_active(): raise ReadTooFar |
|---|
| 1123 | |
|---|
| 1124 | ar = self.a.next() |
|---|
| 1125 | br = False |
|---|
| 1126 | if self.a.is_active() and self.b.is_active(): |
|---|
| 1127 | br = self.b.skip_to(self.a.id()) |
|---|
| 1128 | return ar or br |
|---|
| 1129 | |
|---|
| 1130 | def skip_to(self, id): |
|---|
| 1131 | if not self.a.is_active(): raise ReadTooFar |
|---|
| 1132 | |
|---|
| 1133 | ra = self.a.skip_to(id) |
|---|
| 1134 | rb = False |
|---|
| 1135 | if self.a.is_active() and self.b.is_active(): |
|---|
| 1136 | rb = self.b.skip_to(id) |
|---|
| 1137 | return ra or rb |
|---|
| 1138 | |
|---|
| 1139 | def replace(self): |
|---|
| 1140 | ar = self.a.replace() |
|---|
| 1141 | br = self.b.replace() |
|---|
| 1142 | if not ar.is_active(): return NullMatcher() |
|---|
| 1143 | if not br.is_active(): return ar |
|---|
| 1144 | if ar is not self.a or br is not self.b: |
|---|
| 1145 | return self.__class__(ar, br) |
|---|
| 1146 | return self |
|---|
| 1147 | |
|---|
| 1148 | def skip_to_quality(self, minquality): |
|---|
| 1149 | a = self.a |
|---|
| 1150 | b = self.b |
|---|
| 1151 | minquality = minquality |
|---|
| 1152 | |
|---|
| 1153 | if not a.is_active(): raise ReadTooFar |
|---|
| 1154 | if not b.is_active(): |
|---|
| 1155 | return a.skip_to_quality(minquality) |
|---|
| 1156 | |
|---|
| 1157 | skipped = 0 |
|---|
| 1158 | aq = a.block_quality() |
|---|
| 1159 | bq = b.block_quality() |
|---|
| 1160 | while a.is_active() and b.is_active() and aq + bq <= minquality: |
|---|
| 1161 | if aq < bq: |
|---|
| 1162 | skipped += a.skip_to_quality(minquality - bq) |
|---|
| 1163 | aq = a.block_quality() |
|---|
| 1164 | else: |
|---|
| 1165 | skipped += b.skip_to_quality(minquality - aq) |
|---|
| 1166 | bq = b.block_quality() |
|---|
| 1167 | |
|---|
| 1168 | return skipped |
|---|
| 1169 | |
|---|
| 1170 | def weight(self): |
|---|
| 1171 | if self.a.id() == self.b.id(): |
|---|
| 1172 | return self.a.weight() + self.b.weight() |
|---|
| 1173 | else: |
|---|
| 1174 | return self.a.weight() |
|---|
| 1175 | |
|---|
| 1176 | def score(self): |
|---|
| 1177 | if self.b.is_active() and self.a.id() == self.b.id(): |
|---|
| 1178 | return self.a.score() + self.b.score() |
|---|
| 1179 | else: |
|---|
| 1180 | return self.a.score() |
|---|
| 1181 | |
|---|
| 1182 | def supports(self, astype): |
|---|
| 1183 | return self.a.supports(astype) |
|---|
| 1184 | |
|---|
| 1185 | def value(self): |
|---|
| 1186 | return self.a.value() |
|---|
| 1187 | |
|---|
| 1188 | def value_as(self, astype): |
|---|
| 1189 | return self.a.value_as(astype) |
|---|
| 1190 | |
|---|
| 1191 | |
|---|
| 1192 | class ConstantScoreMatcher(WrappingMatcher): |
|---|
| 1193 | def __init__(self, child, score=1.0): |
|---|
| 1194 | super(ConstantScoreMatcher, self).__init__(child) |
|---|
| 1195 | self._score = score |
|---|
| 1196 | |
|---|
| 1197 | def copy(self): |
|---|
| 1198 | return self.__class__(self.child.copy(), score=self._score) |
|---|
| 1199 | |
|---|
| 1200 | def _replacement(self, newchild): |
|---|
| 1201 | return self.__class__(newchild, score=self._score) |
|---|
| 1202 | |
|---|
| 1203 | def quality(self): |
|---|
| 1204 | return self._score |
|---|
| 1205 | |
|---|
| 1206 | def block_quality(self): |
|---|
| 1207 | return self._score |
|---|
| 1208 | |
|---|
| 1209 | def score(self): |
|---|
| 1210 | return self._score |
|---|
| 1211 | |
|---|
| 1212 | |
|---|
| 1213 | |
|---|
| 1214 | |
|---|
| 1215 | |
|---|
| 1216 | |
|---|
| 1217 | |
|---|
| 1218 | |
|---|
| 1219 | |
|---|
| 1220 | |
|---|
| 1221 | |
|---|
| 1222 | |
|---|
| 1223 | |
|---|
| 1224 | |
|---|
| 1225 | |
|---|
| 1226 | |
|---|
| 1227 | |
|---|
| 1228 | |
|---|
| 1229 | |
|---|
| 1230 | |
|---|
| 1231 | |
|---|
| 1232 | |
|---|
| 1233 | |
|---|
| 1234 | |
|---|
| 1235 | |
|---|
| 1236 | |
|---|
| 1237 | |
|---|
| 1238 | |
|---|
| 1239 | |
|---|
| 1240 | |
|---|
| 1241 | |
|---|
| 1242 | |
|---|
| 1243 | |
|---|
| 1244 | |
|---|
| 1245 | |
|---|
| 1246 | |
|---|
| 1247 | |
|---|
| 1248 | |
|---|
| 1249 | |
|---|
| 1250 | |
|---|
| 1251 | |
|---|
| 1252 | |
|---|
| 1253 | |
|---|
| 1254 | |
|---|
| 1255 | |
|---|
| 1256 | |
|---|
| 1257 | |
|---|
| 1258 | |
|---|
| 1259 | |
|---|
| 1260 | |
|---|
| 1261 | |
|---|
| 1262 | |
|---|
| 1263 | |
|---|
| 1264 | |
|---|
| 1265 | |
|---|
| 1266 | |
|---|
| 1267 | |
|---|
| 1268 | |
|---|
| 1269 | |
|---|
| 1270 | |
|---|
| 1271 | |
|---|
| 1272 | |
|---|
| 1273 | |
|---|
| 1274 | |
|---|
| 1275 | |
|---|
| 1276 | |
|---|
| 1277 | |
|---|
| 1278 | |
|---|
| 1279 | |
|---|
| 1280 | |
|---|
| 1281 | |
|---|
| 1282 | |
|---|
| 1283 | |
|---|
| 1284 | |
|---|
| 1285 | |
|---|
| 1286 | |
|---|
| 1287 | |
|---|
| 1288 | |
|---|
| 1289 | |
|---|
| 1290 | |
|---|
| 1291 | |
|---|
| 1292 | |
|---|
| 1293 | |
|---|
| 1294 | |
|---|
| 1295 | |
|---|
| 1296 | |
|---|
| 1297 | |
|---|
| 1298 | |
|---|
| 1299 | |
|---|
| 1300 | |
|---|
| 1301 | |
|---|
| 1302 | |
|---|
| 1303 | |
|---|
| 1304 | |
|---|
| 1305 | |
|---|
| 1306 | |
|---|
| 1307 | |
|---|
| 1308 | |
|---|
| 1309 | |
|---|
| 1310 | |
|---|
| 1311 | |
|---|
| 1312 | |
|---|
| 1313 | |
|---|
| 1314 | |
|---|
| 1315 | |
|---|
| 1316 | |
|---|
| 1317 | |
|---|
| 1318 | |
|---|
| 1319 | |
|---|
| 1320 | |
|---|
| 1321 | |
|---|
| 1322 | |
|---|
| 1323 | |
|---|
| 1324 | |
|---|
| 1325 | |
|---|
| 1326 | |
|---|
| 1327 | |
|---|
| 1328 | |
|---|
| 1329 | |
|---|
| 1330 | |
|---|
| 1331 | |
|---|
| 1332 | |
|---|
| 1333 | |
|---|
| 1334 | |
|---|
| 1335 | |
|---|
| 1336 | |
|---|
| 1337 | |
|---|
| 1338 | |
|---|
| 1339 | |
|---|
| 1340 | |
|---|
| 1341 | |
|---|
| 1342 | |
|---|
| 1343 | |
|---|
| 1344 | |
|---|
| 1345 | |
|---|
| 1346 | |
|---|
| 1347 | |
|---|
| 1348 | |
|---|
| 1349 | |
|---|
| 1350 | |
|---|
| 1351 | |
|---|
| 1352 | |
|---|
| 1353 | |
|---|
| 1354 | |
|---|
| 1355 | |
|---|
| 1356 | |
|---|
| 1357 | |
|---|
| 1358 | |
|---|
| 1359 | |
|---|