#! /usr/bin/python2.3
"""Look up a free-text query in a marshalled search table and rank the URLs.

The search table is a mapping loaded with ``marshal``.  As used below it
holds two kinds of entries:

* ``word -> sequence of (url, count) pairs``
* ``url  -> value returned alongside the URL`` (the code calls it
  ``titleWords``; presumably the words of the page title -- confirm against
  the indexer that writes the file)

Command-line usage::

    script SEARCHFILE QUERY WORD...
"""
import sys
import marshal

# The project-local ``lib`` package is found through this relative entry,
# which Python resolves against the current working directory.  It has to be
# appended before the ``lib`` import below.
sys.path.append("../..")

from pprint import pprint
from lib import textfilters

filters = textfilters


class SearchResults(object):
    """Rank the URLs of a marshalled search table against a query string.

    Typical use: construct with the path of the search file, call
    ``query()`` with the query text, then call ``getResults()``.
    """

    def __init__(self, searchFile):
        """Remember *searchFile* and load the search table from it.

        searchFile -- path of the marshalled search table.
        Raises whatever ``open`` or ``marshal.load`` raise for a missing or
        malformed file.
        """
        self._searchFile = searchFile
        self._query = ''
        self._phrase = []
        self._searchTable = None
        self._rankedURLs = None
        self._initSearchFile()

    def _initSearchFile(self):
        """Load the search table from ``self._searchFile``."""
        # marshal data is binary, so the file must not be opened in text
        # mode (text mode corrupts it on platforms that translate newlines).
        fd = open(self._searchFile, 'rb')
        try:
            # NOTE: marshal is not safe against malicious or corrupt input;
            # only load search files produced by a trusted indexer.
            self._searchTable = marshal.load(fd)
        finally:
            # Close the handle even when the load fails.
            fd.close()

    def query(self, query):
        """Store *query* (a string) as the text to search for."""
        self._query = query

    def _parseQuery(self):
        """Split the stored query on whitespace and normalise the words."""
        tokens = self._query.split()
        self._phrase = filters.norm_words(tokens)

    def _calcResults(self):
        """Rank every URL that matches at least one word of the phrase.

        URLs are ordered by, in decreasing priority:

        1. the number of phrase words that list the URL (descending),
        2. the count stored for the URL (descending),
        3. the URL itself (descending), which only makes the order stable.

        The ranked list is stored in ``self._rankedURLs``.
        """
        itemsWithScore = {}
        itemsWithCount = {}
        for word in self._phrase:
            # A word that is not in the index simply matches nothing; it
            # must not abort the whole search with a KeyError.
            for url, count in self._searchTable.get(word, ()):
                itemsWithScore[url] = itemsWithScore.get(url, 0) + 1
                # NOTE(review): this keeps the count of the last phrase word
                # that matched the URL, not a total -- confirm that this is
                # the intended tie-breaker.
                itemsWithCount[url] = count

        # One descending sort on (score, count, url) gives the same order as
        # grouping by score and then sorting each group by (count, url).
        ranked = [(score, itemsWithCount[url], url)
                  for url, score in itemsWithScore.items()]
        ranked.sort()
        ranked.reverse()
        self._rankedURLs = [entry[2] for entry in ranked]

    def getResults(self):
        """Run the stored query and return the ranked matches.

        Returns a list of ``(url, titleWords)`` tuples, best match first,
        where ``titleWords`` is the search table's own entry for the URL.
        Raises KeyError if a ranked URL has no entry of its own in the table.
        """
        self._parseQuery()
        self._calcResults()
        results = []
        for url in self._rankedURLs:
            titleWords = self._searchTable[url]
            results.append((url, titleWords))
        return results


if __name__ == '__main__':
    args = sys.argv[1:]
    print(args)
    if not args:
        # Without this guard a bare invocation dies with an IndexError.
        sys.exit("usage: %s SEARCHFILE QUERY..." % sys.argv[0])
    searchFile = args[0]
    query = ' '.join(args[1:])
    sr = SearchResults(searchFile)
    sr.query(query)
    pprint(sr.getResults())