move fuzzy searching into separate file

This commit is contained in:
Daniel Sun 2023-02-19 16:39:26 +00:00
parent e99ef5203f
commit 72cd844672
2 changed files with 50 additions and 47 deletions

View File

@ -1,6 +1,52 @@
import datetime
class fuzzy_result:
    """A candidate string paired with its fuzzy-match score.

    Ordering is deliberately inverted with respect to ``score``: an instance
    with a *larger* score (i.e. a larger edit distance) compares as *smaller*.
    This lets "worse" matches sort first / sit at the top of a min-heap.

    Equality and ordering only look at ``score``; ``string`` is ignored.
    Comparing against an object that has no ``score`` attribute returns
    ``NotImplemented`` so Python falls back to its default behaviour
    (``==`` is False, ``!=`` is True, ordering raises ``TypeError``).
    """

    # __eq__ compares by score only, so instances are intentionally
    # unhashable (this is also what Python does implicitly when __eq__ is
    # defined without __hash__).
    __hash__ = None

    def __init__(self, string, score):
        self.string = string
        self.score = score

    def __repr__(self):
        return "{}({!r}, {!r})".format(
            type(self).__name__, self.string, self.score
        )

    # consider a score worse if the edit distance is larger
    def __lt__(self, other):
        if not hasattr(other, "score"):
            return NotImplemented
        return self.score > other.score

    def __gt__(self, other):
        if not hasattr(other, "score"):
            return NotImplemented
        return self.score < other.score

    def __le__(self, other):
        if not hasattr(other, "score"):
            return NotImplemented
        return self.score >= other.score

    def __ge__(self, other):
        if not hasattr(other, "score"):
            return NotImplemented
        return self.score <= other.score

    def __eq__(self, other):
        if not hasattr(other, "score"):
            return NotImplemented
        return self.score == other.score

    def __ne__(self, other):
        if not hasattr(other, "score"):
            return NotImplemented
        return self.score != other.score
# compute levenshtein edit distance, adapted from rosetta code
def fuzzy_match(s1, s2):
    """Return the Levenshtein edit distance between *s1* and *s2*.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.
    Only one row of the dynamic-programming table is kept, so memory use is
    proportional to ``len(s2)``.

    Both arguments must support ``len()`` and iteration (e.g. ``str``).
    """
    len1, len2 = len(s1), len(s2)
    if len1 == 0:
        return len2
    if len2 == 0:
        return len1

    # row[k] is the distance between the prefix of s1 processed so far and
    # s2[:k]; it starts as the distances from the empty prefix.
    row = list(range(len2 + 1))
    for r, ch1 in enumerate(s1):
        # Value diagonally up-left of the cell about to be written.
        diagonal = r
        row[0] = r + 1
        for c, ch2 in enumerate(s2):
            above = row[c + 1]  # previous row's value, about to be overwritten
            if ch1 == ch2:
                row[c + 1] = diagonal
            else:
                # substitution, deletion, insertion
                row[c + 1] = min(diagonal, above, row[c]) + 1
            diagonal = above
    return row[-1]
def get_current_datetime() -> datetime.datetime:
# We place this in a separate function so that we can mock it out
# in our unit tests.

View File

@ -5,6 +5,7 @@ from zope import component
from .utils import authz_restrict_to_syscom, is_truthy, \
create_streaming_response, development_only
from ceo_common.interfaces import ILDAPService
from ceo_common.utils import fuzzy_result, fuzzy_match
from ceod.transactions.groups import (
AddGroupTransaction,
AddMemberToGroupTransaction,
@ -37,58 +38,14 @@ def get_group(group_name):
@bp.route('/search/<query>/<count>')
def search_group(query, count):
# compute levenshtein edit distance, adapted from rosetta code
def _fuzzy_match(s1, s2):
# Returns the edit distance between s1 and s2 using a single reusable
# row (`edits`) of the dynamic-programming table.
# If either input is empty, the distance is the length of the other.
if len(s1) == 0:
return len(s2)
if len(s2) == 0:
return len(s1)
# Initial row: 0..len(s2), i.e. the cost of building each prefix of s2
# from an empty string.
edits = [i for i in range(len(s2) + 1)]
for i in range(len(s1)):
# `corner` tracks the previous row's value diagonally up-left of the
# cell being written; for the first column that is i.
corner = i
edits[0] = i + 1
for j in range(len(s2)):
# Save the previous row's value before it is overwritten.
upper = edits[j + 1]
if s1[i] == s2[j]:
# Matching elements: carry the diagonal value over unchanged.
edits[j + 1] = corner
else:
# Otherwise take the cheapest of the three neighbours plus one edit.
m = min(corner, upper, edits[j])
edits[j + 1] = m + 1
corner = upper
# The last cell holds the distance between the full s1 and full s2.
return edits[-1]
class _fuzzy_result:
# Pairs a string with a numeric score. All comparisons look only at
# `score` and are inverted: a larger score compares as "less than".
# NOTE(review): comparing against an object without a `.score` attribute
# raises AttributeError, since `other.score` is read unconditionally.
def __init__(self, string, score):
self.string = string
self.score = score
# consider a score worse if the edit distance is larger
def __lt__(self, other):
return self.score > other.score
def __gt__(self, other):
return self.score < other.score
def __le__(self, other):
return self.score >= other.score
def __ge__(self, other):
return self.score <= other.score
# Equality is by score only; `string` is not compared.
def __eq__(self, other):
return self.score == other.score
def __ne__(self, other):
return self.score != other.score
query = str(query)
count = int(count)
ldap_srv = component.getUtility(ILDAPService)
clubs = ldap_srv.get_clubs()
scores = [_fuzzy_result("", 99999) for _ in range(count)]
scores = [fuzzy_result("", 99999) for _ in range(count)]
for club in clubs:
score = _fuzzy_match(query, str(club.cn))
result = _fuzzy_result(str(club.cn), score)
score = fuzzy_match(query, str(club.cn))
result = fuzzy_result(str(club.cn), score)
heappushpop(scores, result)
result = [score.string for score in nlargest(count, scores)]