-
-
Notifications
You must be signed in to change notification settings - Fork 738
add fuzzy trigram searching to add-on store #19309
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
5af2ed2
d6f4d56
57c7cf8
a35b163
57af463
c43510e
c52bd17
cc15fc9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,10 +7,10 @@ | |
| from dataclasses import dataclass | ||
| from enum import Enum | ||
|
|
||
| from functools import lru_cache | ||
| from locale import strxfrm | ||
| from typing import ( | ||
| Any, | ||
| FrozenSet, | ||
| Generic, | ||
| List, | ||
| Optional, | ||
|
|
@@ -50,13 +50,20 @@ def __lt__(self, other: Any) -> bool: ... | |
| class _AddonListFieldData: | ||
| displayString: str | ||
| width: int | ||
| hideStatuses: FrozenSet[_StatusFilterKey] = frozenset() | ||
| hideStatuses: frozenset[_StatusFilterKey] = frozenset() | ||
| """Hide this field if the current tab filter is in hideStatuses.""" | ||
|
|
||
|
|
||
| class AddonListField(_AddonListFieldData, Enum): | ||
| """An ordered enum of fields to use as columns in the add-on list.""" | ||
|
|
||
| searchRank = ( | ||
| # Translators: The name of a sorting option for the add-on store to sort by search relevance | ||
| pgettext("addonStore", "Search relevance"), | ||
| 0, | ||
| # hide for all statuses, as this is only used for sorting when a search filter is applied. | ||
| frozenset(_StatusFilterKey), | ||
| ) | ||
| displayName = ( | ||
| # Translators: The name of the column that contains names of addons. | ||
| pgettext("addonStore", "Name"), | ||
|
|
@@ -211,6 +218,32 @@ def canUseDisableAction(self) -> bool: | |
| ) | ||
| ) | ||
|
|
||
| @property | ||
| @lru_cache(maxsize=1) | ||
| def searchableText(self) -> str: | ||
| """Extract searchable text from addon.""" | ||
seanbudd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| model = self.model | ||
| searchableText = " ".join( | ||
| [ | ||
| model.displayName, | ||
| model.description, | ||
| model.addonId, | ||
| model.publisher if isinstance(model, _AddonStoreModel) else "", | ||
| model.author if isinstance(model, _AddonManifestModel) else "", | ||
| ], | ||
| ) | ||
| return searchableText.strip() | ||
|
|
||
| def searchRank(self, searchTerm: str) -> float: | ||
| """Calculate a search rank for this addon based on the filter trigrams.""" | ||
| searchTerm = searchTerm.strip() | ||
| if len(searchTerm) < AddonListVM.TRIGRAM_LENGTH: | ||
| return 1.0 # empty or too short search matches everything | ||
| addonSearchableText = self.searchableText | ||
| filterTrigrams = AddonListVM._generateTrigrams(searchTerm) | ||
| addonTrigrams = AddonListVM._generateTrigrams(addonSearchableText) | ||
| return AddonListVM._calculateTrigramSimilarity(filterTrigrams, addonTrigrams) | ||
|
|
||
| def __repr__(self) -> str: | ||
| return f"{self.__class__.__name__}: {self.Id}, {self.status}" | ||
|
|
||
|
|
@@ -233,9 +266,11 @@ def listItem(self, newListItem: Optional[AddonListItemVM]): | |
|
|
||
|
|
||
| class AddonListVM: | ||
| DEFAULT_SORT_FIELD = AddonListField.displayName | ||
|
|
||
| def __init__( | ||
| self, | ||
| addons: List[AddonListItemVM], | ||
| addons: list[AddonListItemVM], | ||
| storeVM: "AddonStoreVM", | ||
| ): | ||
| self._isLoading: bool = False | ||
|
|
@@ -244,14 +279,16 @@ def __init__( | |
| self.itemUpdated = extensionPoints.Action() | ||
| self.updated = extensionPoints.Action() | ||
| self.selectionChanged = extensionPoints.Action() | ||
| self.selectedAddonId: Optional[str] = None | ||
| self.selectedAddonId: str | None = None | ||
| self.lastSelectedAddonId = self.selectedAddonId | ||
| self._sortByModelField: AddonListField = AddonListField.displayName | ||
| self._filterString: Optional[str] = None | ||
| self._sortByModelField: AddonListField = self.DEFAULT_SORT_FIELD | ||
| self._prevSortByModelField: AddonListField = self.DEFAULT_SORT_FIELD | ||
| self._filterString: str | None = None | ||
| self._reverseSort: bool = False | ||
| self._prevReverseSort: bool = self._reverseSort | ||
|
|
||
| self._setSelectionPending = False | ||
| self._addonsFilteredOrdered: List[str] = self._getFilteredSortedIds() | ||
| self._addonsFilteredOrdered: list[str] = self._getFilteredSortedIds() | ||
| self._validate( | ||
| sortField=self._sortByModelField, | ||
| selectionIndex=self.getSelectedIndex(), | ||
|
|
@@ -261,9 +298,13 @@ def __init__( | |
| self.resetListItems(addons) | ||
|
|
||
| @property | ||
| def presentedFields(self) -> List[AddonListField]: | ||
| def presentedFields(self) -> list[AddonListField]: | ||
| return [c for c in AddonListField if self._storeVM._filteredStatusKey not in c.hideStatuses] | ||
|
|
||
| @property | ||
| def sortableFields(self) -> list[AddonListField]: | ||
| return [AddonListField.searchRank] + self.presentedFields | ||
|
|
||
| def _itemDataUpdated(self, addonListItemVM: AddonListItemVM): | ||
| addonId: str = addonListItemVM.Id | ||
| log.debug(f"Item updated: {addonListItemVM!r}") | ||
|
|
@@ -391,7 +432,7 @@ def setSortField(self, modelField: AddonListField, reverse: bool = False): | |
| @property | ||
| def _columnSortChoices(self) -> list[str]: | ||
| columnChoices = [] | ||
| for c in self.presentedFields: | ||
| for c in self.sortableFields: | ||
| columnChoices.append( | ||
| pgettext( | ||
| "addonStore", | ||
|
|
@@ -414,35 +455,54 @@ def _columnSortChoices(self) -> list[str]: | |
| ) | ||
| return columnChoices | ||
|
|
||
| TRIGRAM_SEARCH_THRESHOLD = 0.3 | ||
| """Threshold for trigram search ranking to include an addon in the filtered list.""" | ||
| TRIGRAM_LENGTH = 3 | ||
| """Length of trigrams used for searching.""" | ||
|
|
||
| @staticmethod | ||
| @lru_cache(maxsize=256) | ||
| def _generateTrigrams(text: str) -> frozenset[str]: | ||
| """Generate character trigrams from text. | ||
| Used for searching. | ||
|
|
||
| :param text: The text to generate trigrams from. | ||
| :return: A frozenset of trigrams. | ||
| """ | ||
| normalized = strxfrm(text.strip()) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Warning:
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, I believe it works as expected when performing trigram comparisons. e.g. ensuring an accented character matches the character itself.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will trigram search work on these strings? Also calling the result of
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, it will match by each char, in theory it should provide much better matching
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think it's misleading. The conversion and stripping normalizes the text for the purpose of trigram searches |
||
| trigrams = set() | ||
| normalized = f" {normalized} " # padding to capture leading/trailing grams | ||
| for i in range(len(normalized) - 2): | ||
| trigrams.add(normalized[i : i + AddonListVM.TRIGRAM_LENGTH]) | ||
| return frozenset(trigrams) | ||
|
|
||
| @staticmethod | ||
| def _calculateTrigramSimilarity(searchTrigrams: frozenset[str], textTrigrams: frozenset[str]) -> float: | ||
| """Calculate similarity score between two sets of trigrams. Used for searching.""" | ||
| if not searchTrigrams: | ||
| return 1.0 # Empty search matches everything | ||
| matches = len(searchTrigrams & textTrigrams) | ||
| return matches / len(searchTrigrams) | ||
|
|
||
| def _getFilteredSortedIds(self) -> list[str]: | ||
| def _getSortFieldData(listItemVM: AddonListItemVM) -> "_SupportsLessThan": | ||
| def _getSortFieldData(listItemVM: AddonListItemVM[_AddonGUIModel]) -> "_SupportsLessThan": | ||
| if self._sortByModelField == AddonListField.publicationDate: | ||
| if getattr(listItemVM.model, "submissionTime", None): | ||
| listItemVM = cast(AddonListItemVM[_AddonStoreModel], listItemVM) | ||
| return listItemVM.model.submissionTime | ||
| addonStoreListItemVM = cast(AddonListItemVM[_AddonStoreModel], listItemVM) | ||
| return addonStoreListItemVM.model.submissionTime | ||
| return 0 | ||
| if self._sortByModelField == AddonListField.installDate: | ||
| listItemVM = cast(AddonListItemVM[_AddonManifestModel], listItemVM) | ||
| return listItemVM.model.installDate | ||
| addonManifestListItemVM = cast(AddonListItemVM[_AddonManifestModel], listItemVM) | ||
| return addonManifestListItemVM.model.installDate | ||
| if self._sortByModelField == AddonListField.searchRank: | ||
| return listItemVM.searchRank(self._filterString or "") | ||
| return strxfrm(self._getAddonFieldText(listItemVM, self._sortByModelField)) | ||
|
|
||
| def _containsTerm(detailsVM: AddonListItemVM, term: str) -> bool: | ||
| term = term.casefold() | ||
| model = detailsVM.model | ||
| inPublisher = isinstance(model, _AddonStoreModel) and term in model.publisher.casefold() | ||
| inAuthor = isinstance(model, _AddonManifestModel) and term in model.author.casefold() | ||
| return ( | ||
| term in model.displayName.casefold() | ||
| or term in model.description.casefold() | ||
| or term in model.addonId.casefold() | ||
| or inPublisher | ||
| or inAuthor | ||
| ) | ||
|
|
||
| filtered = ( | ||
| vm | ||
| for vm in self._addons.values() | ||
| if self._filterString is None or _containsTerm(vm, self._filterString) | ||
| if self._filterString is None | ||
| or vm.searchRank(self._filterString) >= self.TRIGRAM_SEARCH_THRESHOLD | ||
| ) | ||
| filteredSorted = list( | ||
| [vm.Id for vm in sorted(filtered, key=_getSortFieldData, reverse=self._reverseSort)], | ||
|
|
@@ -497,6 +557,11 @@ def _updateAddonListing(self): | |
| self.lastSelectedAddonId = self.selectedAddonId | ||
| self._addonsFilteredOrdered = newOrder | ||
|
|
||
| def _cachePreviousSortField(self) -> None: | ||
| """Cache the current sort field and order as previous sort field and order.""" | ||
| self._prevSortByModelField = self._sortByModelField | ||
| self._prevReverseSort = self._reverseSort | ||
|
|
||
| def applyFilter(self, filterText: str) -> None: | ||
| oldOrder = self._addonsFilteredOrdered | ||
| if not filterText: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.