[Updater] Prefer url similar to previous one
This change computes the edit distance between the old url and each of the
new urls, and uses the url most similar to the previous one.
Test: update any library
Change-Id: I959a22168652c7543da2cdb29d36a1d061ade7e9
diff --git a/github_archive_updater.py b/github_archive_updater.py
index e42e7b0..ea7ffc4 100644
--- a/github_archive_updater.py
+++ b/github_archive_updater.py
@@ -16,7 +16,6 @@
import json
import re
-import shutil
import urllib.request
import archive_utils
@@ -29,6 +28,39 @@
GITHUB_URL_RE = re.compile(GITHUB_URL_PATTERN)
+def _edit_distance(str1, str2):
+ """Returns the edit distance between two strings.
+
+ The table is filled one row per character of str1, keeping only the
+ previous row and the row being built.
+
+ Args:
+ str1: First string.
+ str2: Second string.
+
+ Returns:
+ The number in the last cell of the last row, i.e. the distance between
+ the whole of str1 and the whole of str2.
+ """
+ # Row for the empty prefix of str1: cell j holds j.
+ prev = list(range(0, len(str2) + 1))
+ for i, chr1 in enumerate(str1):
+ # First cell of the row: first i + 1 characters of str1 against the
+ # empty prefix of str2.
+ cur = [i + 1]
+ for j, chr2 in enumerate(str2):
+ if chr1 == chr2:
+ # Matching characters: carry over the diagonal value at no cost.
+ cur.append(prev[j])
+ else:
+ # Mismatch: cheapest of the cell above (prev[j + 1]), the diagonal
+ # (prev[j]) and the cell to the left (cur[j]), plus one.
+ cur.append(min(prev[j + 1], prev[j], cur[j]) + 1)
+ prev = cur
+ return prev[len(str2)]
+
+
+def choose_best_url(urls, previous_url):
+ """Returns the best url to download from a list of candidate urls.
+
+ This function calculates the similarity between the previous url and each
+ of the new urls, and returns the one that best matches the previous url.
+
+ Similarity is measured by edit distance: the smaller the distance, the
+ better the match.
+
+ Args:
+ urls: Array of candidate urls.
+ previous_url: String of the url used previously.
+
+ Returns:
+ One url from `urls`, or None if `urls` is empty.
+ """
+ return min(urls, default=None,
+ key=lambda url: _edit_distance(
+ url, previous_url))
+
+
class GithubArchiveUpdater():
"""Updater for archives from GitHub.
@@ -98,18 +130,18 @@
"""
supported_assets = [
- a for a in self.data['assets']
+ a['browser_download_url'] for a in self.data['assets']
if archive_utils.is_supported_archive(a['browser_download_url'])]
- # Finds the minimum sized archive to download.
- minimum_asset = min(
- supported_assets, key=lambda asset: asset['size'], default=None)
- if minimum_asset is not None:
- latest_url = minimum_asset.get('browser_download_url')
- else:
- # Guess the tarball url for source code.
- latest_url = 'https://github.com/{}/{}/archive/{}.tar.gz'.format(
- self.owner, self.repo, self.data.get('tag_name'))
+ # Adds source code urls.
+ supported_assets.append(
+ 'https://github.com/{}/{}/archive/{}.tar.gz'.format(
+ self.owner, self.repo, self.data.get('tag_name')))
+ supported_assets.append(
+ 'https://github.com/{}/{}/archive/{}.zip'.format(
+ self.owner, self.repo, self.data.get('tag_name')))
+
+ latest_url = choose_best_url(supported_assets, self.old_url.value)
temporary_dir = None
try: