[Updater] Prefer url similar to previous one
This change computes the edit distance between the old url and each of the new urls,
and uses the url most similar to the previous one.
Test: update any library
Change-Id: I959a22168652c7543da2cdb29d36a1d061ade7e9
diff --git a/Android.bp b/Android.bp
index 34e433b..b10515d 100644
--- a/Android.bp
+++ b/Android.bp
@@ -16,9 +16,23 @@
name: "external_updater",
main: "external_updater.py",
srcs: [
+ "external_updater.py",
+ ],
+ libs: [
+ "external_updater_lib",
+ ],
+}
+
+python_library_host {
+ name: "external_updater_lib",
+ srcs: [
"*.py",
"metadata.proto",
],
+ exclude_srcs: [
+ "*_test.py",
+ "external_updater.py",
+ ],
libs: [
"python-symbol",
"libprotobuf-python",
@@ -41,3 +55,13 @@
},
}
+python_test_host {
+ name: "external_updater_test",
+ main: "external_updater_test.py",
+ srcs: [
+ "external_updater_test.py",
+ ],
+ libs: [
+ "external_updater_lib",
+ ],
+}
diff --git a/external_updater_test.py b/external_updater_test.py
new file mode 100644
index 0000000..0c82f05
--- /dev/null
+++ b/external_updater_test.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for external updater."""
+
+import unittest
+
+import github_archive_updater
+
+
+class ExternalUpdaterTest(unittest.TestCase):
+ """Unit tests for external updater."""
+
+ def test_url_selection(self):
+ """Tests that GithubArchiveUpdater can choose the right url."""
+ prefix = "https://github.com/author/project/"
+ urls = [
+ prefix + "releases/download/ver-1.0/ver-1.0-binary.zip",
+ prefix + "releases/download/ver-1.0/ver-1.0-binary.tar.gz",
+ prefix + "releases/download/ver-1.0/ver-1.0-src.zip",
+ prefix + "releases/download/ver-1.0/ver-1.0-src.tar.gz",
+ prefix + "archive/ver-1.0.zip",
+ prefix + "archive/ver-1.0.tar.gz",
+ ]
+
+ previous_url = prefix + "releases/download/ver-0.9/ver-0.9-src.tar.gz"
+ url = github_archive_updater.choose_best_url(urls, previous_url)
+ expected_url = prefix + "releases/download/ver-1.0/ver-1.0-src.tar.gz"
+ self.assertEqual(url, expected_url)
+
+ previous_url = prefix + "archive/ver-0.9.zip"
+ url = github_archive_updater.choose_best_url(urls, previous_url)
+ expected_url = prefix + "archive/ver-1.0.zip"
+ self.assertEqual(url, expected_url)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/git_utils.py b/git_utils.py
index 8e8f96d..5743b8f 100644
--- a/git_utils.py
+++ b/git_utils.py
@@ -22,6 +22,7 @@
return subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
check=True, cwd=cwd)
+
def fetch(proj_path, remote_names):
"""Runs git fetch.
@@ -31,6 +32,7 @@
"""
_run(['git', 'fetch', '--multiple'] + remote_names, cwd=proj_path)
+
def add_remote(proj_path, name, url):
"""Adds a git remote.
@@ -41,6 +43,7 @@
"""
_run(['git', 'remote', 'add', name, url], cwd=proj_path)
+
def list_remotes(proj_path):
"""Lists all Git remotes.
@@ -54,6 +57,7 @@
lines = out.stdout.decode('utf-8').splitlines()
return dict([line.split()[0:2] for line in lines])
+
def get_commits_ahead(proj_path, branch, base_branch):
"""Lists commits in `branch` but not `base_branch`."""
out = _run(['git', 'rev-list', '--left-only',
@@ -61,11 +65,13 @@
proj_path)
return out.stdout.decode('utf-8').splitlines()
+
def get_commit_time(proj_path, commit):
"""Gets commit time of one commit."""
out = _run(['git', 'show', '-s', '--format=%ct', commit], cwd=proj_path)
return datetime.datetime.fromtimestamp(int(out.stdout))
+
def list_remote_branches(proj_path, remote_name):
"""Lists all branches for a remote."""
out = _run(['git', 'branch', '-r'], cwd=proj_path)
diff --git a/github_archive_updater.py b/github_archive_updater.py
index e42e7b0..ea7ffc4 100644
--- a/github_archive_updater.py
+++ b/github_archive_updater.py
@@ -16,7 +16,6 @@
import json
import re
-import shutil
import urllib.request
import archive_utils
@@ -29,6 +28,39 @@
GITHUB_URL_RE = re.compile(GITHUB_URL_PATTERN)
+def _edit_distance(str1, str2):
+ prev = list(range(0, len(str2) + 1))
+ for i, chr1 in enumerate(str1):
+ cur = [i + 1]
+ for j, chr2 in enumerate(str2):
+ if chr1 == chr2:
+ cur.append(prev[j])
+ else:
+ cur.append(min(prev[j + 1], prev[j], cur[j]) + 1)
+ prev = cur
+ return prev[len(str2)]
+
+
+def choose_best_url(urls, previous_url):
+ """Returns the best url to download from a list of candidate urls.
+
+ This function calculates the similarity between the previous url and each
+ new url, and returns the one that best matches the previous url.
+
+ Similarity is measured by edit distance.
+
+ Args:
+ urls: Array of candidate urls.
+ previous_url: String of the url used previously.
+
+ Returns:
+ One url from `urls`.
+ """
+ return min(urls, default=None,
+ key=lambda url: _edit_distance(
+ url, previous_url))
+
+
class GithubArchiveUpdater():
"""Updater for archives from GitHub.
@@ -98,18 +130,18 @@
"""
supported_assets = [
- a for a in self.data['assets']
+ a['browser_download_url'] for a in self.data['assets']
if archive_utils.is_supported_archive(a['browser_download_url'])]
- # Finds the minimum sized archive to download.
- minimum_asset = min(
- supported_assets, key=lambda asset: asset['size'], default=None)
- if minimum_asset is not None:
- latest_url = minimum_asset.get('browser_download_url')
- else:
- # Guess the tarball url for source code.
- latest_url = 'https://github.com/{}/{}/archive/{}.tar.gz'.format(
- self.owner, self.repo, self.data.get('tag_name'))
+ # Adds source code urls.
+ supported_assets.append(
+ 'https://github.com/{}/{}/archive/{}.tar.gz'.format(
+ self.owner, self.repo, self.data.get('tag_name')))
+ supported_assets.append(
+ 'https://github.com/{}/{}/archive/{}.zip'.format(
+ self.owner, self.repo, self.data.get('tag_name')))
+
+ latest_url = choose_best_url(supported_assets, self.old_url.value)
temporary_dir = None
try: