archive_utils.py - platform/tools/external_updater - Git at Google

 # Copyright (C) 2018 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Functions to process archive files."""

 import os
 import tempfile
 import tarfile
 import urllib.parse
 import zipfile


 class ZipFileWithPermission(zipfile.ZipFile):
     """ZipFile subclass that keeps the file permission stored in the archive.

     See https://bugs.python.org/issue15795
     """
     def _extract_member(self, member, targetpath, pwd):
         # The base class does the actual extraction; its return value is
         # used below as the path of the extracted entry.
         extracted_path = super()._extract_member(member, targetpath, pwd)

         # The member may be given by name; look up its ZipInfo in that case.
         if isinstance(member, zipfile.ZipInfo):
             info = member
         else:
             info = self.getinfo(member)

         # The upper 16 bits of external_attr are applied as the file mode.
         # A value of zero means there is nothing to apply.
         mode = info.external_attr >> 16
         if mode:
             os.chmod(extracted_path, mode)
         return extracted_path


 def unzip(archive_path, target_path):
     """Unpacks a zip archive into a directory.

     Extraction goes through ZipFileWithPermission rather than the plain
     zipfile.ZipFile.

     Args:
         archive_path: Path to the zip file.
         target_path: Path to extract files to.
     """
     with ZipFileWithPermission(archive_path) as archive:
         archive.extractall(target_path)


 def untar(archive_path, target_path):
     """Extracts tar file to a path.

     The archive is opened with mode 'r', so tarfile detects the compression
     (none, gzip, bz2, xz) on its own.

     Args:
         archive_path: Path to the tar file.
         target_path: Path to extract files to.

     Raises:
         tarfile.FilterError: If the interpreter supports extraction filters
             and a member is rejected by the 'data' filter (for example a
             path that would land outside target_path).
     """

     with tarfile.open(archive_path, mode='r') as tfile:
         # Archives handled here are downloaded, so treat them as untrusted.
         # Interpreters with extraction-filter support expose
         # tarfile.data_filter; asking for the 'data' filter explicitly
         # blocks path traversal and avoids the "no filter" deprecation
         # warning. Older interpreters keep the previous behaviour.
         if hasattr(tarfile, 'data_filter'):
             tfile.extractall(target_path, filter='data')
         else:
             tfile.extractall(target_path)


 # Maps a file-name suffix to the function that extracts that archive type.
 # Every tar flavour shares the same extractor.
 ARCHIVE_TYPES = {
     '.zip': unzip,
     **dict.fromkeys(('.tar.gz', '.tar.bz2', '.tar.xz'), untar),
 }


 def is_supported_archive(url):
     """Returns True when get_extract_func finds an extractor for the url."""
     extract_func = get_extract_func(url)
     return extract_func is not None


 def get_extract_func(url):
     """Gets the function to extract an archive.

     The file name is taken from the path component of the url and matched
     against the suffixes in ARCHIVE_TYPES. Urls from crates.io, which carry
     no file suffix, are treated as tar archives.

     Args:
         url: The url to the archive file.

     Returns:
         A function to extract the archive. None if not found.
     """

     parsed_url = urllib.parse.urlparse(url)
     filename = os.path.basename(parsed_url.path)
     for ext, func in ARCHIVE_TYPES.items():
         if filename.endswith(ext):
             return func
     # crates.io download url does not have file suffix
     # e.g., https://crates.io/api/v1/crates/syn/1.0.16/download
     # Use `in` rather than the truthiness of str.find(): find() returns -1
     # (truthy) when the substring is missing, which made every url match.
     if '/crates.io/api/' in url or '/static.crates.io/crates/' in url:
         return untar
     return None


 def download_and_extract(url):
     """Downloads and extracts an archive file to a temporary directory.

     Args:
         url: Url to download.

     Returns:
         Path to the temporary directory.

     Raises:
         ValueError: If get_extract_func knows no extractor for the url.
     """
     # The top of this file imports only urllib.parse; urlretrieve lives in
     # urllib.request, which must be imported explicitly to be available.
     import urllib.request

     # Resolve the extractor before downloading, so an unsupported url fails
     # early with a clear message instead of calling None after the download.
     extract_func = get_extract_func(url)
     if extract_func is None:
         raise ValueError(f'Unsupported archive url: {url}')

     print(f'Downloading {url}')
     archive_file, _headers = urllib.request.urlretrieve(url)

     temporary_dir = tempfile.mkdtemp()
     print(f'Extracting {archive_file} to {temporary_dir}')
     extract_func(archive_file, temporary_dir)

     return temporary_dir


 def find_archive_root(path):
     """Guesses the real root directory of an extracted archive.

     Archives sometimes wrap their content in extra directory layers. The
     tree is walked top-down and the first directory that holds any file, or
     more than one sub-directory, is taken as the root.

     Args:
         path: Path to the extracted archive.

     Returns:
         The root path we found, or path itself if no directory qualifies.
     """
     candidates = (
         current
         for current, subdirs, filenames in os.walk(path)
         if filenames or len(subdirs) > 1
     )
     return next(candidates, path)
	# Copyright (C) 2018 The Android Open Source Project
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""Functions to process archive files."""

	import os
	import tempfile
	import tarfile
	import urllib.parse
	import zipfile


	class ZipFileWithPermission(zipfile.ZipFile):
	    """Subclassing Zipfile to preserve file permission.

	    See https://bugs.python.org/issue15795
	    """
	    def _extract_member(self, member, targetpath, pwd):
	        # The base class does the actual extraction; its return value is
	        # used below as the path of the extracted entry.
	        ret_val = super()._extract_member(member, targetpath, pwd)

	        # The member may be given by name; look up its ZipInfo then.
	        if not isinstance(member, zipfile.ZipInfo):
	            member = self.getinfo(member)
	        # The upper 16 bits of external_attr are applied as the file mode.
	        # A value of zero means there is nothing to apply.
	        attr = member.external_attr >> 16
	        if attr != 0:
	            os.chmod(ret_val, attr)
	        return ret_val


	def unzip(archive_path, target_path):
	    """Extracts zip file to a path.

	    Extraction goes through ZipFileWithPermission rather than the plain
	    zipfile.ZipFile.

	    Args:
	        archive_path: Path to the zip file.
	        target_path: Path to extract files to.
	    """

	    with ZipFileWithPermission(archive_path) as zfile:
	        zfile.extractall(target_path)


	def untar(archive_path, target_path):
	    """Extracts tar file to a path.

	    The archive is opened with mode 'r', so tarfile detects the
	    compression (none, gzip, bz2, xz) on its own.

	    Args:
	        archive_path: Path to the tar file.
	        target_path: Path to extract files to.

	    Raises:
	        tarfile.FilterError: If the interpreter supports extraction
	            filters and a member is rejected by the 'data' filter (for
	            example a path that would land outside target_path).
	    """

	    with tarfile.open(archive_path, mode='r') as tfile:
	        # Archives handled here are downloaded, so treat them as
	        # untrusted. Interpreters with extraction-filter support expose
	        # tarfile.data_filter; asking for the 'data' filter explicitly
	        # blocks path traversal and avoids the "no filter" deprecation
	        # warning. Older interpreters keep the previous behaviour.
	        if hasattr(tarfile, 'data_filter'):
	            tfile.extractall(target_path, filter='data')
	        else:
	            tfile.extractall(target_path)


	# Maps a file-name suffix to the function that extracts that archive type.
	# Every tar flavour shares the same extractor.
	ARCHIVE_TYPES = {
	    '.zip': unzip,
	    **dict.fromkeys(('.tar.gz', '.tar.bz2', '.tar.xz'), untar),
	}


	def is_supported_archive(url):
	    """Checks whether the url points to a supported archive.

	    A url is supported when get_extract_func returns an extractor for it.
	    """
	    return get_extract_func(url) is not None


	def get_extract_func(url):
	    """Gets the function to extract an archive.

	    The file name is taken from the path component of the url and matched
	    against the suffixes in ARCHIVE_TYPES. Urls from crates.io, which
	    carry no file suffix, are treated as tar archives.

	    Args:
	        url: The url to the archive file.

	    Returns:
	        A function to extract the archive. None if not found.
	    """

	    parsed_url = urllib.parse.urlparse(url)
	    filename = os.path.basename(parsed_url.path)
	    for ext, func in ARCHIVE_TYPES.items():
	        if filename.endswith(ext):
	            return func
	    # crates.io download url does not have file suffix
	    # e.g., https://crates.io/api/v1/crates/syn/1.0.16/download
	    # Use `in` rather than the truthiness of str.find(): find() returns -1
	    # (truthy) when the substring is missing, which made every url match.
	    if '/crates.io/api/' in url or '/static.crates.io/crates/' in url:
	        return untar
	    return None


	def download_and_extract(url):
	    """Downloads and extracts an archive file to a temporary directory.

	    Args:
	        url: Url to download.

	    Returns:
	        Path to the temporary directory.

	    Raises:
	        ValueError: If get_extract_func knows no extractor for the url.
	    """
	    # The top of this file imports only urllib.parse; urlretrieve lives in
	    # urllib.request, which must be imported explicitly to be available.
	    import urllib.request

	    # Resolve the extractor before downloading, so an unsupported url
	    # fails early with a clear message instead of calling None after the
	    # download.
	    extract_func = get_extract_func(url)
	    if extract_func is None:
	        raise ValueError(f'Unsupported archive url: {url}')

	    print(f'Downloading {url}')
	    archive_file, _headers = urllib.request.urlretrieve(url)

	    temporary_dir = tempfile.mkdtemp()
	    print(f'Extracting {archive_file} to {temporary_dir}')
	    extract_func(archive_file, temporary_dir)

	    return temporary_dir


	def find_archive_root(path):
	    """Finds the real root of an extracted archive.

	    Sometimes archives have additional layers of directories. The tree is
	    walked top-down and the first directory that holds any file, or more
	    than one sub-directory, is taken as the root.

	    Args:
	        path: Path to the extracted archive.

	    Returns:
	        The root path we found, or path itself if no directory qualifies.
	    """
	    for root, dirs, files in os.walk(path):
	        if files or len(dirs) > 1:
	            return root
	    return path