blob: 3c6396b48554d32439c1f5204df0bed6ef3ae0a9 [file] [log] [blame]
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""A helper script for validateRefactor.sh. Should generally not be used directly.
Can be used directly if validateRefactor.sh has already created the out-old & out-new dirs.
In such a case, it can be run to compare those directories without regenerating them.
This is generally only useful when updating baselines or iterating on this script itself.
Takes baseline names as CLI arguments, which may be passed through from validateRefactor.sh.
Typical usage example:
python validateRefactorHelper.py agpKmp
"""
import itertools
import logging
import queue
import re
import shutil
import subprocess
import sys
import threading
from typing import Dict
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# noto-emoji-compat `bundleinside`s an externally-built with-timestamps jar.
# classes.jar is compared using `diffuse` instead of unzipping and diffing class files.
bannedJars = ["-x", "noto-emoji-compat-java.jar", "-x", "classes.jar"]
# java and json aren't for unzipping, but the poor exclude-everything-but-jars regex doesn't
# exclude them. Same for exclude-non-klib and .kt/.knm
areNotZips = ["-x", r"**\.java", "-x", r"**\.json", "-x", r"**\.kt", "-x", r"**\.knm", "-x", r"**\.xml",
"-x", r"**\.sha1", "-x", r"**\.sha256", "-x", r"**\.sha512", "-x", r"**\.md5",
"-x", r"**\.module", "-x", r"**\.pom", "-x", r"**\.html"]
# keeps making my regexes fall over :(
hasNoExtension = ["-x", "manifest", "-x", "module"]
doNotUnzip = bannedJars + areNotZips + hasNoExtension
def diff(excludes):
return popenAndReturn(["diff", "-r", "../../out-old/dist/", "../../out-new/dist/"] + excludes)
def popenAndReturn(args):
logger.debug(" ".join(args))
return subprocess.Popen(args, stdout=subprocess.PIPE).stdout.read().decode("utf-8").split("\n")
# Finds and unzips all files with old/new diff that _do not_ match the argument regexes.
# Because the `diff` command doesn't have an --include, only --exclude.
def findFilesNotMatchingWithDiffAndUnzip(*regexesToExclude):
excludeArgs = list(itertools.chain.from_iterable(zip(["-x"]*9, regexesToExclude)))
# Exclude all things that are *not* the desired zip type
zipsWithDiffs = diff(["-q"] + excludeArgs + doNotUnzip)
# Take only changed files, not new/deleted ones (the diff there is obvious)
zipsWithDiffs = filter(lambda s: s.startswith("Files"), zipsWithDiffs)
zipsWithDiffs = map(lambda s: s.split()[1:4:2], zipsWithDiffs)
zipsWithDiffs = itertools.chain.from_iterable(zipsWithDiffs) # flatten
workQueueOfZips = queue.LifoQueue()
for it in zipsWithDiffs: workQueueOfZips.put(it)
# And unzip them
# If we spam unzip commands without a break, the unzips start failing.
# if we wait after every Popen, the script runs very slowly
# So create a pool of 10 unzip workers to consume from zipsWithDiffs
numWorkers = 10
workers = []
for i in range(min(numWorkers, workQueueOfZips.qsize())):
w = threading.Thread(target=unzipWorker, args=(workQueueOfZips,))
w.start()
workers.append(w)
for w in workers: w.join()
def unzipWorker(workQueueOfZips):
while not workQueueOfZips.empty():
zipFilePath = workQueueOfZips.get(0)
try: shutil.rmtree(zipFilePath+".unzipped/")
except FileNotFoundError: pass
logger.debug("unzipping " + zipFilePath)
subprocess.Popen(["unzip", "-qq", "-o", zipFilePath, "-d", zipFilePath+".unzipped/"]).wait()
diffusePath = "../../prebuilts/build-tools/diffuse/diffuse-0.3.0/bin/diffuser"
diffuseIsPresent = True
def compareWithDiffuse(listOfJars):
global diffuseIsPresent
if not diffuseIsPresent: return
for jarPath in list(filter(None, listOfJars)):
logger.info("jarpath: " + jarPath)
newJarPath = jarPath.replace("out-old", "out-new")
try: logger.info("\n".join(popenAndReturn([diffusePath, "diff", "--jar", jarPath, newJarPath])))
except FileNotFoundError:
logger.warning(f"https://github.com/JakeWharton/diffuse is not present on disk in expected location"
f" ${diffusePath}. You can install it.")
diffuseIsPresent = False
return
# We might care to know whether .sha1 or .md5 files have changed, but changes in those files will
# always be accompanied by more meaningful changes in other files, so we don"t need to show changes
# in .sha1 or .md5 files, or in .module files showing the hashes of other files, or config names.
excludedHashes = ["-x", "*.md5*", "-x", "*.sha**", "-I", " \"md5\".*",
"-I", " \"sha.*", "-I", " \"size\".*", "-I", " \"name\".*"]
# Don"t care about maven-metadata files because they have timestamps in them.
# temporarily ignore knm files
# If changes to the dackka args json are meaningful, they will affect the generated docs and show diff there
excludedFiles = ["-x", "*maven-metadata.xml**", "-x", r"**\.knm", "-x", "dackkaArgs-docs-tip-of-tree.json"]
# Also, ignore files that we already unzipped
excludedZips = ["-x", "*.zip", "-x", "*.jar", "-x", "*.aar", "-x", "*.apk", "-x", "*.klib"]
# These are baselined changes that we understand and know are no-ops in refactors
# "Unskippable" changes are multi-line and can't be skipped in `diff`, so post-process
baselinedChangesForAgpKmp = [
# these are new attributes being added
"""> "org.gradle.libraryelements": "aar",""",
"""> "org.gradle.jvm.environment": "android",""",
"""> "org.gradle.jvm.environment": "non-jvm",""",
"""> "org.gradle.jvm.environment": "standard-jvm",""",
"""> <type>aar</type>""",
# this attribute swap occurs alongside the above new attributes added.
# https://chat.google.com/room/AAAAW8qmCIs/4phaNn_gsrc
"""< "org.jetbrains.kotlin.platform.type": "androidJvm\"""",
"""> "org.jetbrains.kotlin.platform.type": "jvm\"""",
# name-only change; nothing resolves based on names
"""< "name": "releaseApiElements-published",""",
"""> "name": "androidApiElements-published",""",
""" <pre>actual typealias""", # open bug in dackka b/339221337
# we are switching from our KMP sourcejars solution to the upstream one
"""< "org.gradle.docstype": "fake-sources",""",
"""> "org.gradle.docstype": "sources",""",
]
unskippableBaselinedChangesForAgpKmp = [
# This was an AGP workaround for a dependency resolution issue for kotlin stdlib
# https://chat.google.com/room/AAAAW8qmCIs/4phaNn_gsrc
re.compile(r"""
[0-9]+,[0-9]+c[0-9]+
< \},
< "excludes": \[
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-stdlib-common"
< \},
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-test-common"
< \},
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-test-annotations-common"
< \}
< \]
---
> \}"""),
re.compile(r"""
< <exclusions>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-stdlib-common</artifactId>
< </exclusion>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-test-common</artifactId>
< </exclusion>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-test-annotations-common</artifactId>
< </exclusion>
< </exclusions>"""),
# .module files[] blocks aren't ordered; baseline reordering of samples-sources b/374956513
re.compile(r"""
[0-9]+,[0-9]+d[0-9]+
< "name": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
< "url": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
< "size": [0-9]+,
< "sha512": "[0-9a-z]+",
< "sha256": "[0-9a-z]+",
< "sha1": "[0-9a-z]+",
< "md5": "[0-9a-z]+"
< \},
< \{
[0-9]+a[0-9]+,[0-9]+
> \},
> \{
> "name": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
> "url": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
> "size": [0-9]+,
> "sha512": "[0-9a-z]+",
> "sha256": "[0-9a-z]+",
> "sha1": "[0-9a-z]+",
> "md5": "[0-9a-z]+"
"""),
# This one is okay because the common pom expresses a dependency on the jvm pom
# https://repo1.maven.org/maven2/org/jetbrains/kotlinx/kotlinx-coroutines-core/1.7.3/kotlinx-coroutines-core-1.7.3.pom
re.compile(r"""[0-9]+c[0-9]+
< <artifactId>kotlinx-coroutines-core-jvm</artifactId>
---
> <artifactId>kotlinx-coroutines-core</artifactId>"""),
# AGP-KMP adds a new default sourceSet, which in itself doesn't do anything
re.compile(r"""(11,17d10|12,18d11)
< "name": "androidRelease",
< "dependencies": \[
< "commonMain"
< \],
< "analysisPlatform": "jvm"
< \},
< \{
"""),
]
baselines = []
baselinedChanges = []
unskippableBaselinedChanges = []
arguments = sys.argv[1:]
if "agpKmp" in arguments:
arguments.remove("agpKmp"); baselines += ["agpKmp"]
logger.info("IGNORING DIFF FOR agpKmp")
baselinedChanges += baselinedChangesForAgpKmp
unskippableBaselinedChanges += unskippableBaselinedChangesForAgpKmp
excludedFiles += ["-x", r"**\.aar.unzipped/res"] # agp-kmp may add this empty
if arguments:
logger.error("invalid argument(s) for validateRefactorHelper: " + ", ".join(arguments))
logger.error("currently recognized arguments: agpKmp")
exit()
# interleave "-I" to tell diffutils to 'I'gnore the baselined lines
baselinedChangesArgs = list(itertools.chain.from_iterable(zip(["-I"]*99, [it.removeprefix(">").removeprefix("<") for it in baselinedChanges])))
def removeLinesStartingWith(listOfStrings, listOfStringsToMatchAgainst):
return [line for line in listOfStrings if not any(line.startswith(it) for it in listOfStringsToMatchAgainst)]
# removeLinesWithChangedSuffixes(["foo"], ["foo-bar"], "-bar") returns [], []
def removeLinesWithChangedSuffixes(newStrings, oldStrings, newSuffix, oldSuffix=""):
possibleIndices = [i for i, string in enumerate(newStrings) if string.endswith(newSuffix)]
convertedMap: Dict[int, str] = {i: newStrings[i].replace(newSuffix, oldSuffix) for i in possibleIndices}
confirmedIndicesNew = [i for i, converted in convertedMap.items() if converted in oldStrings]
confirmedIndicesOld = [oldStrings.index(convertedMap[i]) for i in confirmedIndicesNew]
resultNew = [string for i, string in enumerate(newStrings) if i not in confirmedIndicesNew]
resultOld = [string for i, string in enumerate(oldStrings) if i not in confirmedIndicesOld]
return resultNew, resultOld
# remove baselined elements from a single diff segment, starting with a location-in-file element like 223c220
def processDiffSegment(segment, fileExtension):
if segment == "": return ""
lines = segment.split("\n")
lines = removeLinesStartingWith(lines, baselinedChanges)
removed = [line[1:] for line in lines if line.startswith("<")]
added = [line[1:] for line in lines if line.startswith(">")]
if (fileExtension == "pom") and "agpKmp" in baselines:
# Ignore artifactIds' new -jvm and -android suffixes in poms b/356612738
added, removed = removeLinesWithChangedSuffixes(added, removed, "-jvm</artifactId>", "</artifactId>")
added, removed = removeLinesWithChangedSuffixes(added, removed, "-android</artifactId>", "</artifactId>")
keptContentLines = set(">" + it for it in added).union(set("<" + it for it in removed))
# Do not keep any formatting lines or the header if there is no content
if len(keptContentLines) == 0: return ""
# return value is based on `lines` because we want to retain ordering we may have lost during processing
# We want to keep keptContentLines, and formatting lines like "---" and the header (which don't start with <>).
return "\n".join([line for line in lines if (line != "") and ((not line[0] in "<>") or line in keptContentLines)])
# The output of diff entails multiple files, and multiple segments per file
# This function removes baselined changes from the entire diff output
def processMegaDiff(inputString):
perFileDiffs = inputString.split("diff -r")
processedPerFileDiffs = []
for i in range(1, len(perFileDiffs)):
diffStatement, _, diffContent = perFileDiffs[i].partition("\n")
newFilePath = diffStatement.rpartition(" ")[2]
fileExtension = newFilePath.rpartition(".")[2]
for multilineBaselinedElement in unskippableBaselinedChanges:
diffContent = multilineBaselinedElement.sub("", diffContent)
diffSegments = re.split(r'(^[0-9]+[0-9acd,]*\n)', diffContent, flags=re.MULTILINE)
result = []
# every other segment is a segment header like 99,112d87; 0th is ""
for j in range(1, len(diffSegments)-1, 2):
# a complete segment is a location-in-file header and everything until the next header. E.g.
# 83c70
# < <artifactId>kotlinx-coroutines-core-jvm</artifactId>
# ---
# > <artifactId>kotlinx-coroutines-core</artifactId>
segment = diffSegments[j] + diffSegments[j+1]
processedSegment = processDiffSegment(segment, fileExtension)
if processedSegment != "": result.append(processedSegment)
if len(result) != 0: processedPerFileDiffs += [newFilePath + "\n" + "\n".join(result)]
return "\ndiff ".join(processedPerFileDiffs)
# We unzip multiple times in this order because e.g. zips can contain apks.
# Find all zip files with a diff, e.g. the tip-of-tree-repository file, and maybe the docs zip
logger.info("UNZIPPING ZIP FILES");
findFilesNotMatchingWithDiffAndUnzip(r"**\.[^z][a-z]*")
# Find all aar and apk files with a diff. The proper regex would be `.*\..*[^akpr]+.*`, but it
# doesn"t work in difftools exclude's very limited regex syntax.
logger.info("UNZIPPING AAR/APK FILES");
findFilesNotMatchingWithDiffAndUnzip(r"**\.zip", r"**\.jar", r"**\.klib")
# Find all jars and klibs and unzip them (comes after because they could be inside aars/apks).
logger.info("UNZIPPING JAR/KLIB FILES");
findFilesNotMatchingWithDiffAndUnzip(r"**\.zip", r"**\.aar", r"**\.apk")
# now find all diffs in classes.jars
# TODO(375636734) Disabled because this tracks internal methods' diffs
# classesJarsWithDiffs = popenAndReturn(["find", "../../out-old/dist/", "-name", "classes.jar"])
# logger.info("classes.jar s: " + str(classesJarsWithDiffs))
# compareWithDiffuse(classesJarsWithDiffs)
# Now find all diffs in non-zipped files
finalExcludes = excludedHashes + excludedFiles + excludedZips + baselinedChangesArgs
finalDiff = "\n".join(diff(finalExcludes))
finalDiff = processMegaDiff(finalDiff)
print(finalDiff)