blob: 3c6396b48554d32439c1f5204df0bed6ef3ae0a9 [file] [log] [blame]
Owen Gray877bbd32024-07-19 10:01:20 -04001#
2# Copyright (C) 2019 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16"""A helper script for validateRefactor.sh. Should generally not be used directly.
17
18Can be used directly if validateRefactor.sh has already created the out-old & out-new dirs.
19In such a case, it can be run to compare those directories without regenerating them.
20This is generally only useful when updating baselines or iterating on this script itself.
21Takes baseline names as CLI arguments, which may be passed through from validateRefactor.sh.
22
23Typical usage example:
24
25 python validateRefactorHelper.py agpKmp
26"""
27import itertools
Owen Graya04eca32024-10-09 14:47:47 -040028import logging
29import queue
Owen Graye5a112d2024-10-09 14:47:47 -040030import re
Owen Gray877bbd32024-07-19 10:01:20 -040031import shutil
32import subprocess
33import sys
Owen Graya04eca32024-10-09 14:47:47 -040034import threading
Owen Graye5a112d2024-10-09 14:47:47 -040035from typing import Dict
Owen Gray877bbd32024-07-19 10:01:20 -040036
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Jars that must never be unzipped:
# - noto-emoji-compat `bundleinside`s an externally-built jar that carries timestamps.
# - classes.jar is compared using `diffuse` instead of unzipping and diffing class files.
bannedJars = ["-x", "noto-emoji-compat-java.jar", "-x", "classes.jar"]

# These extensions aren't archives, but the crude exclude-everything-but-jars pattern
# (and likewise the exclude-non-klib pattern, for .kt/.knm) fails to exclude them,
# so each one gets an explicit `-x` exclusion.
_nonZipExtensions = [
    "java", "json", "kt", "knm", "xml",
    "sha1", "sha256", "sha512", "md5",
    "module", "pom", "html",
]
areNotZips = [
    arg
    for extension in _nonZipExtensions
    for arg in ("-x", r"**\." + extension)
]

# Extensionless file names keep tripping up the exclude patterns, so exclude them by name.
hasNoExtension = ["-x", "manifest", "-x", "module"]

# Everything the unzip passes must skip, as ready-to-use `diff` arguments.
doNotUnzip = [*bannedJars, *areNotZips, *hasNoExtension]
51
def diff(excludes):
    """Recursively diff the old and new dist directories.

    Args:
        excludes: list of extra `diff` command-line arguments (e.g. "-q", or
            "-x"/"-I" flag/pattern pairs), appended after the two directories.

    Returns:
        Whatever popenAndReturn returns for the assembled `diff -r` command.
    """
    command = ["diff", "-r", "../../out-old/dist/", "../../out-new/dist/"]
    command.extend(excludes)
    return popenAndReturn(command)
54
def popenAndReturn(args):
    """Run a command to completion and return its standard output as a list of lines.

    Args:
        args: the command and its arguments, as a list of strings.

    Returns:
        The command's stdout decoded as UTF-8 and split on newlines (output that
        ends with a newline therefore yields a trailing empty string).

    Raises:
        FileNotFoundError: if the executable cannot be found.
    """
    logger.debug(" ".join(args))
    # subprocess.run waits for the child and closes the pipe, so no zombie process or
    # open file descriptor is left behind. The exit status is deliberately not checked
    # (check=False), exactly as before.
    completed = subprocess.run(args, stdout=subprocess.PIPE, check=False)
    return completed.stdout.decode("utf-8").split("\n")
58
def findFilesNotMatchingWithDiffAndUnzip(*regexesToExclude):
    """Find and unzip all files with an old/new diff that do NOT match the given patterns.

    The `diff` command only offers --exclude (`-x`), not --include, so the desired
    archive type is selected by excluding everything else.

    Args:
        *regexesToExclude: `diff -x` patterns for the files that should not be unzipped
            in this pass. Any number of patterns is accepted.
    """
    # Pair every pattern with its own "-x" flag, however many patterns are given.
    excludeArgs = []
    for pattern in regexesToExclude:
        excludeArgs += ["-x", pattern]
    # Exclude all things that are *not* the desired zip type
    diffLines = diff(["-q"] + excludeArgs + doNotUnzip)
    workQueueOfZips = queue.LifoQueue()
    for line in diffLines:
        # Take only changed files, not new/deleted ones (the diff there is obvious)
        if not line.startswith("Files"):
            continue
        # Words 1 and 3 of a "Files <old> and <new> differ" line are the two paths;
        # both the old and the new copy get unzipped.
        for zipFilePath in line.split()[1:4:2]:
            workQueueOfZips.put(zipFilePath)
    # And unzip them.
    # If we spam unzip commands without a break, the unzips start failing.
    # If we wait after every unzip, the script runs very slowly.
    # So create a pool of (at most) 10 unzip workers that consume from the work queue.
    numWorkers = 10
    workers = [
        threading.Thread(target=unzipWorker, args=(workQueueOfZips,))
        for _ in range(min(numWorkers, workQueueOfZips.qsize()))
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
Owen Gray877bbd32024-07-19 10:01:20 -040082
def unzipWorker(workQueueOfZips):
    """Worker-thread body: unzip archives taken from the shared queue until it is drained.

    Each archive at `<path>` is extracted into a fresh `<path>.unzipped/` directory;
    a directory left over from a previous run is deleted first.

    Args:
        workQueueOfZips: a queue of archive paths, shared between all workers.
    """
    while True:
        # Claim an item without blocking. Checking empty() first and then calling get()
        # is racy: another worker can take the last item in between, and the
        # non-blocking get() would then raise queue.Empty and kill this thread.
        try:
            zipFilePath = workQueueOfZips.get_nowait()
        except queue.Empty:
            return
        unzippedDir = zipFilePath + ".unzipped/"
        try:
            shutil.rmtree(unzippedDir)
        except FileNotFoundError:
            pass  # nothing left over from a previous run
        logger.debug("unzipping " + zipFilePath)
        # -qq: quiet, -o: overwrite without prompting. The exit status is ignored.
        subprocess.run(["unzip", "-qq", "-o", zipFilePath, "-d", unzippedDir], check=False)
Owen Gray877bbd32024-07-19 10:01:20 -040090
diffusePath = "../../prebuilts/build-tools/diffuse/diffuse-0.3.0/bin/diffuser"

# Flipped to False the first time the diffuse binary turns out to be missing, so that
# later calls return immediately instead of warning again.
diffuseIsPresent = True
def compareWithDiffuse(listOfJars):
    """Log the `diffuse` comparison of each old jar against its out-new counterpart.

    Args:
        listOfJars: paths of jars under out-old; falsy entries (e.g. the empty string
            left by a trailing newline) are skipped.
    """
    global diffuseIsPresent
    if not diffuseIsPresent:
        return
    for jarPath in filter(None, listOfJars):
        logger.info("jarpath: " + jarPath)
        newJarPath = jarPath.replace("out-old", "out-new")
        try:
            diffuseOutput = popenAndReturn([diffusePath, "diff", "--jar", jarPath, newJarPath])
        except FileNotFoundError:
            # Python f-strings interpolate with {name}; a "${name}" would print a stray "$".
            logger.warning("https://github.com/JakeWharton/diffuse is not present on disk in expected location"
                           f" {diffusePath}. You can install it.")
            diffuseIsPresent = False
            return
        logger.info("\n".join(diffuseOutput))
Owen Gray877bbd32024-07-19 10:01:20 -0400106
# We might care to know whether .sha1 or .md5 files have changed, but changes in those files
# will always be accompanied by more meaningful changes in other files, so we don't need to
# show changes in .sha1 or .md5 files, or in .module files showing the hashes of other files,
# or config names.
excludedHashes = [
    "-x", "*.md5*",
    "-x", "*.sha**",
    "-I", ' "md5".*',
    "-I", ' "sha.*',
    "-I", ' "size".*',
    "-I", ' "name".*',
]

# Don't care about maven-metadata files because they have timestamps in them.
# Temporarily ignore knm files.
# If changes to the dackka args json are meaningful, they will affect the generated docs
# and show up as a diff there.
excludedFiles = [
    "-x", "*maven-metadata.xml**",
    "-x", r"**\.knm",
    "-x", "dackkaArgs-docs-tip-of-tree.json",
]

# Also, ignore files that we already unzipped
excludedZips = [
    arg
    for extension in ("zip", "jar", "aar", "apk", "klib")
    for arg in ("-x", "*." + extension)
]
118
# Baselined changes: differences that we understand and know to be no-ops in refactors.
# Each entry here is a single diff line, including its leading ">" or "<" marker.
# Multi-line ("unskippable") changes can't be skipped by `diff` itself and are
# post-processed separately.
# NOTE(review): these strings are whitespace-sensitive; confirm the spacing after the
# marker against real diff output.
baselinedChangesForAgpKmp = [
    # these are new attributes being added
    '> "org.gradle.libraryelements": "aar",',
    '> "org.gradle.jvm.environment": "android",',
    '> "org.gradle.jvm.environment": "non-jvm",',
    '> "org.gradle.jvm.environment": "standard-jvm",',
    '> <type>aar</type>',
    # this attribute swap occurs alongside the above new attributes added.
    # https://chat.google.com/room/AAAAW8qmCIs/4phaNn_gsrc
    '< "org.jetbrains.kotlin.platform.type": "androidJvm"',
    '> "org.jetbrains.kotlin.platform.type": "jvm"',
    # name-only change; nothing resolves based on names
    '< "name": "releaseApiElements-published",',
    '> "name": "androidApiElements-published",',
    # open bug in dackka b/339221337
    ' <pre>actual typealias',
    # we are switching from our KMP sourcejars solution to the upstream one
    '< "org.gradle.docstype": "fake-sources",',
    '> "org.gradle.docstype": "sources",',
]
# Multi-line baselined changes. `diff -I` can only ignore single lines, so these patterns
# are stripped from the diff output afterwards.

# This was an AGP workaround for a dependency resolution issue for kotlin stdlib
# https://chat.google.com/room/AAAAW8qmCIs/4phaNn_gsrc
_stdlibExcludesInModuleFile = re.compile(r"""
[0-9]+,[0-9]+c[0-9]+
< \},
< "excludes": \[
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-stdlib-common"
< \},
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-test-common"
< \},
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-test-annotations-common"
< \}
< \]
---
> \}""")

# The same workaround as it appears in pom files.
_stdlibExclusionsInPom = re.compile(r"""
< <exclusions>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-stdlib-common</artifactId>
< </exclusion>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-test-common</artifactId>
< </exclusion>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-test-annotations-common</artifactId>
< </exclusion>
< </exclusions>""")

# .module files[] blocks aren't ordered; baseline reordering of samples-sources b/374956513
_reorderedSamplesSources = re.compile(r"""
[0-9]+,[0-9]+d[0-9]+
< "name": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
< "url": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
< "size": [0-9]+,
< "sha512": "[0-9a-z]+",
< "sha256": "[0-9a-z]+",
< "sha1": "[0-9a-z]+",
< "md5": "[0-9a-z]+"
< \},
< \{
[0-9]+a[0-9]+,[0-9]+
> \},
> \{
> "name": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
> "url": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
> "size": [0-9]+,
> "sha512": "[0-9a-z]+",
> "sha256": "[0-9a-z]+",
> "sha1": "[0-9a-z]+",
> "md5": "[0-9a-z]+"
""")

# This one is okay because the common pom expresses a dependency on the jvm pom
# https://repo1.maven.org/maven2/org/jetbrains/kotlinx/kotlinx-coroutines-core/1.7.3/kotlinx-coroutines-core-1.7.3.pom
_coroutinesCoreJvmToCommon = re.compile(r"""[0-9]+c[0-9]+
< <artifactId>kotlinx-coroutines-core-jvm</artifactId>
---
> <artifactId>kotlinx-coroutines-core</artifactId>""")

# AGP-KMP adds a new default sourceSet, which in itself doesn't do anything
_androidReleaseSourceSet = re.compile(r"""(11,17d10|12,18d11)
< "name": "androidRelease",
< "dependencies": \[
< "commonMain"
< \],
< "analysisPlatform": "jvm"
< \},
< \{
""")

unskippableBaselinedChangesForAgpKmp = [
    _stdlibExcludesInModuleFile,
    _stdlibExclusionsInPom,
    _reorderedSamplesSources,
    _coroutinesCoreJvmToCommon,
    _androidReleaseSourceSet,
]
217
# Names of the baselines enabled on the command line, plus the single-line and
# multi-line changes that those baselines allow.
baselines = []
baselinedChanges = []
unskippableBaselinedChanges = []
arguments = sys.argv[1:]
if "agpKmp" in arguments:
    arguments.remove("agpKmp")
    baselines += ["agpKmp"]
    logger.info("IGNORING DIFF FOR agpKmp")
    baselinedChanges += baselinedChangesForAgpKmp
    unskippableBaselinedChanges += unskippableBaselinedChangesForAgpKmp
    excludedFiles += ["-x", r"**\.aar.unzipped/res"]  # agp-kmp may add this empty
# Anything left over was not recognized as a baseline name.
if arguments:
    logger.error("invalid argument(s) for validateRefactorHelper: " + ", ".join(arguments))
    logger.error("currently recognized arguments: agpKmp")
    # Use sys.exit rather than the interactive-only `exit` helper, and report the failure
    # with a non-zero status so that calling scripts can detect it.
    sys.exit(1)
232
# Tell diffutils to 'I'gnore the baselined lines: strip the leading ">" / "<" diff marker
# from each one and pair it with its own "-I" flag. Pairing per entry (instead of zipping
# against a fixed-length list of flags) means no baseline is silently dropped, however
# many there are.
baselinedChangesArgs = [
    arg
    for it in baselinedChanges
    for arg in ("-I", it.removeprefix(">").removeprefix("<"))
]
Owen Gray877bbd32024-07-19 10:01:20 -0400235
def removeLinesStartingWith(listOfStrings, listOfStringsToMatchAgainst):
    """Return the strings that do not start with any of the given prefixes.

    Args:
        listOfStrings: the candidate lines.
        listOfStringsToMatchAgainst: the prefixes; a line starting with any of them is dropped.

    Returns:
        A new list with the surviving lines in their original order.
    """
    # str.startswith accepts a tuple of prefixes and tests them all in one call.
    prefixes = tuple(listOfStringsToMatchAgainst)
    survivors = []
    for candidate in listOfStrings:
        if candidate.startswith(prefixes):
            continue
        survivors.append(candidate)
    return survivors
Owen Gray877bbd32024-07-19 10:01:20 -0400238
def removeLinesWithChangedSuffixes(newStrings, oldStrings, newSuffix, oldSuffix=""):
    """Drop pairs of lines that differ only by a renamed suffix.

    A new string ending in `newSuffix` is paired with an old string that is identical
    once that trailing `newSuffix` is swapped for `oldSuffix`. Pairing is one-to-one:
    every old string cancels at most one new string, so duplicated lines only cancel
    as many times as they occur on both sides.

    Example: removeLinesWithChangedSuffixes(["foo-bar"], ["foo"], "-bar") returns ([], []).

    Args:
        newStrings: the added lines.
        oldStrings: the removed lines.
        newSuffix: the suffix that lines in `newStrings` may carry.
        oldSuffix: the suffix the matching line in `oldStrings` carries instead.

    Returns:
        A tuple (resultNew, resultOld) of the unpaired new and old strings, each in
        its original order.
    """
    oldAlreadyPaired = [False] * len(oldStrings)
    resultNew = []
    for string in newStrings:
        if string.endswith(newSuffix):
            # Swap only the trailing suffix; str.replace would also rewrite any earlier
            # occurrence of the same text inside the string.
            converted = string[:len(string) - len(newSuffix)] + oldSuffix
            partner = next(
                (i for i, old in enumerate(oldStrings)
                 if not oldAlreadyPaired[i] and old == converted),
                None,
            )
            if partner is not None:
                oldAlreadyPaired[partner] = True
                continue
        resultNew.append(string)
    resultOld = [old for i, old in enumerate(oldStrings) if not oldAlreadyPaired[i]]
    return resultNew, resultOld
248
def processDiffSegment(segment, fileExtension):
    """Remove baselined elements from a single diff segment.

    Args:
        segment: one diff segment, starting with a location-in-file header like 223c220.
        fileExtension: extension of the file being diffed; "pom" enables the pom-specific
            baselines when the agpKmp baseline is active.

    Returns:
        The segment without its baselined lines, or "" if no content lines remain.
    """
    if segment == "":
        return ""
    remaining = removeLinesStartingWith(segment.split("\n"), baselinedChanges)
    removed = [entry[1:] for entry in remaining if entry.startswith("<")]
    added = [entry[1:] for entry in remaining if entry.startswith(">")]
    if fileExtension == "pom" and "agpKmp" in baselines:
        # Ignore artifactIds' new -jvm and -android suffixes in poms b/356612738
        for platformSuffix in ("-jvm</artifactId>", "-android</artifactId>"):
            added, removed = removeLinesWithChangedSuffixes(added, removed, platformSuffix, "</artifactId>")
    keptContentLines = {">" + it for it in added} | {"<" + it for it in removed}
    # Do not keep any formatting lines or the header if there is no content
    if not keptContentLines:
        return ""
    # The result is rebuilt from `remaining` to retain the ordering lost during processing.
    # Keep the surviving content lines, plus formatting lines like "---" and the header
    # (which don't start with < or >).
    output = []
    for entry in remaining:
        if entry == "":
            continue
        if entry[0] in "<>" and entry not in keptContentLines:
            continue
        output.append(entry)
    return "\n".join(output)
Owen Graye5a112d2024-10-09 14:47:47 -0400266
def processMegaDiff(inputString):
    """Remove baselined changes from the entire output of `diff -r`.

    The output of diff entails multiple files, and multiple segments per file.

    Args:
        inputString: the complete diff output as one string.

    Returns:
        The remaining diff: for each file that still has differences, its new path
        followed by its surviving segments; files are joined with "\\ndiff ".
    """
    processedPerFileDiffs = []
    # Chunk 0 is whatever precedes the first "diff -r" command line; it is skipped.
    for perFileDiff in inputString.split("diff -r")[1:]:
        diffStatement, _, diffContent = perFileDiff.partition("\n")
        newFilePath = diffStatement.rpartition(" ")[2]
        fileExtension = newFilePath.rpartition(".")[2]
        for multilineBaselinedElement in unskippableBaselinedChanges:
            diffContent = multilineBaselinedElement.sub("", diffContent)
        diffSegments = re.split(r'(^[0-9]+[0-9acd,]*\n)', diffContent, flags=re.MULTILINE)
        # Because the pattern has a capturing group, odd indices hold the segment headers
        # (like 99,112d87) and the even index after each holds that segment's body;
        # index 0 is the text before the first header.
        # A complete segment is a location-in-file header and everything until the next
        # header. E.g.
        # 83c70
        # < <artifactId>kotlinx-coroutines-core-jvm</artifactId>
        # ---
        # > <artifactId>kotlinx-coroutines-core</artifactId>
        headers = diffSegments[1::2]
        bodies = diffSegments[2::2]
        processedSegments = (
            processDiffSegment(header + body, fileExtension)
            for header, body in zip(headers, bodies)
        )
        result = [processed for processed in processedSegments if processed != ""]
        if result:
            processedPerFileDiffs.append(newFilePath + "\n" + "\n".join(result))
    return "\ndiff ".join(processedPerFileDiffs)
Owen Gray877bbd32024-07-19 10:01:20 -0400292
# We unzip multiple times, in this order, because e.g. zips can contain apks.
# Each pass is (log message, patterns of the files NOT to unzip in that pass).
_unzipPasses = (
    # Find all zip files with a diff, e.g. the tip-of-tree-repository file, and maybe the docs zip
    ("UNZIPPING ZIP FILES", (r"**\.[^z][a-z]*",)),
    # Find all aar and apk files with a diff. The proper regex would be `.*\..*[^akpr]+.*`,
    # but it doesn't work in difftools exclude's very limited regex syntax.
    ("UNZIPPING AAR/APK FILES", (r"**\.zip", r"**\.jar", r"**\.klib")),
    # Find all jars and klibs and unzip them (comes after because they could be inside aars/apks).
    ("UNZIPPING JAR/KLIB FILES", (r"**\.zip", r"**\.aar", r"**\.apk")),
)
for _passMessage, _patternsToExclude in _unzipPasses:
    logger.info(_passMessage)
    findFilesNotMatchingWithDiffAndUnzip(*_patternsToExclude)

# now find all diffs in classes.jars
# TODO(375636734) Disabled because this tracks internal methods' diffs
# classesJarsWithDiffs = popenAndReturn(["find", "../../out-old/dist/", "-name", "classes.jar"])
# logger.info("classes.jar s: " + str(classesJarsWithDiffs))
# compareWithDiffuse(classesJarsWithDiffs)

# Now find all diffs in non-zipped files, strip the baselined changes, and print what is left
finalExcludes = [*excludedHashes, *excludedFiles, *excludedZips, *baselinedChangesArgs]
finalDiff = processMegaDiff("\n".join(diff(finalExcludes)))
print(finalDiff)