diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index b8fada9..ba3fcf6 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -21,7 +21,7 @@ jobs:
       - name: Install pylint
         run: sudo pip3 install pylint
       - name: pylint
-        run: pylint rootfs.py lib/utils.py lib/generator.py lib/target.py --disable=duplicate-code
+        run: pylint rootfs.py lib/utils.py lib/generator.py lib/target.py check_substitutes.py --disable=duplicate-code
 
   shellcheck:
     name: Lint shell files
@@ -42,3 +42,12 @@ jobs:
         run: sudo pip3 install reuse
       - name: reuse
         run: reuse lint
+
+  substitutes:
+    name: Check validity of substituted tarballs
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+      - name: Check validity of substitutes
+        run: ./check_substitutes.py
diff --git a/check_substitutes.py b/check_substitutes.py
new file mode 100755
index 0000000..177bf68
--- /dev/null
+++ b/check_substitutes.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# SPDX-FileCopyrightText: 2024 fosslinux
+
+"""Check that substituted files are the same."""
+import bz2
+import filecmp
+import gzip
+import itertools
+import lzma
+import shutil
+import tarfile
+import tempfile
+import sys
+import os
+
+from lib.generator import Generator
+
+# Get a temporary directory to work in
+working = tempfile.mkdtemp()
+
+# Colour constants
+# pylint: disable=too-few-public-methods
+class Colors():
+    """ANSI Color Codes"""
+    GREY = "\033[90m"
+    RED = "\033[91m"
+    GREEN = "\033[92m"
+    ORANGE = "\033[91m\033[93m"
+    YELLOW = "\033[93m"
+    END = "\033[0m"
+
+def traverse_path(base_root):
+    """
+    Walks base_root and returns a tuple of two sets: all directories and all
+    files found, each as a path relative to base_root.
+    """
+    all_dirs = set()
+    all_files = set()
+    for root, directories, files in os.walk(base_root, topdown=True):
+        # relpath removes the base_root prefix. str.lstrip() would be wrong here,
+        # as it strips any leading characters that occur in base_root.
+        for d in directories:
+            all_dirs.add(os.path.relpath(os.path.join(root, d), base_root))
+        for f in files:
+            all_files.add(os.path.relpath(os.path.join(root, f), base_root))
+    return (all_dirs, all_files)
+
+class Distfile():
+    """Represents one distfile and operations performed on it."""
+    def __init__(self, i, url):
+        self.i = i
+        self.url = url
+        self.out_file = f"{i}-{os.path.basename(url)}"
+        self.filepath = ""
+
+    def download(self):
+        """Downloads the distfile into the working directory."""
+        Generator.download_file(self.url, working, self.out_file, silent=True)
+        self.filepath = os.path.join(working, self.out_file)
+
+    def decompress(self):
+        """Decompresses the distfile, if it has a known compression extension."""
+        compression = self.out_file.split('.')[-1]
+        decompress_func = {
+            "gz": gzip.open,
+            "tgz": gzip.open,
+            "bz2": bz2.open,
+            "xz": lzma.open,
+            "lzma": lzma.open
+        }
+        if compression not in decompress_func:
+            # No decompression needed
+            return
+        # Remove the compression extension
+        new_path = '.'.join(self.filepath.split('.')[:-1])
+        # tgz -> .tar
+        if compression == "tgz":
+            new_path = f"{new_path}.tar"
+        # Move the decompressed binary stream to a new file
+        with decompress_func[compression](self.filepath, 'rb') as fin:
+            with open(new_path, 'wb') as fout:
+                shutil.copyfileobj(fin, fout)
+        self.filepath = new_path
+
+    def extract(self):
+        """Extracts the distfile, if it is a tarball, into a fresh directory."""
+        # Sanity check
+        if not tarfile.is_tarfile(self.filepath):
+            return
+        # The index restarts for every line of the substitutes file, so a
+        # fixed per-index directory name would collide; let mkdtemp pick one.
+        out_dir = tempfile.mkdtemp(prefix=f"{self.i}-", dir=working)
+        with tarfile.open(self.filepath, 'r') as f:
+            # These archives come from third-party mirrors. Where supported,
+            # use the "data" filter to reject members escaping out_dir.
+            if hasattr(tarfile, "data_filter"):
+                f.extractall(path=out_dir, filter="data")
+            else:
+                f.extractall(path=out_dir)
+        self.filepath = out_dir
+
+    # It makes more sense here to label them d1 and d2 rather than have one be self.
+    # pylint: disable=no-self-argument
+    def compare(d1, d2):
+        """Returns True if this distfile has the same contents as another distfile."""
+        if not os.path.isdir(d1.filepath):
+            # Compare files
+            return filecmp.cmp(d1.filepath, d2.filepath, shallow=False)
+        if not os.path.isdir(d2.filepath):
+            # Then, d2 is a file and d1 is a directory
+            return False
+        # Otherwise it's two directories
+        dirnames1, filenames1 = traverse_path(d1.filepath)
+        dirnames2, filenames2 = traverse_path(d2.filepath)
+        if dirnames1 != dirnames2:
+            return False
+        if filenames1 != filenames2:
+            return False
+        # cmpfiles returns (match, mismatch, errors), which is always truthy as a
+        # tuple; the trees are only equal if nothing mismatched or errored.
+        _, mismatch, errors = filecmp.cmpfiles(d1.filepath, d2.filepath,
+                                               filenames1, shallow=False)
+        return not mismatch and not errors
+
+def check(*args):
+    """
+    Check if a list of distfiles are equivalent, comparing them first as
+    downloaded, then decompressed, then extracted. Returns True on a mismatch.
+    """
+    notequiv = []
+    # Find all pairs that are not equivalent
+    for pair in itertools.combinations(args, 2):
+        if pair[0].compare(pair[1]):
+            print(f"{Colors.GREY}DEBUG: {pair[0].url} is equivalent to {pair[1].url}{Colors.END}")
+        else:
+            notequiv.append(pair)
+
+    # Decompress all, and check again
+    for d in {y for x in notequiv for y in x}:
+        d.decompress()
+    for pair in notequiv.copy():
+        if pair[0].compare(pair[1]):
+            # pylint: disable=line-too-long
+            print(f"{Colors.YELLOW}NOTE: {pair[0].url} is equivalent to {pair[1].url} when decompressed{Colors.END}")
+            notequiv.remove(pair)
+
+    # Extract all, and check again
+    for d in {y for x in notequiv for y in x}:
+        d.extract()
+    has_error = False
+    for pair in notequiv:
+        if pair[0].compare(pair[1]):
+            # pylint: disable=line-too-long
+            print(f"{Colors.ORANGE}WARN: {pair[0].url} is equivalent to {pair[1].url} when extracted{Colors.END}")
+        else:
+            has_error = True
+            # pylint: disable=line-too-long
+            print(f"{Colors.RED}ERROR: {pair[0].url} is not equivalent to {pair[1].url}!{Colors.END}")
+
+    return has_error
+
+def main():
+    """Checks every line of the substitutes file; exits non-zero on a mismatch."""
+    has_error = False
+    try:
+        with open("substitutes", 'r', encoding="utf-8") as f:
+            for line in f:
+                # Each line is a whitespace-separated list of equivalent URLs
+                urls = line.split()
+                if not urls:
+                    # Nothing to check on a blank line
+                    continue
+                distfiles = [Distfile(i, url) for i, url in enumerate(urls)]
+                for distfile in distfiles:
+                    distfile.download()
+                if check(*distfiles):
+                    has_error = True
+    finally:
+        # Do not leave the downloaded and extracted tarballs behind
+        shutil.rmtree(working, ignore_errors=True)
+    sys.exit(1 if has_error else 0)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/generator.py b/lib/generator.py
index 5ca1e55..69f9913 100755
--- a/lib/generator.py
+++ b/lib/generator.py
@@ -274,7 +274,7 @@ When in doubt, try deleting the file in question -- it will be downloaded again
 this script the next time")
 
     @staticmethod
-    def download_file(url, directory, file_name):
+    def download_file(url, directory, file_name, silent=False):
         """
         Download a single source archive.
         """
@@ -290,7 +290,8 @@ this script the next time")
             "User-Agent": "curl/7.88.1"
         }
         if not os.path.isfile(abs_file_name):
-            print(f"Downloading: {file_name}")
+            if not silent:
+                print(f"Downloading: {file_name}")
             response = requests.get(url, allow_redirects=True, stream=True,
                     headers=headers, timeout=20)
             if response.status_code == 200:
diff --git a/steps/diffutils-2.7/sources b/steps/diffutils-2.7/sources
index 8e853f0..960ac12 100644
--- a/steps/diffutils-2.7/sources
+++ b/steps/diffutils-2.7/sources
@@ -1 +1 @@
-https://dept.rpi.edu/acm/packages/diffutils/2.7/distrib/diffutils-2.7.tar.bz2 fd6c44f7cbd0a942a3f0c012365997965451197ad4faeb0b8aac1fe03192de58
+https://mirrors.kernel.org/gnu/diffutils/diffutils-2.7.tar.gz d5f2489c4056a31528e3ada4adacc23d498532b0af1a980f2f76158162b139d6
diff --git a/substitutes b/substitutes
new file mode 100644
index 0000000..a28c6ee
--- /dev/null
+++ b/substitutes
@@ -0,0 +1,3 @@
+https://mirrors.kernel.org/gnu/bash/bash-2.05b.tar.gz https://src.fedoraproject.org/repo/pkgs/bash/bash-2.05b.tar.bz2/f3e5428ed52a4f536f571a945d5de95d/bash-2.05b.tar.bz2
+https://mirrors.kernel.org/gnu/bc/bc-1.07.1.tar.gz https://mirrors.kernel.org/slackware/slackware64-15.0/source/ap/bc/bc-1.07.1.tar.xz
+https://mirrors.kernel.org/gnu/ed/ed-1.4.tar.gz https://mirrors.kernel.org/slackware/slackware-13.37/source/a/ed/ed-1.4.tar.xz
diff --git a/substitutes.license b/substitutes.license
new file mode 100644
index 0000000..4afb897
--- /dev/null
+++ b/substitutes.license
@@ -0,0 +1,3 @@
+SPDX-FileCopyrightText: 2024 fosslinux
+
+SPDX-License-Identifier: GPL-3.0-or-later