diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 639d3d3..e2000ab 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -21,7 +21,7 @@ jobs: - name: Install pylint run: sudo pip3 install pylint - name: pylint - run: pylint rootfs.py sysa.py sysc.py lib/utils.py lib/sysgeneral.py lib/tmpdir.py --disable=duplicate-code + run: pylint rootfs.py lib/utils.py lib/generator.py lib/tmpdir.py --disable=duplicate-code shellcheck: name: Lint shell files @@ -30,7 +30,7 @@ jobs: - name: Checkout repo uses: actions/checkout@v3 - name: shellcheck - run: shellcheck sysa/run.sh sysa/run2.sh sysb/init sysb/run.sh sysc/init sysc/run.sh sysa/helpers.sh download-distfiles.sh + run: shellcheck steps/helpers.sh download-distfiles.sh reuse: name: Lint reuse information diff --git a/.gitignore b/.gitignore index 125eaad..8908b92 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,6 @@ tmp/ kernel -sysa/distfiles/ -sysc/distfiles/ +distfiles/ __pycache__ -sysa/bootstrap.cfg +steps/bootstrap.cfg diff --git a/.gitmodules b/.gitmodules index 942e4ed..e3b6883 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,7 +2,6 @@ # SPDX-FileCopyrightText: 2021 fosslinux # # SPDX-License-Identifier: MIT - -[submodule "sysa/stage0-posix/src"] - path = sysa/stage0-posix/src - url = https://github.com/oriansj/stage0-posix/ +[submodule "seed/stage0-posix"] + path = seed/stage0-posix + url = https://github.com/oriansj/stage0-posix diff --git a/.reuse/dep5 b/.reuse/dep5 index e499642..543395c 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -9,6 +9,6 @@ Source: https://github.com/fosslinux/live-bootstrap # Copyright: $YEAR $NAME <$CONTACT> # License: ... 
-Files: sys*/*/sources sysa/*/*.checksums sysa/SHA256SUMS.pkgs sysa/*/simple-patches/* +Files: steps/*/sources steps/*/*.checksums steps/SHA256SUMS.pkgs steps/*/simple-patches/* steps/pre-network-sources Copyright: none License: MIT diff --git a/DEVEL.md b/DEVEL.md index 7cd866e..db3d89a 100644 --- a/DEVEL.md +++ b/DEVEL.md @@ -14,31 +14,40 @@ and that a full build completes. ## Structure -Each system corresponds to a reboot of the live environment. There is only one -appropriate structure as shown below (eg for sysa): - ``` -sysa -├── any-global-files.sh +seed +├── seed.kaem +├── script-generator.c +├── ... +└── stage0-posix + +steps +├── manifest +├── any-global-files +├── jump +│   └── linux.sh +├── improve +│   └── x.sh ├── somepackage-version -│   ├── somepackage-version.kaem (or .sh) +│   ├── pass1.kaem +│   ├── pass2.sh │   ├── files │   ├── simple-patches │   ├── mk │   └── patches -└── tmp ``` -Global scripts that drive the entire system go directly under `sysx`. `tmp` -contains the temporary system used for QEMU or a chroot. +The `seed` directory contains everything required for `script-generator` to be +run. -Then, each package is in its own specific directory, named `package-version`. -It then diverges based upon which driver is being used: - -- `kaem`: A file named `package-version.kaem` is called by the master script. -- `bash`: The `build` function from helper.sh is called from the master script. - There are default functions run which can be overridden by an optional script - `package-version.sh` within the package-specific directory. +In the `steps` directory, the bootstrap process is defined in `manifest`. +Each package to be built is named `package-version`. +Each subsequent build of a package is the nth pass. Scripts are named +accordingly; eg, the first build would be called `pass1.sh`, the second would be +`pass2.sh`, etc. +Scripts run in kaem era should be denoted as such in their filename; +`pass1.kaem`, for example. 
Pass numbers do not reset after kaem, ie, you cannot +have both `pass1.kaem` and `pass1.sh`. In this folder, there are other folders/files. `*.checksums` are required for early packages that are build with kaem, others are optional. @@ -51,21 +60,16 @@ Permissible folders/files: - `simple-patches`: patches for the source that use the before/after convention of simple-patch.c - `*.checksums`: files containing the checksums for the resulting binaries and libraries that are compiled and installed. - - Up to and including `coreutils-6.10`, `sha256sum` from `stage0-posix` - is used for the checksumming. Later we switch to GNU version. - - To extract checksums of the binaries, use of qemu mode is recommended - (i.e. `./rootfs.py -q -qk $kernel --update-checksums`). -- compilation script - -The directory m2-functions is used for M2-Planet functions (currently). + - Otherwise, the package's checksum is in SHA256SUMS.pkgs. +- compilation script(s) ## Conventions - **Patches:** - all patches are `-p0` - all patches begin with a patch header -- **README:** - - all stages are explained in README +- **parts.rst:** + - all packages are explained in `parts.rst` - **General:** - Where possible, all blocks of text should be limited to a length of 80 characters. @@ -79,9 +83,3 @@ The directory m2-functions is used for M2-Planet functions (currently). - Patches are licensed under the license of the project which they are patching. - All files (excluding files within submodules) must comply with REUSE v3.0. - -## git - -All changes must be submitted as PRs. Pushing to master is disallowed, even if -push access is granted to a user. Only pushes to master should be merging of -patches into master. diff --git a/README.rst b/README.rst index 49eca8c..b14116c 100644 --- a/README.rst +++ b/README.rst @@ -12,94 +12,90 @@ An attempt to provide a reproducible, automatic, complete end-to-end bootstrap from a minimal number of binary seeds to a supported fully functioning operating system. 
-Get me started! ---------------- +How do I use this? +------------------ + +Quick start: + +See ``./rootfs.py --help`` and follow the instructions given there. +This uses a variety of userland tools to prepare the bootstrap. + +(*Currently, there is no way to perform the bootstrap without external +preparations! This is a currently unsolved problem.*) + +Without using Python: 1. ``git clone https://github.com/fosslinux/live-bootstrap`` 2. ``git submodule update --init --recursive`` -3. Provide a kernel (vmlinuz file) as the name ``kernel`` in the root of the - repository. **This must be a 32-bit kernel.** -4. ``./rootfs.py --qemu`` - ensure your account has kvm privileges and qemu - installed. - - a. Alternatively, run ``./rootfs.py --chroot`` to run it in a chroot. - b. Alternatively, run ``./rootfs.py --bwrap`` to run it in a bubblewrap - sandbox. When user namespaces are supported, this mode is rootless. - c. Alternatively, run ``./rootfs.py`` but don’t run the actual - virtualization and instead copy sysa/tmp/initramfs to a USB or - some other device and boot from bare metal. NOTE: we now require - a hard drive. This is currently hardcoded as sda. You also need - to put ``sysc/tmp/disk.img`` onto your sda on the bootstrapping - machine. - d. Alternatively, do not use python at all, see "Python-less build" - below. - -5. Wait. -6. If you can, observe the many binaries in ``/usr/bin``! When the - bootstrap is completed ``bash`` is launched providing a shell to - explore the system. - +3. Consider whether you are going to run this in a chroot, in QEMU, or on bare + metal. (All of this *can* be automated, but not in a trustable way. See + further below.) + a. **chroot:** Create a directory where the chroot will reside, run + ``./download-distfiles.sh``, and copy: + * The entire contents of ``seed/stage0-posix`` into that directory. + * All other files in ``seed`` into that directory. + * ``steps/`` and ``distfiles/`` into that directory. 
+ * At least all files listed in ``steps/pre-network-sources`` must be + copied in. All other files will be obtained from the network. + * Run ``/bootstrap-seeds/POSIX/x86/kaem-optional-seed`` in the chroot. + (Eg, ``chroot rootfs /bootstrap-seeds/POSIX/x86/kaem-optional-seed``). + b. **QEMU:** Create two blank disk images. + * On the first image, write + ``seed/stage0-posix/bootstrap-seeds/NATIVE/x86/builder-hex0-x86-stage1.img`` + to it, followed by ``kernel-bootstrap/builder-hex0-x86-stage2.hex0``, + followed by zeros padding the disk to the next sector. + * distfiles can be obtained using ``./download-distfiles.sh``. + * See the list in part a. For every file within that list, write a line to + the disk ``src <size> <path>``, followed by the contents + of the file. + * *Only* copy distfiles listed in ``steps/pre-network-sources`` into + this disk. + * Optionally (if you don't do this, distfiles will be network downloaded): + * On the second image, create an MSDOS partition table and one ext3 + partition. + * Copy ``distfiles/`` into this disk. + * Run QEMU, with 4+G RAM, optionally SMP (multicore), both drives (in the + order introduced above), a NIC with model E1000 (``-nic + user,model=e1000``), and ``-machine kernel-irqchip=split``. + c. **Bare metal:** Follow the same steps as QEMU, but the disks need to be + two different *physical* disks, and boot from the first disk. Background ---------- -This project is a part of the bootstrappable project, a project that -aims to be able to build complete computing platforms through the use of -source code. When you build a compiler like GCC, you need another C -compiler to compile the compiler - turtles all the way down. Even the -first GCC compiler was written in C. There has to be a way to break the -chain… +Problem statement +================= -There has been significant work on this over the last 5 years, from -Jeremiah Orians’ stage0, hex2 and M2-Planet to janneke’s Mes. 
We have a -currently, fully-functioning chain of bootstrapping from the 357-byte -hex0 seed to a complete GCC compiler and hence a full Linux operating -system. From there, it is trivial to move to other UNIXes. However, -there is only currently one vector through which this can be -automatically done, GNU Guix. +live-bootstrap's overarching problem statement is; -While the primary author of this project does not believe Guix is a bad -project, the great reliance on Guile, the complexity of many of the -scripts and the rather steep learning curve to install and run Guix make -it a very non plug-and-play solution. Furthermore, there is currently -(Jan 2021) no possible way to run the bootstrap from outside of a -pre-existing Linux environment. Additionally, Guix uses many scripts and -distributed files that cannot be considered source code. +> How can a usable Linux system be created with only human-auditable, and +wherever possible, human-written, source code? -(NOTE: Guix is working on a Full Source Bootstrap, but I’m not -completely sure what that entails). +Clarifications: -Furthermore, having an alternative bootstrap automation tool allows -people to have greater trust in the bootstrap procedure. +* "usable" means a modern toolchain, with appropriate utilities, that can be + used to expand the amount of software on the system, interactively, or + non-interactively. +* "human-auditable" is discretionary, but is usually fairly strict. See + "Specific things to be bootstrapped" below. -Comparison between GNU Guix and live-bootstrap ----------------------------------------------- +Why is this difficult? 
+====================== -+----------------------+----------------------+----------------------+ -| Item | Guix | live-bootstrap | -+======================+======================+======================+ -| Total size of seeds | ~30MB (Reduced | ~1KB | -| [1] | Source Bootstrap) | | -| | [2] | | -+----------------------+----------------------+----------------------+ -| Use of kernel | Linux-Libre Kernel | Any Linux Kernel | -| | | (2.6+) [3] | -+----------------------+----------------------+----------------------+ -| Implementation | Yes | No (in development) | -| complete | | | -+----------------------+----------------------+----------------------+ -| Automation | Almost fully | Optional user | -| | automatic | customization | -+----------------------+----------------------+----------------------+ +The core of a modern Linux system is primarily written in C and C++. C and C++ +are **self-hosting**, ie, nearly every single C compiler is written in C. -[1]: Both projects only use software licensed under a FSF-approved -free software license. Kernel is excluded from seed. -[2]: Reiterating that Guix is working on a full source bootstrap, -although that still uses guile (~12 MB). [3]: Work is ongoing to use -other, smaller POSIX kernels. +Every single version of GCC was written in C. To avoid using an existing +toolchain, we need some way to be able to compile a GCC version without C. We +can use a less well-featured compiler, TCC, to do this. And so forth, until we +get to a fairly primitive C compiler written in assembly, ``cc_x86``. -Why would I want bootstrapping? -------------------------------- +Going up through this process requires a bunch of other utilities as well; the +autotools suite, guile and autogen, etc. These also have to be matched +appropriately to the toolchain available. + +Why should I care? +------------------ That is outside of the scope of this README. Here’s a few things you can look at: @@ -117,7 +113,7 @@ bootstrapping. 
However, there are a number of non-auditable files used in many of their packages. Here is a list of file types that we deem unsuitable for bootstrapping. -1. Binaries (apart from seed hex0, kaem, kernel). +1. Binaries (apart from seed hex0, kaem, builder-hex0). 2. Any pre-generated configure scripts, or Makefile.in’s from autotools. 3. Pre-generated bison/flex parsers (identifiable through a ``.y`` file). @@ -131,56 +127,18 @@ How does this work? **For a more in-depth discussion, see parts.rst.** -sysa -~~~~ +Firstly, ``builder-hex0`` is launched. ``builder-hex0`` is a minimal kernel that is +written in ``hex0``, existing in 3 self-bootstrapping stages. -sysa is the first ‘system’ used in live-bootstrap. We move to a new -system after a reboot, which often occurs after the movement to a new -kernel. It is run by the seed Linux kernel provided by the user. It -compiles everything we need to be able to compile our own Linux kernel. -It runs fully in an initramfs and does not rely on disk support in the -seed Linux kernel. +This is capable of executing the entirety of ``stage0-posix``, (see +``seed/stage0-posix``), which produces a variety of useful utilities and a basic +C language, ``M2-Planet``. -sysb -~~~~ +``stage0-posix`` runs a file called ``after.kaem``. This is a shell script that +builds and runs a small program called ``script-generator``. This program reads +``steps/manifest`` and converts it into a series of shell scripts that can be +executed in sequence to complete the bootstrap. -sysb is the second 'system' of live-bootstrap. This uses the Linux 4.9.10 -kernel compiled within sysa. As we do not rely on disk support in sysa, we -need this intermediate system to be able to add the missing binaries to sysc -before moving into it. This is executed through kexec from sysa. At this point, -a SATA disk IS required. - -sysc -~~~~ - -sysc is the (current) last 'system' of live-bootstrap. 
This is a continuation -from sysb, executed through util-linux's ``switch_root`` command which moves -the entire rootfs without a reboot. Every package from here on out is compiled -under this system, taking binaries from sysa. Chroot and bubblewrap modes skip -sysb, as it is obviously irrelevant to them. - -Python-less build ------------------ - -Python is no longer a requirement to set up the build system. The -repository is almost completely in a form where it can be used as the -source of a build. - -1. Download required tarballs into ``sysa/distfiles`` and ``sysc/distfiles``. - You can use the ``download-distfiles.sh`` script. -2. Copy sysa/stage0-posix/src/* to the root of the repository. -3. Copy sysa/stage0-posix/src/bootstrap-seeds/POSIX/x86/kaem-optional-seed - to init in the root of the repository. -4. Copy sysa/after.kaem to after.kaem -5. Create a CPIO archive (eg, ``cpio --format newc --create --directory . > ../initramfs``). -6. Boot your initramfs and kernel. - -chroot builds -~~~~~~~~~~~~~ - -For chroot based bootstraps you can skip creation of initramfs and instead start bootstrap with - -``sudo chroot . bootstrap-seeds/POSIX/x86/kaem-optional-seed`` - -It is also recommended to copy everything to a new directory as bootstrapping messes up with files -in git repository and cannot be re-run again. +From this point forward, ``steps/manifest`` is effectively self-documenting. +Each package built exists in ``steps/``, and the build scripts can be seen +there. 
diff --git a/download-distfiles.sh b/download-distfiles.sh index 7b08e34..e4c12b9 100755 --- a/download-distfiles.sh +++ b/download-distfiles.sh @@ -20,27 +20,18 @@ download_source() { echo "${checksum} ${dest_path}" | sha256sum -c } -download_for_sys() { - local sysdir="${1}" - local distfiles="${sysdir}/distfiles" - - mkdir -p "${distfiles}" - - local entry - for entry in "${sysdir}"/*; do - [ -e "${entry}/sources" ] || continue - - local line - # shellcheck disable=SC2162 - while read line; do - # This is intentional - we want to split out ${line} into separate arguments. - # shellcheck disable=SC2086 - download_source "${distfiles}" ${line} - done < "${entry}/sources" - done -} - set -e + cd "$(dirname "$(readlink -f "$0")")" -download_for_sys sysa -download_for_sys sysc +mkdir -p distfiles + +for entry in steps/*; do + [ -e "${entry}/sources" ] || continue + + # shellcheck disable=SC2162 + while read line; do + # This is intentional - we want to split out ${line} into separate arguments. + # shellcheck disable=SC2086 + download_source distfiles ${line} + done < "${entry}/sources" +done diff --git a/lib/generator.py b/lib/generator.py new file mode 100755 index 0000000..e1dbac5 --- /dev/null +++ b/lib/generator.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +""" +This file contains all code required to generate the boot image for live-bootstrap +""" +# SPDX-License-Identifier: GPL-3.0-or-later +# SPDX-FileCopyrightText: 2022-2023 Dor Askayo +# SPDX-FileCopyrightText: 2021 Andrius Štikonas +# SPDX-FileCopyrightText: 2021 Melg Eight +# SPDX-FileCopyrightText: 2021-23 fosslinux + +import hashlib +import os +import shutil +import tarfile +import requests + +class Generator(): + """ + Class responsible for generating the basic media to be consumed. 
+ """ + + git_dir = os.path.join(os.path.dirname(os.path.join(__file__)), '..') + distfiles_dir = os.path.join(git_dir, 'distfiles') + + # pylint: disable=too-many-arguments + def __init__(self, tmpdir, arch, external_sources, + early_preseed, repo_path): + self.arch = arch + self.early_preseed = early_preseed + self.external_sources = external_sources + self.repo_path = repo_path + self.tmpdir = tmpdir + self.tmp_dir = tmpdir.path + self.external_dir = os.path.join(self.tmp_dir, 'external') + + def prepare(self, using_kernel=False, kernel_bootstrap=False): + """ + Prepare basic media of live-bootstrap. + /steps -- contains steps to be built + / -- contains seed to allow steps to be built, containing custom + scripts and stage0-posix + """ + # We use ext3 here; ext4 actually has a variety of extensions that + # have been added with varying levels of recency + # Linux 4.9.10 does not support a bunch of them + # Attempting to disable extensions that a particular e2fsprogs + # is *unaware* of causes the filesystem creation to fail + # We could hypothetically detect e2fsprogs version and create an + # argument matrix ... 
or we could just use ext3 instead which + # is effectively universally the same + if kernel_bootstrap: + init_path = os.path.join(self.tmp_dir, 'init') + + os.mkdir(init_path) + self.tmp_dir = init_path + + if self.repo_path or self.external_sources: + self.tmpdir.add_disk("external", filesystem="ext3") + self.tmpdir.mount_disk("external", "external") + else: + self.tmpdir.add_disk("external", tabletype="none") + elif using_kernel: + self.tmp_dir = os.path.join(self.tmp_dir, 'disk') + self.tmpdir.add_disk("disk", filesystem="ext3") + self.tmpdir.mount_disk("disk", "disk") + self.external_dir = os.path.join(self.tmp_dir, 'external') + + os.makedirs(self.external_dir, exist_ok=True) + + if self.early_preseed: + # Extract tar containing preseed + with tarfile.open(self.early_preseed, "r") as seed: + seed.extractall(self.tmp_dir) + shutil.copy2(os.path.join(self.git_dir, 'seed', 'preseeded.kaem'), + os.path.join(self.tmp_dir, 'kaem.x86')) + else: + self.stage0_posix() + self.seed() + + self.steps() + + self.distfiles() + + self.create_fiwix_file_list() + + if self.repo_path: + repo_dir = os.path.join(self.external_dir, 'repo-preseeded') + shutil.copytree(self.repo_path, repo_dir) + + if kernel_bootstrap: + self.create_builder_hex0_disk_image(os.path.join(self.tmp_dir, 'disk.img')) + + if kernel_bootstrap and (self.external_sources or self.repo_path): + self.tmpdir.umount_disk('external') + elif using_kernel: + self.tmpdir.umount_disk('disk') + + def steps(self): + """Copy in steps.""" + source_manifest = self.get_source_manifest() + self.get_packages(source_manifest) + + shutil.copytree(os.path.join(self.git_dir, 'steps'), os.path.join(self.tmp_dir, 'steps')) + + def stage0_posix(self): + """Copy in all of the stage0-posix""" + stage0_posix_base_dir = os.path.join(self.git_dir, 'seed', 'stage0-posix') + for f in os.listdir(stage0_posix_base_dir): + orig = os.path.join(stage0_posix_base_dir, f) + to = os.path.join(self.tmp_dir, f) + if os.path.isfile(orig): + 
shutil.copy2(orig, to) + else: + shutil.copytree(orig, to) + + arch = stage0_arch_map.get(self.arch, self.arch) + kaem_optional_seed = os.path.join(self.git_dir, 'seed', 'stage0-posix', 'bootstrap-seeds', + 'POSIX', arch, 'kaem-optional-seed') + shutil.copy2(kaem_optional_seed, os.path.join(self.tmp_dir, 'init')) + + def seed(self): + """Copy in extra seed files""" + seed_dir = os.path.join(self.git_dir, 'seed') + for f in os.listdir(seed_dir): + if os.path.isfile(os.path.join(seed_dir, f)): + shutil.copy2(os.path.join(seed_dir, f), os.path.join(self.tmp_dir, f)) + + def add_fiwix_files(self, file_list_path, dirpath): + """Add files to the list to populate Fiwix file system""" + for root, _, filepaths in os.walk(dirpath): + if 'stage0-posix' in root: + continue + with open(file_list_path, 'a', encoding="utf-8") as file_list: + for filepath in filepaths: + file_list.write(f"/{os.path.join(root, filepath)}\n") + + def create_fiwix_file_list(self): + """Create a list of files to populate Fiwix file system""" + file_list_path = os.path.join(self.tmp_dir, 'steps', 'lwext4-1.0.0-lb1', + 'files', 'fiwix-file-list.txt') + shutil.copyfile(os.path.join(self.tmp_dir, 'steps', 'lwext4-1.0.0-lb1', + 'files', 'early-artifacts-needed-after-fiwix.txt'), + file_list_path) + + save_cwd = os.getcwd() + os.chdir(self.tmp_dir) + self.add_fiwix_files(file_list_path, 'steps') + self.add_fiwix_files(file_list_path, 'distfiles') + os.chdir(save_cwd) + + def distfiles(self): + """Copy in distfiles""" + def copy_no_network_distfiles(out): + # Note that no network == no disk for kernel bootstrap mode + pre_src_path = os.path.join(self.git_dir, 'steps', 'pre-network-sources') + with open(pre_src_path, 'r', encoding="utf-8") as source_list: + for file in source_list.readlines(): + file = file.strip() + shutil.copy2(os.path.join(self.distfiles_dir, file), + os.path.join(out, file)) + + early_distfile_dir = os.path.join(self.tmp_dir, 'external', 'distfiles') + main_distfile_dir = 
os.path.join(self.external_dir, 'distfiles') + + if early_distfile_dir != main_distfile_dir: + os.makedirs(early_distfile_dir) + copy_no_network_distfiles(early_distfile_dir) + + if self.external_sources: + # copytree() creates main_distfile_dir itself and raises FileExistsError if it exists + shutil.copytree(self.distfiles_dir, main_distfile_dir) + else: + os.mkdir(main_distfile_dir) + copy_no_network_distfiles(main_distfile_dir) + + @staticmethod + def output_dir(srcfs_file, dirpath): + """Add a directory to srcfs file system""" + srcline = f"src 0 {dirpath}\n" + srcfs_file.write(srcline.encode()) + + @staticmethod + def output_file(srcfs_file, filepath): + """Add a file to srcfs file system""" + srcline = f"src {os.path.getsize(filepath)} {filepath}\n" + srcfs_file.write(srcline.encode()) + with open(filepath, 'rb') as srcfile: + srcfs_file.write(srcfile.read()) + + def output_tree(self, srcfs_file, treepath): + """Add a tree of files to srcfs file system""" + self.output_dir(srcfs_file, treepath) + for root, dirs, files in os.walk(treepath): + if ".git" in root: + continue + for dirpath in dirs: + if ".git" in dirpath: + continue + self.output_dir(srcfs_file, os.path.join(root, dirpath)) + + for filepath in files: + if ".git" in filepath: + continue + self.output_file(srcfs_file, os.path.join(root, filepath)) + + def append_srcfs(self, image_file): + """Append srcfs file system to disk image""" + save_cwd = os.getcwd() + + os.chdir(self.tmp_dir) + self.output_tree(image_file, '.') + + # Add commands to kick off stage0-posix + cmd = ' '.join(['hex0', + './bootstrap-seeds/POSIX/x86/hex0_x86.hex0', + './bootstrap-seeds/POSIX/x86/hex0-seed\n']) + image_file.write(cmd.encode()) + cmd = ' '.join(['hex0', + './bootstrap-seeds/POSIX/x86/kaem-minimal.hex0', + './bootstrap-seeds/POSIX/x86/kaem-optional-seed\n']) + image_file.write(cmd.encode()) + cmd = ' '.join(['./bootstrap-seeds/POSIX/x86/kaem-optional-seed', './kaem.x86\n']) + image_file.write(cmd.encode()) + + 
os.chdir(save_cwd) + + def create_builder_hex0_disk_image(self, 
image_file_name): + """Create builder-hex0 disk image""" + shutil.copyfile(os.path.join('seed', 'stage0-posix', 'bootstrap-seeds', + 'NATIVE', 'x86', 'builder-hex0-x86-stage1.img'), + image_file_name) + + with open(image_file_name, 'ab') as image_file: + # Append stage2 hex0 source + with open(os.path.join('kernel-bootstrap', 'builder-hex0-x86-stage2.hex0'), + encoding="utf-8") as infile: + image_file.write(infile.read().encode()) + # Pad to next sector + current_size = os.stat(image_file_name).st_size + while current_size % 512 != 0: + image_file.write(b'\0') + current_size += 1 + self.append_srcfs(image_file) + + current_size = os.stat(image_file_name).st_size + + megabyte = 1024 * 1024 + # fill file with zeros up to next megabyte + extra = current_size % megabyte + round_up = megabyte - extra + with open(image_file_name, 'ab') as image_file: + image_file.write(b'\0' * round_up) + current_size += round_up + + # fill file with zeros up to desired size, one megabyte at a time + with open(image_file_name, 'ab') as image_file: + while current_size < 16384 * megabyte: + image_file.write(b'\0' * megabyte) + current_size += megabyte + + def check_file(self, file_name, expected_hash): + """Check hash of downloaded source file.""" + with open(file_name, "rb") as downloaded_file: + downloaded_content = downloaded_file.read() # read entire file as bytes + readable_hash = hashlib.sha256(downloaded_content).hexdigest() + if expected_hash == readable_hash: + return + raise ValueError(f"Checksum mismatch for file {os.path.basename(file_name)}:\n\ +expected: {expected_hash}\n\ +actual: {readable_hash}\n\ +When in doubt, try deleting the file in question -- it will be downloaded again when running \ +this script the next time") + + def download_file(self, url, directory, file_name): + """ + Download a single source archive. 
+ """ + abs_file_name = os.path.join(directory, file_name) + + # Create a directory for downloaded file + if not os.path.isdir(directory): + os.mkdir(directory) + + # Actually download the file + headers = { + "Accept-Encoding": "identity" + } + if not os.path.isfile(abs_file_name): + print(f"Downloading: {file_name}") + response = requests.get(url, allow_redirects=True, stream=True, + headers=headers, timeout=20) + if response.status_code == 200: + with open(abs_file_name, 'wb') as target_file: + target_file.write(response.raw.read()) + else: + raise requests.HTTPError("Download failed.") + return abs_file_name + + def get_packages(self, source_manifest): + """Prepare remaining sources""" + for line in source_manifest.split("\n"): + line = line.strip().split(" ") + + path = self.download_file(line[2], line[1], line[3]) + self.check_file(path, line[0]) + + @classmethod + def get_source_manifest(cls): + """ + Generate a source manifest for the system. + """ + manifest_lines = [] + directory = os.path.relpath(cls.distfiles_dir, cls.git_dir) + + # Find all source files + steps_dir = os.path.join(cls.git_dir, 'steps') + for file in os.listdir(steps_dir): + if os.path.isdir(os.path.join(steps_dir, file)): + sourcef = os.path.join(steps_dir, file, "sources") + if os.path.exists(sourcef): + # Read sources from the source file + with open(sourcef, "r", encoding="utf_8") as sources: + for line in sources.readlines(): + line = line.strip().split(" ") + + if len(line) > 2: + file_name = line[2] + else: + # Automatically determine file name based on URL. 
+ file_name = os.path.basename(line[0]) + + manifest_lines.append(f"{line[1]} {directory} {line[0]} {file_name}") + + return "\n".join(manifest_lines) + +stage0_arch_map = { + "amd64": "AMD64", +} diff --git a/lib/sysgeneral.py b/lib/sysgeneral.py deleted file mode 100644 index 1938645..0000000 --- a/lib/sysgeneral.py +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env python3 -""" -This file contains a few functions to be shared by all Sys* classes -""" - -# SPDX-FileCopyrightText: 2022-2023 Dor Askayo -# SPDX-FileCopyrightText: 2021-23 fosslinux -# SPDX-FileCopyrightText: 2021 Andrius Štikonas -# SPDX-License-Identifier: GPL-3.0-or-later - -import os -import hashlib -import glob -import subprocess - -import requests - -class SysGeneral: - """ - A class from which all Sys* class are extended. - Contains functions used in all Sys* - """ - - # All of these are variables defined in the individual Sys* classes - cache_dir = None - base_dir = None - git_dir = None - sys_dir = None - initramfs_path = None - tmp_dir = None - - def check_file(self, file_name, expected_hash): - """Check hash of downloaded source file.""" - with open(file_name, "rb") as downloaded_file: - downloaded_content = downloaded_file.read() # read entire file as bytes - readable_hash = hashlib.sha256(downloaded_content).hexdigest() - if expected_hash == readable_hash: - return - raise ValueError(f"Checksum mismatch for file {os.path.basename(file_name)}:\n\ -expected: {expected_hash}\n\ -actual: {readable_hash}\n\ -When in doubt, try deleting the file in question -- it will be downloaded again when running \ -this script the next time") - - def download_file(self, url, directory, file_name): - """ - Download a single source archive. 
- """ - abs_file_name = os.path.join(directory, file_name) - - # Create a directory for downloaded file - if not os.path.isdir(directory): - os.mkdir(directory) - - # Actually download the file - headers = { - "Accept-Encoding": "identity" - } - if not os.path.isfile(abs_file_name): - print(f"Downloading: {file_name}") - response = requests.get(url, allow_redirects=True, stream=True, - headers=headers, timeout=20) - if response.status_code == 200: - with open(abs_file_name, 'wb') as target_file: - target_file.write(response.raw.read()) - else: - raise requests.HTTPError("Download failed.") - return abs_file_name - - def get_packages(self, source_manifest): - """Prepare remaining sources""" - for line in source_manifest.split("\n"): - line = line.strip().split(" ") - - path = self.download_file(line[2], line[1], line[3]) - self.check_file(path, line[0]) - - @classmethod - def get_source_manifest(cls): - """ - Generate a source manifest for the system. - """ - manifest_lines = [] - directory = os.path.relpath(cls.cache_dir, cls.git_dir) - - # Find all source files - for file in os.listdir(cls.sys_dir): - if os.path.isdir(os.path.join(cls.sys_dir, file)): - sourcef = os.path.join(cls.sys_dir, file, "sources") - if os.path.exists(sourcef): - # Read sources from the source file - with open(sourcef, "r", encoding="utf_8") as sources: - for line in sources.readlines(): - line = line.strip().split(" ") - - if len(line) > 2: - file_name = line[2] - else: - # Automatically determine file name based on URL. 
- file_name = os.path.basename(line[0]) - - manifest_lines.append(f"{line[1]} {directory} {line[0]} {file_name}") - - return "\n".join(manifest_lines) - - def make_initramfs(self): - """Package binary bootstrap seeds and sources into initramfs.""" - self.initramfs_path = os.path.join(self.tmp_dir, 'initramfs') - - # Create a list of files to go within the initramfs - file_list = glob.glob(os.path.join(self.tmp_dir, '**'), recursive=True) - - # Use built-in removeprefix once we can use Python 3.9 - def remove_prefix(text, prefix): - if text.startswith(prefix): - return text[len(prefix):] - return text # or whatever - - file_list = [remove_prefix(f, self.tmp_dir + os.sep) for f in file_list] - - # Create the initramfs - with open(self.initramfs_path, "w", encoding="utf_8") as initramfs: - # pylint: disable=consider-using-with - cpio = subprocess.Popen( - ["cpio", "--format", "newc", "--create", - "--directory", self.tmp_dir], - stdin=subprocess.PIPE, stdout=initramfs) - cpio.communicate(input='\n'.join(file_list).encode()) - -stage0_arch_map = { - "amd64": "AMD64", -} diff --git a/lib/tmpdir.py b/lib/tmpdir.py index ee607da..f72286f 100644 --- a/lib/tmpdir.py +++ b/lib/tmpdir.py @@ -24,7 +24,6 @@ class Tmpdir: Represents a tmpdir """ - _syses = {} _disks = {} _disk_filesystems = {} _mountpoints = {} @@ -60,19 +59,11 @@ class Tmpdir: mount("tmpfs", self.path, "tmpfs", f"size={size}") self._type = TmpType.TMPFS - def add_sys(self, name, subdir=None): - """Create a subdirectory and register a sys""" - if subdir is None: - subdir = name - sys_path = os.path.join(self.path, name) - if not os.path.exists(sys_path): - os.mkdir(sys_path) - return sys_path - - def add_disk(self, name, size="16G", filesystem="ext4"): + # pylint: disable=too-many-arguments + def add_disk(self, name, size="16G", filesystem="ext4", tabletype="msdos", mkfs_args=None): """Add a disk""" disk_path = os.path.join(self.path, f"{name}.img") - self._disks[name] = create_disk(disk_path, "msdos", 
filesystem, size) + self._disks[name] = create_disk(disk_path, tabletype, filesystem, size, mkfs_args=mkfs_args) self._disk_filesystems[name] = filesystem # Allow executing user to access it run_as_root("chown", getpass.getuser(), self._disks[name]) diff --git a/lib/utils.py b/lib/utils.py index 5c6bfe2..a80d656 100755 --- a/lib/utils.py +++ b/lib/utils.py @@ -31,8 +31,10 @@ def run_as_root(*args, **kwargs): return run("sudo", *args, **kwargs) return run(*args, **kwargs) -def create_disk(image, disk_type, fs_type, size): +def create_disk(image, disk_type, fs_type, size, mkfs_args=None): """Create a disk image, with a filesystem on it""" + if mkfs_args is None: + mkfs_args = [] run('truncate', '-s', size, image) # First find the device we will use, then actually use it loop_dev = run_as_root('losetup', '-f', capture_output=True).stdout.decode().strip() @@ -40,9 +42,9 @@ def create_disk(image, disk_type, fs_type, size): # Create the partition if disk_type != "none": run_as_root('parted', '--script', image, 'mklabel', disk_type, 'mkpart', - 'primary', 'ext4', '0%', '100%') + 'primary', fs_type, '0%', '100%') run_as_root('partprobe', loop_dev) - run_as_root('mkfs.' + fs_type, loop_dev + "p1") + run_as_root('mkfs.' + fs_type, loop_dev + "p1", *mkfs_args) return loop_dev def mount(source, target, fs_type, options='', **kwargs): diff --git a/parts.rst b/parts.rst index 99044ff..e3b8edf 100644 --- a/parts.rst +++ b/parts.rst @@ -155,14 +155,46 @@ checksumming tool, that we use to ensure reproducibility and authenticity of generated binaries. We also build initial ``untar``, ``ungz`` and ``unbz2`` utilities to deal with compressed archives. -``/sysa`` -========= +live-bootstrap seed +=================== -We now move into the ``/sysa`` directory. As stage0-posix has no -concept of ``chdir()`` (not added until very late in stage0-posix), -we have to copy a lot of files into the root of the initramfs, making it -very messy. 
We get into the move ordered directory ``/sysa`` here, -copying over all of the required binaries from ``/``. +``stage0-posix`` executes a file ``after.kaem``, which creates a kaem script to +continue the bootstrap. This is responsible for cleaning up the mess in +``/x86/bin`` and moving it to the permanent ``/usr/bin``, and setting a few +environment variables. + +script-generator +================ + +``script-generator`` is a program that translates live-bootstrap's +domain-specific manifest language into shell scripts that can be run to complete +the bootstrap. The translator is implemented in ``M2-Planet``. + +The language is fairly simple; each line has the format +``<directive>: <arguments> <predicate>``. A predicate only runs the line if a +particular condition is true. + +The following directives are supported: + +* ``build``, builds a particular package defined in ``steps/``. +* ``improve``, runs a script making a distinct and logical improvement to the + live bootstrap system. +* ``define``, define a variable evaluated from other constants/variables. +* ``jump``, moves into a new rootfs/kernel using a custom script. + +checksum-transcriber 1.0 +======================== + +``checksum-transcriber`` is a small program that converts live-bootstrap's +source specification for packages into a SHA256SUM file that can be used to +checksum source tarballs. + +simple-patch 1.0 +================ + +``simple-patch`` is a rudimentary patching program. It works by matching for a +text block given to it, and replacing it with another text block. This is +sufficient for the early patching required before we have full proper GNU patch. mes 0.25 ======== @@ -177,6 +209,10 @@ to this part: 2. We then use this to recompile the Mes interpreter as well as building the libc. This second interpreter is faster and less buggy. +From this point until musl, we are capable of making non-standard and strange +libraries.
All libraries are in ``/usr/lib/mes``, and includes are in +``/usr/include/mes``, as they are incompatible with musl. + tinycc 0.9.26 ============= @@ -215,8 +251,8 @@ This is a Linux 2.0 clone which is much simpler to understand and build than Linux. This version of Fiwix is a fork of 1.4.0 that contains many modifications and enhancements to support live-boostrap. -lwext4 1.0.0 -============ +lwext4 1.0.0-lb1 +================ If the kernel bootstrap option is enabled then `lwext4 ` is built next. This is a library for creating ext2/3/4 file systems from user land. @@ -230,11 +266,19 @@ kexec-fiwix If the kernel bootstrap option is enabled then a C program `kexec-fiwix` is compiled and run which places the Fiwix ram drive in memory and launches the Fiwix kernel. -kexec-linux -=========== +esfu 1.0 +======== -If the kernel bootstrap option is enabled then a C program `kexec-linux` is compiled. -This is used as part of the go_sysb step later to launch the Linux kernel. +This is an extremely crippled basic implementation of ``mount`` and ``mknod``. +Sufficient only for the next step. + +early_mount_disk +================ + +When using kernel bootstrap, distfiles from this point exist on an external +disk. Using ``esfu``'s ``mount`` and ``mknod``, we are able to mount this disk. +This is unnecessary when not using kernel bootstrap as everything is done on the +disk. make 3.82 ========= @@ -304,6 +348,12 @@ Bash ships with a bison pre-generated file here which we delete. Unfortunately, we have not bootstrapped bison but fortunately for us, heirloom yacc is able to cope here. +update_env +========== + +This is a simple script that makes some small updates to the env file that were +not possible when using kaem. + flex 2.5.11 =========== @@ -321,8 +371,8 @@ tcc 0.9.27 (patched) We recompile ``tcc`` with some patches needed to build musl. 
-musl 1.1.24 -=========== +musl 1.1.24 and musl_libdir +=========================== ``musl`` is a C standard library that is lightweight, fast, simple, free, and strives to be correct in the sense of standards-conformance @@ -335,6 +385,9 @@ apply a few patches. In particular, we replace all weak symbols with strong symbols and will patch ``tcc`` in the next step to ignore duplicate symbols. +We do not use any of ``/usr/lib/mes`` or ``/usr/include/mes`` any longer, rather +using ``/usr/lib`` and ``/usr/include`` like normal. + tcc 0.9.27 (musl) ================= @@ -586,12 +639,6 @@ libtool 2.2.4 GNU Libtool is the final part of GNU Autotools. It is a script used to hide away differences when compiling shared libraries on different platforms. -bash 2.05b -========== - -Up to this point, our build of ``bash`` could run scripts but could not be used -interactively. Rebuilding bash makes this functionality work. - automake 1.15.1 =============== @@ -646,6 +693,12 @@ GCC can build the latest as of the time of writing musl version. We also don't need any of the TCC patches that we used before. To accomodate Fiwix, there are patches to avoid syscalls set_thread_area and clone. +Linux headers 5.10.41 +===================== + +This gets some headers out of the Linux kernel that are required to use the +kernel ABI, needed for ``util-linux``. + gcc 4.0.4 ========= @@ -655,10 +708,15 @@ util-linux 2.19.1 ================= ``util-linux`` contains a number of general system administration utilities. -Most pressingly, we need these for being able to mount disks (for non-chroot -mode, but it is built it in chroot mode anyway because it will likely be useful -later). The latest version is not used because of autotools/GCC -incompatibilities. +This gives us access to a much less crippled version of ``mount`` and ``mknod``. +The latest version is not used because of autotools/GCC incompatibilities. 
+ +move_disk +========= + +In ``kernel-bootstrap`` mode, we have been working off an initramfs for some +things up until now. At this point we are now capable of moving to it entirely, +so we do so. kbd-1.15 ======== @@ -685,6 +743,12 @@ bc 1.07.1 ``bc`` is a console based calculator that is sometime used in scripts. We need ``bc`` to rebuild some Linux kernel headers. +kexec-linux +=========== + +If the kernel bootstrap option is enabled then a C program ``kexec-linux`` is compiled. +This can be used to launch a Linux kernel from Fiwix. + kexec-tools 2.0.22 ================== @@ -693,13 +757,6 @@ Linux kernel without a manual restart from within a running system. It is a kind of soft-restart. It is only built for non-chroot mode, as we only use it in non-chroot mode. It is used to go into sysb/sysc. -create_sysb -=========== - -The next step is not a package, but the creation of the sysb rootfs, containing -all of the scripts for sysb (which merely move to sysc). Again, this is only -done in non-chroot mode, because sysb does not exist in chroot mode. - Linux kernel 4.9.10 =================== @@ -716,30 +773,10 @@ so we use a ``find`` command to remove those, which are automatically regenerate The kernel config was originally taken from Void Linux, and was then modified for the requirements of live-bootstrap, including compiler features, drivers, and removing modules. Modules are unused. They are difficult to transfer to -subsequent systems, and we do not have ``modprobe``. Lastly, -the initramfs of sysb is generated in this stage, using ``gen_init_cpio`` within -the Linux kernel tree. This avoids the compilation of ``cpio`` as well. +subsequent systems, and we do not have ``modprobe``. -musl 1.2.4 -========== -Prior to booting Linux, musl is rebuilt yet again with syscalls -``clone`` and ``set_thread_area`` enabled for Linux thread support. - -go_sysb -======= - -This is the last step of sysa, run for non-chroot mode. 
It uses kexec to load -the new Linux kernel into RAM and execute it, moving into sysb. - -In chroot, sysb is skipped, and data is transferred directly to sysc and -chrooted into. - -sysb -==== - -sysb is purely a transition to sysc, allowing binaries from sysa to get onto a -disk (as sysa does not necessarily have hard disk support in the kernel). -It populates device nodes, mounts sysc, copies over data, and executes sysc. +We then kexec to use the new Linux kernel, using ``kexec-tools`` for a Linux +kernel and ``kexec-linux`` for Fiwix. curl 7.88.1 =========== diff --git a/rootfs.py b/rootfs.py index f4a6d22..648b1d0 100755 --- a/rootfs.py +++ b/rootfs.py @@ -17,18 +17,16 @@ import argparse import os import shutil -from sysa import SysA -from sysc import SysC from lib.utils import run, run_as_root -from lib.sysgeneral import stage0_arch_map from lib.tmpdir import Tmpdir +from lib.generator import Generator, stage0_arch_map def create_configuration_file(args): """ Creates bootstrap.cfg file which would contain options used to customize bootstrap. """ - config_path = os.path.join('sysa', 'bootstrap.cfg') + config_path = os.path.join('steps', 'bootstrap.cfg') with open(config_path, "w", encoding="utf_8") as config: config.write(f"FORCE_TIMESTAMPS={args.force_timestamps}\n") config.write(f"CHROOT={args.chroot or args.bwrap}\n") @@ -38,7 +36,10 @@ def create_configuration_file(args): config.write(f"INTERNAL_CI={args.internal_ci}\n") config.write(f"BARE_METAL={args.bare_metal}\n") if (args.bare_metal or args.qemu) and not args.kernel: - config.write("DISK=sda\n") + if args.repo or args.external_sources: + config.write("DISK=sdb1\n") + else: + config.write("DISK=sdb\n") config.write("KERNEL_BOOTSTRAP=True\n") else: config.write("DISK=sda1\n") @@ -49,7 +50,7 @@ def create_configuration_file(args): def main(): """ A few command line arguments to customize bootstrap. 
- This function also creates SysA object which prepares directory + This function also creates a Generator object which prepares directory structure with bootstrap seeds and all sources. """ parser = argparse.ArgumentParser() @@ -151,16 +152,15 @@ def main(): if args.tmpfs: tmpdir.tmpfs(size=args.tmpfs_size) - # sys - system_c = SysC(arch=args.arch, tmpdir=tmpdir, - external_sources=args.external_sources) - system_a = SysA(arch=args.arch, early_preseed=args.early_preseed, - tmpdir=tmpdir, external_sources=args.external_sources, - repo_path=args.repo) + generator = Generator(tmpdir=tmpdir, + arch=args.arch, + external_sources=args.external_sources, + repo_path=args.repo, + early_preseed=args.early_preseed) - bootstrap(args, system_a, system_c, tmpdir) + bootstrap(args, generator, tmpdir) -def bootstrap(args, system_a, system_c, tmpdir): +def bootstrap(args, generator, tmpdir): """Kick off bootstrap process.""" print(f"Bootstrapping {args.arch} -- SysA") if args.chroot: @@ -171,17 +171,15 @@ print(shutil.which('chroot')) chroot_binary = run_as_root('python3', '-c', find_chroot, capture_output=True).stdout.decode().strip() - system_c.prepare(create_disk_image=False) - system_a.prepare(create_initramfs=False) + generator.prepare(using_kernel=False) arch = stage0_arch_map.get(args.arch, args.arch) init = os.path.join(os.sep, 'bootstrap-seeds', 'POSIX', arch, 'kaem-optional-seed') - run_as_root('env', '-i', 'PATH=/bin', chroot_binary, system_a.tmp_dir, init) + run_as_root('env', '-i', 'PATH=/bin', chroot_binary, generator.tmp_dir, init) elif args.bwrap: if not args.internal_ci or args.internal_ci == "pass1": - system_c.prepare(create_disk_image=False) - system_a.prepare(create_initramfs=False) + generator.prepare(using_kernel=False) arch = stage0_arch_map.get(args.arch, args.arch) init = os.path.join(os.sep, 'bootstrap-seeds', 'POSIX', arch, 'kaem-optional-seed') @@ -191,7 +189,7 @@ print(shutil.which('chroot')) '--unshare-net', '--clearenv', '--setenv', 'PATH', '/usr/bin', -
'--bind', system_a.tmp_dir, '/', + '--bind', generator.tmp_dir, '/', '--dir', '/dev', '--dev-bind', '/dev/null', '/dev/null', '--dev-bind', '/dev/zero', '/dev/zero', @@ -210,7 +208,7 @@ print(shutil.which('chroot')) '--unshare-net' if args.external_sources else None, '--clearenv', '--setenv', 'PATH', '/usr/bin', - '--bind', system_a.tmp_dir + "/sysc_image", '/', + '--bind', generator.tmp_dir + "/sysc_image", '/', '--dir', '/dev', '--dev-bind', '/dev/null', '/dev/null', '--dev-bind', '/dev/zero', '/dev/zero', @@ -226,40 +224,39 @@ print(shutil.which('chroot')) elif args.bare_metal: if args.kernel: - system_c.prepare(create_disk_image=True) - system_a.prepare(create_initramfs=True) + generator.prepare(using_kernel=True) print("Please:") - print(" 1. Take tmp/sysa/initramfs and your kernel, boot using this.") - print(" 2. Take tmp/sysc/disk.img and put this on a writable storage medium.") + print(" 1. Take tmp/initramfs and your kernel, boot using this.") + print(" 2. Take tmp/disk.img and put this on a writable storage medium.") else: - system_a.prepare(create_initramfs=True, kernel_bootstrap=True) + generator.prepare(kernel_bootstrap=True) print("Please:") - print(" 1. Take tmp/sysa/sysa.img and write it to a boot drive and then boot it.") + print(" 1. 
Take tmp/disk.img and write it to a boot drive and then boot it.") else: if args.kernel: - system_c.prepare(create_disk_image=True) - system_a.prepare(create_initramfs=True) + generator.prepare(using_kernel=True) run(args.qemu_cmd, '-enable-kvm', '-m', str(args.qemu_ram) + 'M', '-smp', str(args.cores), '-no-reboot', - '-hda', tmpdir.get_disk("sysc"), + '-drive', 'file=' + tmpdir.get_disk("disk") + ',format=raw', + '-drive', 'file=' + tmpdir.get_disk("external") + ',format=raw', '-nic', 'user,ipv6=off,model=e1000', '-kernel', args.kernel, - '-initrd', system_a.initramfs_path, '-nographic', - '-append', 'console=ttyS0') + '-append', 'console=ttyS0 root=/dev/sda1 rootfstype=ext3 init=/init rw') else: - system_a.prepare(create_initramfs=True, kernel_bootstrap=True) + generator.prepare(kernel_bootstrap=True) run(args.qemu_cmd, '-enable-kvm', '-m', "4G", '-smp', str(args.cores), '-no-reboot', - '-drive', 'file=' + os.path.join(system_a.tmp_dir, 'sysa.img') + ',format=raw', + '-drive', 'file=' + os.path.join(generator.tmp_dir, 'disk.img') + ',format=raw', + '-drive', 'file=' + tmpdir.get_disk("external") + ',format=raw', '-machine', 'kernel-irqchip=split', '-nic', 'user,ipv6=off,model=e1000', '-nographic') diff --git a/sysa/after-preseeded.kaem b/seed/after.kaem similarity index 55% rename from sysa/after-preseeded.kaem rename to seed/after.kaem index 61dea2d..79c6f61 100755 --- a/sysa/after-preseeded.kaem +++ b/seed/after.kaem @@ -9,17 +9,7 @@ set -ex -# Set commonly used variables -sysa=/sysa -DISTFILES=/sysa/distfiles -PREFIX=/usr -BINDIR=${PREFIX}/bin -LIBDIR=${PREFIX}/lib/mes -INCDIR=${PREFIX}/include/mes -SRCDIR=${PREFIX}/src -TMPDIR=/tmp # tmpdir is needed for patch to work -PATH=${BINDIR} +PATH=/${ARCH_DIR}/bin -cd ${sysa} - -exec bash run.sh +catm seed-full.kaem /steps/env seed.kaem +kaem --file seed-full.kaem diff --git a/seed/preseeded.kaem b/seed/preseeded.kaem new file mode 100755 index 0000000..54279f1 --- /dev/null +++ b/seed/preseeded.kaem @@ -0,0 +1,8 @@ 
+#!/bin/sh +# +# SPDX-FileCopyrightText: 2023 fosslinux +# +# SPDX-License-Identifier: GPL-3.0-or-later + +/script-generator /steps/manifest +/usr/bin/kaem --file /preseed-jump.kaem diff --git a/seed/script-generator.c b/seed/script-generator.c new file mode 100644 index 0000000..fd57c12 --- /dev/null +++ b/seed/script-generator.c @@ -0,0 +1,627 @@ +/* + * SPDX-FileCopyrightText: 2023 fosslinux + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#define MAX_TOKEN 64 +#define MAX_STRING 2048 + +#include +#include +#include +#include + +struct Token { + char *val; + struct Token *next; +}; +typedef struct Token Token; + +#define TYPE_BUILD 1 +#define TYPE_IMPROVE 2 +#define TYPE_DEFINE 3 +#define TYPE_JUMP 4 +#define TYPE_MAINT 5 + +struct Directive { + Token *tok; + struct Directive *next; + int type; + char *arg; /* The primary argument */ +}; +typedef struct Directive Directive; + +/* Tokenizer. */ + +/* Skip over a comment. */ +char consume_comment(FILE *in) { + /* Discard the rest of the line. */ + char c = fgetc(in); + while (c != -1 && c != '\n') + c = fgetc(in); + return c; +} + +char consume_line(FILE *in, Directive *directive) { + char c = fgetc(in); + + /* Short-circuit if whole line is comment or blank line. */ + if (c == '#') { + c = consume_comment(in); + return c; + } else if (c == '\n' || c == -1) { + return c; + } + + /* Ok, we will have something to put here. */ + directive->next = calloc(1, sizeof(Directive)); + directive = directive->next; + + Token *head = calloc(1, sizeof(Token)); + Token *cur = head; + char *out; + int i = 0; + while (c != -1 && c != '\n') { + /* Initialize next token. */ + cur->next = calloc(1, sizeof(Token)); + cur = cur->next; + cur->val = calloc(MAX_TOKEN, sizeof(char)); + out = cur->val; + /* Copy line to token until a space (or EOL/EOF) or comment is found. */ + while (c != -1 && c != '\n' && c != ' ' && c != '#') { + out[0] = c; + out += 1; + c = fgetc(in); + } + /* Go to start of next token. 
*/ + if (c == ' ') { + c = fgetc(in); + } + /* Handle comment. */ + if (c == '#') { + c = consume_comment(in); + } + } + + /* Add information to directive. */ + directive->tok = head->next; + + return c; +} + +Directive *tokenizer(FILE *in) { + Directive *head = calloc(1, sizeof(Directive)); + Directive *cur = head; + + char c; + while (c != -1) { + /* + * Note that consume_line fills cur->next, not cur. + * This avoids having an empty last Directive. + */ + c = consume_line(in, cur); + if (cur->next != NULL) { + cur = cur->next; + } + } + return head->next; +} + +/* Config variables. */ + +struct Variable { + char *name; + char *val; + struct Variable *next; +}; +typedef struct Variable Variable; + +Variable *variables; + +Variable *load_config() { + FILE *config = fopen("/steps/bootstrap.cfg", "r"); + /* File does not exist check. */ + if (config == NULL) { + return NULL; + } + + char *line = calloc(MAX_STRING, sizeof(char)); + Variable *head = calloc(1, sizeof(Variable)); + Variable *cur = head; + /* For each line... */ + char *equals; + while (fgets(line, MAX_STRING, config) != 0) { + /* Weird M2-Planet behaviour. */ + if (*line == 0) { + break; + } + cur->next = calloc(1, sizeof(Variable)); + cur = cur->next; + /* Split on the equals. First half is name, second half is value. 
*/ + equals = strchr(line, '='); + if (equals == 0) { + fputs("bootstrap.cfg should have the format var=val on each line.", stderr); + exit(1); + } + cur->name = calloc(equals - line + 1, sizeof(char)); + strncpy(cur->name, line, equals - line); + equals += 1; + cur->val = calloc(strlen(equals), sizeof(char)); + strncpy(cur->val, equals, strlen(equals) - 1); + line = calloc(MAX_STRING, sizeof(char)); + } + variables = head->next; + fclose(config); +} + +void output_config(FILE *out) { + Variable *variable; + for (variable = variables; variable != NULL; variable = variable->next) { + fputs(variable->name, out); + fputs("=", out); + fputs(variable->val, out); + fputs("\n", out); + } +} + +char *get_var(char *name) { + /* Search through existing variables. */ + Variable *var; + Variable *last; + for (var = variables; var != NULL; var = var->next) { + if (strcmp(name, var->name) == 0) { + return var->val; + } + last = var; + } + + /* If the variable is unset, prompt the user. */ + if (variables == NULL) { + variables = calloc(1, sizeof(Variable)); + var = variables; + } else { + last->next = calloc(1, sizeof(Variable)); + var = last->next; + } + var->name = calloc(strlen(name) + 1, sizeof(char)); + strcpy(var->name, name); + var->val = calloc(MAX_STRING, sizeof(char)); + fputs("You have not set a value for ", stdout); + fputs(name, stdout); + fputs(" in bootstrap.cfg. Please set it now:\n", stdout); + while (fgets(var->val, MAX_STRING, stdin) == 0 || var->val[0] == '\n') { + fputs("Error inputting, try again:\n", stdout); + } + if (var->val[0] == 0) { + fputs("You put in an EOF!\n", stderr); + exit(1); + } + /* Trim the newline. */ + var->val[strlen(var->val)] = 0; + return var->val; +} + +/* Recursive descent interpreter. 
*/ + +Token *fill(Token *tok, Directive *directive, int type) { + directive->type = type; + directive->arg = tok->val; + return tok->next; +} + +Token *logic(Token *tok, char **val) { + /* logic = "(" + * (name | + * (name "==" value) | + * (logic "||" logic) | + * (logic "&&" logic)) + * ")" + */ + + char *lhs = tok->val; + char *rhs; + tok = tok->next; + if (strcmp(tok->val, ")") == 0) { + /* Case where it's just a constant. */ + *val = lhs; + return tok; + } else if (strcmp(tok->val, "==") == 0) { + /* Case for equality. */ + rhs = tok->next->val; + tok = tok->next->next; + if (strcmp(get_var(lhs), rhs) == 0) { + lhs = "True"; + } else { + lhs = "False"; + } + } else { + fputs("Expected == after ", stderr); + fputs(lhs, stderr); + fputs(" in logic\n", stderr); + exit(1); + } + + if (strcmp(tok->val, ")") == 0) { + *val = lhs; + return tok; + } else if (strcmp(tok->val, "||") == 0) { + /* OR */ + tok = logic(tok->next, &rhs); + if (strcmp(lhs, "True") == 0 || strcmp(rhs, "True") == 0) { + lhs = "True"; + } else { + lhs = "False"; + } + } else if (strcmp(tok->val, "&&") == 0) { + /* AND */ + tok = logic(tok->next, &rhs); + if (strcmp(lhs, "True") == 0 && strcmp(rhs, "True") == 0) { + lhs = "True"; + } else { + lhs = "False"; + } + } else { + fputs("Expected || or && in logic\n", stderr); + exit(1); + } + + *val = lhs; + return tok; +} + +Token *primary_logic(Token *tok, char **val) { + /* Starting ( */ + if (strcmp(tok->val, "(") != 0) { + fputs("Expected logic to begin with (\n", stderr); + exit(1); + } + tok = tok->next; + + tok = logic(tok, val); + + if (strcmp(tok->val, ")") != 0) { + fputs("Expected logic to end with )\n", stderr); + exit(1); + } + + return tok; +} + +int eval_predicate(Token *tok) { + char *result; + tok = primary_logic(tok, &result); + return strcmp(result, "True") == 0; +} + +Token *define(Token *tok, Directive *directive) { + /* define = name "=" (logic | constant) */ + char *name = tok->val; + tok = tok->next; + if (strcmp(tok->val, "=") 
!= 0) { + fputs("define of ", stderr); + fputs(name, stderr); + fputs(" has a missing equals\n", stderr); + exit(1); + } + tok = tok->next; + + char *val = calloc(MAX_STRING, sizeof(char)); + if (strcmp(tok->val, "(") == 0) { + /* It is a logic. */ + tok = primary_logic(tok, &val); + } else { + /* It is a constant. */ + strcpy(val, tok->val); + } + + /* Check for predicate. */ + tok = tok->next; + if (tok != NULL) { + if (!eval_predicate(tok)) { + /* Nothing more to do. */ + return tok; + } + } + + /* Update existing variable, or else, add to the end of variables. */ + /* Special case: empty variables. */ + if (variables == NULL) { + variables = calloc(1, sizeof(Variable)); + variables->name = name; + variables->val = val; + } + + Variable *var; + for (var = variables; var->next != NULL; var = var->next) { + if (strcmp(var->next->name, name) == 0) { + var->next->val = val; + break; + } + } + if (var->next == NULL) { + /* We did not update an existing variable. */ + var->next = calloc(1, sizeof(Variable)); + var->next->name = name; + var->next->val = val; + } + + return tok; +} + +int interpret(Directive *directive) { + /* directive = (build | improve | define | jump | maint) predicate? */ + Token *tok = directive->tok; + if (strcmp(tok->val, "build:") == 0) { + tok = fill(tok->next, directive, TYPE_BUILD); + } else if (strcmp(tok->val, "improve:") == 0) { + tok = fill(tok->next, directive, TYPE_IMPROVE); + } else if (strcmp(tok->val, "jump:") == 0) { + tok = fill(tok->next, directive, TYPE_JUMP); + } else if (strcmp(tok->val, "maint:") == 0) { + tok = fill(tok->next, directive, TYPE_MAINT); + } else if (strcmp(tok->val, "define:") == 0) { + tok = define(tok->next, directive); + return 1; /* There is no codegen for a define. 
*/ + } + + if (tok != NULL) { + return !eval_predicate(tok); + } + return 0; +} + +Directive *interpreter(Directive *directives) { + Directive *directive; + Directive *last; + for (directive = directives; directive != NULL; directive = directive->next) { + if (interpret(directive)) { + /* This means this directive needs to be removed from the linked list. */ + if (last == NULL) { + /* First directive. */ + directives = directive->next; + } else { + last->next = directive->next; + } + } else { + last = directive; + } + } + return directives; +} + +void add_to_fiwix_filelist(char *filename) { + /* Add the filename to fiwix-file-list.txt */ + FILE *fiwix_list = fopen("/steps/lwext4-1.0.0-lb1/files/fiwix-file-list.txt", "r"); + fseek(fiwix_list, 0, SEEK_END); + long size = ftell(fiwix_list); + char *contents = calloc(size, sizeof(char)); + fseek(fiwix_list, 0, SEEK_SET); + fread(contents, 1, size, fiwix_list); + fclose(fiwix_list); + fiwix_list = fopen("/steps/lwext4-1.0.0-lb1/files/fiwix-file-list.txt", "w"); + fwrite(contents, 1, size, fiwix_list); + fputs(filename, fiwix_list); + fputc('\n', fiwix_list); + fclose(fiwix_list); +} + +/* Script generator. */ +FILE *start_script(int id, int using_bash) { + /* Create the file /steps/$id.sh */ + char *filename = calloc(MAX_STRING, sizeof(char)); + strcpy(filename, "/steps/"); + strcat(filename, int2str(id, 10, 0)); + strcat(filename, ".sh"); + add_to_fiwix_filelist(filename); + + FILE *out = fopen(filename, "w"); + if (out == NULL) { + fputs("Error opening output file ", stderr); + fputs(filename, stderr); + fputs("\n", stderr); + exit(1); + } + + if (using_bash) { + fputs("#!/bin/bash\n", out); + fputs("set -e\n", out); + fputs("cd /steps\n", out); + fputs(". ./bootstrap.cfg\n", out); + fputs(". ./env\n", out); + fputs(". 
./helpers.sh\n", out); + } else { + fputs("set -ex\n", out); + fputs("cd /steps\n", out); + output_config(out); + FILE *env = fopen("/steps/env", "r"); + char *line = calloc(MAX_STRING, sizeof(char)); + while (fgets(line, MAX_STRING, env) != 0) { + /* Weird M2-Planet behaviour. */ + if (*line == 0) { + break; + } + fputs(line, out); + line = calloc(MAX_STRING, sizeof(char)); + } + fclose(env); + } + + return out; +} + +void output_call_script(FILE *out, char *type, char *name, int using_bash, int source) { + if (using_bash) { + if (source) { + fputs(". ", out); + } else { + fputs("bash ", out); + } + } else { + fputs("kaem --file ", out); + } + fputs("/steps/", out); + fputs(type, out); + fputs("/", out); + fputs(name, out); + fputs(".sh\n", out); +} + +void output_build(FILE *out, Directive *directive, int pass_no, int using_bash) { + if (using_bash) { + fputs("build ", out); + fputs(directive->arg, out); + fputs(" pass", out); + fputs(int2str(pass_no, 10, 0), out); + fputs(".sh\n", out); + } else { + fputs("pkg=", out); + fputs(directive->arg, out); + fputs("\n", out); + fputs("cd ${pkg}\n", out); + fputs("kaem --file pass", out); + fputs(int2str(pass_no, 10, 0), out); + fputs(".kaem\n", out); + fputs("cd ..\n", out); + } +} + +void generate_preseed_jump(int id) { + FILE *out = fopen("/preseed-jump.kaem", "w"); + fputs("set -ex\n", out); + fputs("PATH=/usr/bin\n", out); + fputs("bash /steps/", out); + fputs(int2str(id, 10, 0), out); + fputs(".sh\n", out); + fclose(out); +} + +void generate(Directive *directives) { + /* + * We are separating the stages given in the manifest into a bunch of + * smaller scripts. The following conditions call for the creation of + * a new script: + * - a jump + * - build of bash + */ + + int counter = 0; + + /* Initially, we use kaem, not bash.
*/ + int using_bash = 0; + + FILE *out = start_script(counter, using_bash); + counter += 1; + + Directive *directive; + Directive *past; + char *filename; + int pass_no; + for (directive = directives; directive != NULL; directive = directive->next) { + if (directive->type == TYPE_BUILD) { + /* Get what pass number this is. */ + pass_no = 1; + for (past = directives; past != directive; past = past->next) { + if (strcmp(past->arg, directive->arg) == 0) { + pass_no += 1; + } + } + output_build(out, directive, pass_no, using_bash); + if (strncmp(directive->arg, "bash-", 5) == 0) { + if (!using_bash) { + /* + * We are transitioning from kaem to bash, the point at which "early + * preseed" occurs. So generate the preseed jump script at this point. + */ + generate_preseed_jump(counter); + } + using_bash = 1; + /* Create call to new script. */ + output_call_script(out, "", int2str(counter, 10, 0), using_bash, 0); + fclose(out); + out = start_script(counter, using_bash); + counter += 1; + } + } else if (directive->type == TYPE_IMPROVE) { + output_call_script(out, "improve", directive->arg, using_bash, 1); + } else if (directive->type == TYPE_JUMP) { + /* + * Create /init to call new script. + * We actually do this by creating /init.X for some number X, and then + * moving that to /init at the appropriate time. + */ + filename = calloc(MAX_STRING, sizeof(char)); + if (using_bash) { + fputs("mv /init /init.bak\n", out); + /* Move new init to /init.
*/ + strcpy(filename, "/init."); + strcat(filename, int2str(counter, 10, 0)); + fputs("cp ", out); + fputs(filename, out); + fputs(" /init\n", out); + fputs("chmod 755 /init\n", out); + } else { + strcpy(filename, "/kaem.run."); + strcat(filename, int2str(counter, 10, 0)); + fputs("cp ", out); + fputs(filename, out); + fputs(" /kaem.run\n", out); + fputs("cp /usr/bin/kaem /init\n", out); + fputs("chmod 755 /init\n", out); + } + + output_call_script(out, "jump", directive->arg, using_bash, 1); + fclose(out); + + /* + * This cannot go before here as builder-hex0 does not like having + * multiple files open at once! + */ + add_to_fiwix_filelist(filename); + + if (using_bash) { + out = fopen(filename, "w"); + if (out == NULL) { + fputs("Error opening /init\n", stderr); + exit(1); + } + fputs("#!/bin/bash\n", out); + } else { + out = fopen(filename, "w"); + if (out == NULL) { + fputs("Error opening /kaem.run\n", stderr); + exit(1); + } + fputs("set -ex\n", out); + } + output_call_script(out, "", int2str(counter, 10, 0), using_bash, 0); + fclose(out); + out = start_script(counter, using_bash); + counter += 1; + } else if (directive->type == TYPE_MAINT) { + output_call_script(out, "maint", directive->arg, using_bash, 1); + } + } + fclose(out); +} + +void main(int argc, char **argv) { + if (argc != 2) { + fputs("Usage: script-generator