SPDX-FileCopyrightText: 2022 fosslinux
SPDX-License-Identifier: PSF-2.0

We are building Python 3 using Python 2 as our bootstrap interpreter, but
Tools/unicode/makeunicodedata.py has already been converted to Python 3
syntax. We need to convert it back to Python 2, in particular the print()
calls, which become print statements for console messages and explicit
fp.write() calls where the generated files are written. We only apply this
patch to the first build. A short sketch of the conversion pattern appears
after the diffs, for reference.

--- Tools/unicode/makeunicodedata.py	2012-04-10 09:25:37.000000000 +1000
+++ Tools/unicode/makeunicodedata.py	2022-07-13 14:13:37.864821008 +1000
@@ -67,7 +67,7 @@
 
 def maketables(trace=0):
 
-    print("--- Reading", UNICODE_DATA % "", "...")
+    print "--- Reading", UNICODE_DATA % "", "..."
 
     version = ""
     unicode = UnicodeData(UNICODE_DATA % version,
@@ -76,15 +76,15 @@
                           DERIVED_CORE_PROPERTIES % version,
                           DERIVEDNORMALIZATION_PROPS % version)
 
-    print(len(list(filter(None, unicode.table))), "characters")
+    print len(list(filter(None, unicode.table))), "characters"
 
     for version in old_versions:
-        print("--- Reading", UNICODE_DATA % ("-"+version), "...")
+        print "--- Reading", UNICODE_DATA % ("-"+version) + "..."
         old_unicode = UnicodeData(UNICODE_DATA % ("-"+version),
                                   COMPOSITION_EXCLUSIONS % ("-"+version),
                                   EASTASIAN_WIDTH % ("-"+version),
                                   DERIVED_CORE_PROPERTIES % ("-"+version))
-        print(len(list(filter(None, old_unicode.table))), "characters")
+        print len(list(filter(None, old_unicode.table))), "characters"
         merge_old_version(version, unicode, old_unicode)
 
     makeunicodename(unicode, trace)
@@ -103,7 +103,7 @@
 
     FILE = "Modules/unicodedata_db.h"
 
-    print("--- Preparing", FILE, "...")
+    print "--- Preparing", FILE, "..."
 
     # 1) database properties
 
@@ -214,92 +214,90 @@
             l = comp_last[l]
             comp_data[f*total_last+l] = char
 
-    print(len(table), "unique properties")
-    print(len(decomp_prefix), "unique decomposition prefixes")
-    print(len(decomp_data), "unique decomposition entries:", end=' ')
-    print(decomp_size, "bytes")
-    print(total_first, "first characters in NFC")
-    print(total_last, "last characters in NFC")
-    print(len(comp_pairs), "NFC pairs")
+    print len(table), "unique properties"
+    print len(decomp_prefix), "unique decomposition prefixes"
+    print len(decomp_data), "unique decomposition entries:",
+    print decomp_size, "bytes"
+    print total_first, "first characters in NFC"
+    print total_last, "last characters in NFC"
+    print len(comp_pairs), "NFC pairs"
 
-    print("--- Writing", FILE, "...")
+    print "--- Writing", FILE, "..."
 
     fp = open(FILE, "w")
-    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
-    print(file=fp)
-    print('#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION, file=fp)
-    print("/* a list of unique database records */", file=fp)
-    print("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {", file=fp)
+    fp.write("/* this file was generated by %s %s */\n\n" % (SCRIPT, VERSION))
+    fp.write('#define UNIDATA_VERSION "%s"\n' % UNIDATA_VERSION)
+    fp.write("/* a list of unique database records */\n")
+    fp.write("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {\n")
     for item in table:
-        print(" {%d, %d, %d, %d, %d, %d}," % item, file=fp)
-    print("};", file=fp)
-    print(file=fp)
-
-    print("/* Reindexing of NFC first characters. */", file=fp)
-    print("#define TOTAL_FIRST",total_first, file=fp)
-    print("#define TOTAL_LAST",total_last, file=fp)
-    print("struct reindex{int start;short count,index;};", file=fp)
-    print("static struct reindex nfc_first[] = {", file=fp)
+        fp.write(" {%d, %d, %d, %d, %d, %d},\n" % item)
+    fp.write("};\n\n")
+
+    fp.write("/* Reindexing of NFC first characters. */\n")
*/\n") + fp.write("#define TOTAL_FIRST %d \n" % total_first) + fp.write("#define TOTAL_LAST %d \n" % total_last) + fp.write("struct reindex{int start;short count,index;};\n") + fp.write("static struct reindex nfc_first[] = {\n") for start,end in comp_first_ranges: - print(" { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp) - print(" {0,0,0}", file=fp) - print("};\n", file=fp) - print("static struct reindex nfc_last[] = {", file=fp) + fp.write(" { %d, %d, %d},\n" % (start,end-start,comp_first[start])) + fp.write(" {0,0,0}\n") + fp.write("};\n") + fp.write("static struct reindex nfc_last[] = {\n") for start,end in comp_last_ranges: - print(" { %d, %d, %d}," % (start,end-start,comp_last[start]), file=fp) - print(" {0,0,0}", file=fp) - print("};\n", file=fp) + fp.write(" { %d, %d, %d},\n" % (start,end-start,comp_last[start])) + fp.write(" {0,0,0}\n") + fp.write("};\n") # FIXME: the following tables could be made static, and # the support code moved into unicodedatabase.c - print("/* string literals */", file=fp) - print("const char *_PyUnicode_CategoryNames[] = {", file=fp) + fp.write("/* string literals */") + fp.write("const char *_PyUnicode_CategoryNames[] = {") for name in CATEGORY_NAMES: - print(" \"%s\"," % name, file=fp) - print(" NULL", file=fp) - print("};", file=fp) + fp.write(" \"%s\",\n" % name) + fp.write(" NULL\n") + fp.write("};\n") - print("const char *_PyUnicode_BidirectionalNames[] = {", file=fp) + fp.write("const char *_PyUnicode_BidirectionalNames[] = {\n") for name in BIDIRECTIONAL_NAMES: - print(" \"%s\"," % name, file=fp) - print(" NULL", file=fp) - print("};", file=fp) + fp.write(" \"%s\",\n" % name) + fp.write(" NULL\n") + fp.write("};\n") - print("const char *_PyUnicode_EastAsianWidthNames[] = {", file=fp) + fp.write("const char *_PyUnicode_EastAsianWidthNames[] = {\n") for name in EASTASIANWIDTH_NAMES: - print(" \"%s\"," % name, file=fp) - print(" NULL", file=fp) - print("};", file=fp) + fp.write(" \"%s\",\n" % name) + fp.write(" NULL\n") + fp.write("};\n") - print("static const char *decomp_prefix[] = {", file=fp) + fp.write("static const char *decomp_prefix[] = {\n") for name in decomp_prefix: - print(" \"%s\"," % name, file=fp) - print(" NULL", file=fp) - print("};", file=fp) + fp.write(" \"%s\",\n" % name) + fp.write(" NULL\n") + fp.write("};\n") # split record index table index1, index2, shift = splitbins(index, trace) - print("/* index tables for the database records */", file=fp) - print("#define SHIFT", shift, file=fp) + fp.write("/* index tables for the database records */\n") + fp.write("#define SHIFT %d\n" % shift) Array("index1", index1).dump(fp, trace) Array("index2", index2).dump(fp, trace) # split decomposition index table index1, index2, shift = splitbins(decomp_index, trace) - print("/* decomposition data */", file=fp) + fp.write("/* decomposition data */\n") Array("decomp_data", decomp_data).dump(fp, trace) - print("/* index tables for the decomposition data */", file=fp) - print("#define DECOMP_SHIFT", shift, file=fp) + fp.write("/* index tables for the decomposition data */\n") + fp.write("#define DECOMP_SHIFT %d\n" % shift) Array("decomp_index1", index1).dump(fp, trace) Array("decomp_index2", index2).dump(fp, trace) index, index2, shift = splitbins(comp_data, trace) - print("/* NFC pairs */", file=fp) - print("#define COMP_SHIFT", shift, file=fp) + fp.write("/* NFC pairs */\n") + fp.write("#define COMP_SHIFT %d\n" % shift) Array("comp_index", index).dump(fp, trace) Array("comp_data", index2).dump(fp, trace) @@ -316,30 +314,30 @@ 
                 index[i] = cache[record] = len(records)
                 records.append(record)
         index1, index2, shift = splitbins(index, trace)
-        print("static const change_record change_records_%s[] = {" % cversion, file=fp)
+        fp.write("static const change_record change_records_%s[] = {\n" % cversion)
         for record in records:
-            print("\t{ %s }," % ", ".join(map(str,record)), file=fp)
-        print("};", file=fp)
-        Array("changes_%s_index" % cversion, index1).dump(fp, trace)
-        Array("changes_%s_data" % cversion, index2).dump(fp, trace)
-        print("static const change_record* get_change_%s(Py_UCS4 n)" % cversion, file=fp)
-        print("{", file=fp)
-        print("\tint index;", file=fp)
-        print("\tif (n >= 0x110000) index = 0;", file=fp)
-        print("\telse {", file=fp)
-        print("\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
-        print("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
-              (cversion, shift, ((1<<shift)-1)), file=fp)
+            fp.write("\t{ %s },\n" % ", ".join(map(str,record)))
+        fp.write("};\n")
+        Array("changes_%s_index" % cversion, index1).dump(fp, trace)
+        Array("changes_%s_data" % cversion, index2).dump(fp, trace)
+        fp.write("static const change_record* get_change_%s(Py_UCS4 n)\n" % cversion)
+        fp.write("{\n")
+        fp.write("\tint index;\n")
+        fp.write("\tif (n >= 0x110000) index = 0;\n")
+        fp.write("\telse {\n")
+        fp.write("\t\tindex = changes_%s_index[n>>%d];\n" % (cversion, shift))
+        fp.write("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];\n" % \
+            (cversion, shift, ((1<<shift)-1)))
 
     assert short > 0
 
-    print(short, "short indexes in lexicon")
+    print short, "short indexes in lexicon"
 
     # statistics
     n = 0
    for i in range(short):
         n = n + len(wordlist[i][1])
-    print(n, "short indexes in phrasebook")
+    print n, "short indexes in phrasebook"
 
     # pick the most commonly used words, and sort the rest on falling
     # length (to maximize overlap)
@@ -592,29 +590,29 @@
 
     codehash = Hash("code", data, 47)
 
-    print("--- Writing", FILE, "...")
+    print "--- Writing", FILE, "..."
 
     fp = open(FILE, "w")
-    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
-    print(file=fp)
-    print("#define NAME_MAXLEN", 256, file=fp)
-    print(file=fp)
-    print("/* lexicon */", file=fp)
+    fp.write("/* this file was generated by %s %s */\n" % (SCRIPT, VERSION))
+    fp.write("\n")
+    fp.write("#define NAME_MAXLEN 256")
+    fp.write("\n")
+    fp.write("/* lexicon */\n")
     Array("lexicon", lexicon).dump(fp, trace)
     Array("lexicon_offset", lexicon_offset).dump(fp, trace)
 
     # split decomposition index table
     offset1, offset2, shift = splitbins(phrasebook_offset, trace)
 
-    print("/* code->name phrasebook */", file=fp)
-    print("#define phrasebook_shift", shift, file=fp)
-    print("#define phrasebook_short", short, file=fp)
+    fp.write("/* code->name phrasebook */\n")
+    fp.write("#define phrasebook_shift %d\n" % shift)
+    fp.write("#define phrasebook_short %d\n" % short)
     Array("phrasebook", phrasebook).dump(fp, trace)
     Array("phrasebook_offset1", offset1).dump(fp, trace)
     Array("phrasebook_offset2", offset2).dump(fp, trace)
 
-    print("/* name->code dictionary */", file=fp)
+    fp.write("/* name->code dictionary */\n")
     codehash.dump(fp, trace)
 
     fp.close()
 
@@ -868,7 +866,7 @@
             else:
                 raise AssertionError("ran out of polynomials")
 
-        print(size, "slots in hash table")
+        print size, "slots in hash table"
 
         table = [None] * size
 
@@ -900,7 +898,7 @@
                 if incr > mask:
                     incr = incr ^ poly
 
-        print(n, "collisions")
+        print n, "collisions"
         self.collisions = n
 
         for i in range(len(table)):
@@ -931,8 +929,6 @@
     def dump(self, file, trace=0):
         # write data to file, as a C array
         size = getsize(self.data)
-        if trace:
-            print(self.name+":", size*len(self.data), "bytes", file=sys.stderr)
         file.write("static ")
         if size == 1:
             file.write("unsigned char")
@@ -980,12 +976,6 @@
     """
 
     import sys
-    if trace:
-        def dump(t1, t2, shift, bytes):
-            print("%d+%d bins at shift %d; %d bytes" % (
-                len(t1), len(t2), shift, bytes), file=sys.stderr)
-        print("Size of original table:", len(t)*getsize(t), \
-            "bytes", file=sys.stderr)
"bytes", file=sys.stderr) n = len(t)-1 # last valid index maxshift = 0 # the most we can shift n and still have something left if n > 0: @@ -993,7 +983,7 @@ n >>= 1 maxshift += 1 del n - bytes = sys.maxsize # smallest total size so far + bytes_size = 2**31 - 1 # smallest total size so far t = tuple(t) # so slices can be dict keys for shift in range(maxshift + 1): t1 = [] @@ -1010,15 +1000,10 @@ t1.append(index >> shift) # determine memory size b = len(t1)*getsize(t1) + len(t2)*getsize(t2) - if trace > 1: - dump(t1, t2, shift, b) - if b < bytes: + if b < bytes_size: best = t1, t2, shift - bytes = b + bytes_size = b t1, t2, shift = best - if trace: - print("Best:", end=' ', file=sys.stderr) - dump(t1, t2, shift, bytes) if __debug__: # exhaustively verify that the decomposition is correct mask = ~((~0) << shift) # i.e., low-bit mask of shift bits --- Lib/token.py 2012-04-10 09:25:36.000000000 +1000 +++ Lib/token.py 2022-07-13 14:13:37.893821468 +1000 @@ -93,11 +93,7 @@ outFileName = "Lib/token.py" if len(args) > 1: outFileName = args[1] - try: - fp = open(inFileName) - except IOError as err: - sys.stdout.write("I/O error: %s\n" % str(err)) - sys.exit(1) + fp = open(inFileName) lines = fp.read().split("\n") fp.close() prog = re.compile( @@ -114,7 +110,7 @@ # load the output skeleton from the target: try: fp = open(outFileName) - except IOError as err: + except IOError: sys.stderr.write("I/O error: %s\n" % str(err)) sys.exit(2) format = fp.read().split("\n") @@ -131,7 +127,7 @@ format[start:end] = lines try: fp = open(outFileName, 'w') - except IOError as err: + except IOError: sys.stderr.write("I/O error: %s\n" % str(err)) sys.exit(4) fp.write("\n".join(format))