SPDX-FileCopyrightText: 2022 fosslinux <fosslinux@aussies.space>

SPDX-License-Identifier: PSF-2.0

We are building Python 3 using Python 2 as our bootstrap, but
makeunicodedata.py has been converted to Python 3. We need to
convert it back, in particular its print() calls and the way it
writes to files.
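
For example, a Python 3 call such as

    print("#define SHIFT", shift, file=fp)

is rewritten in the hunks below either as a Python 2 print statement
(for progress messages on stdout) or as an explicit write to the open
file object:

    fp.write("#define SHIFT %d\n" % shift)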

We only apply this to the first build.

--- Tools/unicode/makeunicodedata.py	2012-04-10 09:25:37.000000000 +1000
+++ Tools/unicode/makeunicodedata.py	2022-07-13 14:13:37.864821008 +1000
@@ -67,7 +67,7 @@
 
 def maketables(trace=0):
 
-    print("--- Reading", UNICODE_DATA % "", "...")
+    print "--- Reading", UNICODE_DATA % "", "..."
 
     version = ""
     unicode = UnicodeData(UNICODE_DATA % version,
@@ -76,15 +76,15 @@
                           DERIVED_CORE_PROPERTIES % version,
                           DERIVEDNORMALIZATION_PROPS % version)
 
-    print(len(list(filter(None, unicode.table))), "characters")
+    print len(list(filter(None, unicode.table))), "characters"
 
     for version in old_versions:
-        print("--- Reading", UNICODE_DATA % ("-"+version), "...")
+        print "--- Reading", UNICODE_DATA % ("-"+version) + "..."
         old_unicode = UnicodeData(UNICODE_DATA % ("-"+version),
                                   COMPOSITION_EXCLUSIONS % ("-"+version),
                                   EASTASIAN_WIDTH % ("-"+version),
                                   DERIVED_CORE_PROPERTIES % ("-"+version))
-        print(len(list(filter(None, old_unicode.table))), "characters")
+        print len(list(filter(None, old_unicode.table))), "characters"
         merge_old_version(version, unicode, old_unicode)
 
     makeunicodename(unicode, trace)
@@ -103,7 +103,7 @@
 
     FILE = "Modules/unicodedata_db.h"
 
-    print("--- Preparing", FILE, "...")
+    print "--- Preparing", FILE, "..."
 
     # 1) database properties
 
@@ -214,92 +214,90 @@
         l = comp_last[l]
         comp_data[f*total_last+l] = char
 
-    print(len(table), "unique properties")
-    print(len(decomp_prefix), "unique decomposition prefixes")
-    print(len(decomp_data), "unique decomposition entries:", end=' ')
-    print(decomp_size, "bytes")
-    print(total_first, "first characters in NFC")
-    print(total_last, "last characters in NFC")
-    print(len(comp_pairs), "NFC pairs")
+    print len(table), "unique properties"
+    print len(decomp_prefix), "unique decomposition prefixes"
+    print len(decomp_data), "unique decomposition entries:",
+    print decomp_size, "bytes"
+    print total_first, "first characters in NFC"
+    print total_last, "last characters in NFC"
+    print len(comp_pairs), "NFC pairs"
 
-    print("--- Writing", FILE, "...")
+    print "--- Writing", FILE, "..."
 
     fp = open(FILE, "w")
-    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
-    print(file=fp)
-    print('#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION, file=fp)
-    print("/* a list of unique database records */", file=fp)
-    print("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {", file=fp)
+    fp.write("/* this file was generated by %s %s */\n\n" % (SCRIPT, VERSION))
+    fp.write('#define UNIDATA_VERSION "%s"\n' % UNIDATA_VERSION)
+    fp.write("/* a list of unique database records */\n")
+    fp.write("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {\n")
     for item in table:
-        print("    {%d, %d, %d, %d, %d, %d}," % item, file=fp)
-    print("};", file=fp)
-    print(file=fp)
-
-    print("/* Reindexing of NFC first characters. */", file=fp)
-    print("#define TOTAL_FIRST",total_first, file=fp)
-    print("#define TOTAL_LAST",total_last, file=fp)
-    print("struct reindex{int start;short count,index;};", file=fp)
-    print("static struct reindex nfc_first[] = {", file=fp)
+        fp.write("    {%d, %d, %d, %d, %d, %d},\n" % item)
+    fp.write("};\n\n")
+
+    fp.write("/* Reindexing of NFC first characters. */\n")
+    fp.write("#define TOTAL_FIRST %d \n" % total_first)
+    fp.write("#define TOTAL_LAST %d \n" % total_last)
+    fp.write("struct reindex{int start;short count,index;};\n")
+    fp.write("static struct reindex nfc_first[] = {\n")
     for start,end in comp_first_ranges:
-        print("    { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
-    print("    {0,0,0}", file=fp)
-    print("};\n", file=fp)
-    print("static struct reindex nfc_last[] = {", file=fp)
+        fp.write("    { %d, %d, %d},\n" % (start,end-start,comp_first[start]))
+    fp.write("    {0,0,0}\n")
+    fp.write("};\n")
+    fp.write("static struct reindex nfc_last[] = {\n")
     for start,end in comp_last_ranges:
-        print("    { %d, %d, %d}," % (start,end-start,comp_last[start]), file=fp)
-    print("    {0,0,0}", file=fp)
-    print("};\n", file=fp)
+        fp.write("    { %d, %d, %d},\n" % (start,end-start,comp_last[start]))
+    fp.write("    {0,0,0}\n")
+    fp.write("};\n")
 
     # FIXME: <fl> the following tables could be made static, and
     # the support code moved into unicodedatabase.c
 
-    print("/* string literals */", file=fp)
-    print("const char *_PyUnicode_CategoryNames[] = {", file=fp)
+    fp.write("/* string literals */")
+    fp.write("const char *_PyUnicode_CategoryNames[] = {")
     for name in CATEGORY_NAMES:
-        print("    \"%s\"," % name, file=fp)
-    print("    NULL", file=fp)
-    print("};", file=fp)
+        fp.write("    \"%s\",\n" % name)
+    fp.write("    NULL\n")
+    fp.write("};\n")
 
-    print("const char *_PyUnicode_BidirectionalNames[] = {", file=fp)
+    fp.write("const char *_PyUnicode_BidirectionalNames[] = {\n")
     for name in BIDIRECTIONAL_NAMES:
-        print("    \"%s\"," % name, file=fp)
-    print("    NULL", file=fp)
-    print("};", file=fp)
+        fp.write("    \"%s\",\n" % name)
+    fp.write("    NULL\n")
+    fp.write("};\n")
 
-    print("const char *_PyUnicode_EastAsianWidthNames[] = {", file=fp)
+    fp.write("const char *_PyUnicode_EastAsianWidthNames[] = {\n")
    for name in EASTASIANWIDTH_NAMES:
-        print("    \"%s\"," % name, file=fp)
-    print("    NULL", file=fp)
-    print("};", file=fp)
+        fp.write("    \"%s\",\n" % name)
+    fp.write("    NULL\n")
+    fp.write("};\n")
 
-    print("static const char *decomp_prefix[] = {", file=fp)
+    fp.write("static const char *decomp_prefix[] = {\n")
     for name in decomp_prefix:
-        print("    \"%s\"," % name, file=fp)
-    print("    NULL", file=fp)
-    print("};", file=fp)
+        fp.write("    \"%s\",\n" % name)
+    fp.write("    NULL\n")
+    fp.write("};\n")
 
     # split record index table
     index1, index2, shift = splitbins(index, trace)
 
-    print("/* index tables for the database records */", file=fp)
-    print("#define SHIFT", shift, file=fp)
+    fp.write("/* index tables for the database records */\n")
+    fp.write("#define SHIFT %d\n" % shift)
     Array("index1", index1).dump(fp, trace)
     Array("index2", index2).dump(fp, trace)
 
     # split decomposition index table
     index1, index2, shift = splitbins(decomp_index, trace)
 
-    print("/* decomposition data */", file=fp)
+    fp.write("/* decomposition data */\n")
     Array("decomp_data", decomp_data).dump(fp, trace)
 
-    print("/* index tables for the decomposition data */", file=fp)
-    print("#define DECOMP_SHIFT", shift, file=fp)
+    fp.write("/* index tables for the decomposition data */\n")
+    fp.write("#define DECOMP_SHIFT %d\n" % shift)
     Array("decomp_index1", index1).dump(fp, trace)
     Array("decomp_index2", index2).dump(fp, trace)
 
     index, index2, shift = splitbins(comp_data, trace)
-    print("/* NFC pairs */", file=fp)
-    print("#define COMP_SHIFT", shift, file=fp)
+    fp.write("/* NFC pairs */\n")
+    fp.write("#define COMP_SHIFT %d\n" % shift)
     Array("comp_index", index).dump(fp, trace)
     Array("comp_data", index2).dump(fp, trace)
 
@@ -316,30 +314,30 @@
                 index[i] = cache[record] = len(records)
                 records.append(record)
         index1, index2, shift = splitbins(index, trace)
-        print("static const change_record change_records_%s[] = {" % cversion, file=fp)
+        fp.write("static const change_record change_records_%s[] = {\n" % cversion)
         for record in records:
-            print("\t{ %s }," % ", ".join(map(str,record)), file=fp)
-        print("};", file=fp)
-        Array("changes_%s_index" % cversion, index1).dump(fp, trace)
-        Array("changes_%s_data" % cversion, index2).dump(fp, trace)
-        print("static const change_record* get_change_%s(Py_UCS4 n)" % cversion, file=fp)
-        print("{", file=fp)
-        print("\tint index;", file=fp)
-        print("\tif (n >= 0x110000) index = 0;", file=fp)
-        print("\telse {", file=fp)
-        print("\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
-        print("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
-              (cversion, shift, ((1<<shift)-1)), file=fp)
-        print("\t}", file=fp)
-        print("\treturn change_records_%s+index;" % cversion, file=fp)
-        print("}\n", file=fp)
-        print("static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion, file=fp)
-        print("{", file=fp)
-        print("\tswitch(n) {", file=fp)
+            fp.write("\t{ %s },\n" % ", ".join(map(str,record)))
+        fp.write("};\n")
+        Array("changes_%s_index\n" % cversion, index1).dump(fp, trace)
+        Array("changes_%s_data\n" % cversion, index2).dump(fp, trace)
+        fp.write("static const change_record* get_change_%s(Py_UCS4 n)\n" % cversion)
+        fp.write("{\n")
+        fp.write("\tint index;\n")
+        fp.write("\tif (n >= 0x110000) index = 0;\n")
+        fp.write("\telse {\n")
+        fp.write("\t\tindex = changes_%s_index[n>>%d];\n" % (cversion, shift))
+        fp.write("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];\n" % \
+                 (cversion, shift, ((1<<shift)-1)))
+        fp.write("\t}\n")
+        fp.write("\treturn change_records_%s+index;\n" % cversion)
+        fp.write("}\n\n")
+        fp.write("static Py_UCS4 normalization_%s(Py_UCS4 n)\n" % cversion)
+        fp.write("{\n")
+        fp.write("\tswitch(n) {\n")
         for k, v in normalization:
-            print("\tcase %s: return 0x%s;" % (hex(k), v), file=fp)
-        print("\tdefault: return 0;", file=fp)
-        print("\t}\n}\n", file=fp)
+            fp.write("\tcase %s: return 0x%s;\n" % (hex(k), v))
+        fp.write("\tdefault: return 0;\n")
+        fp.write("\t}\n}\n\n")
 
     fp.close()
 
@@ -350,7 +348,7 @@
 
     FILE = "Objects/unicodetype_db.h"
 
-    print("--- Preparing", FILE, "...")
+    print "--- Preparing", FILE, "..."
 
     # extract unicode types
     dummy = (0, 0, 0, 0, 0, 0)
@@ -433,25 +431,25 @@
             table.append(item)
         index[char] = i
 
-    print(len(table), "unique character type entries")
+    print len(table), "unique character type entries"
 
-    print("--- Writing", FILE, "...")
+    print "--- Writing", FILE, "..."
 
     fp = open(FILE, "w")
-    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
-    print(file=fp)
-    print("/* a list of unique character type descriptors */", file=fp)
-    print("const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {", file=fp)
+    fp.write("/* this file was generated by %s %s */\n" % (SCRIPT, VERSION))
+    fp.write("\n")
+    fp.write("/* a list of unique character type descriptors */\n")
+    fp.write("const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {\n")
     for item in table:
-        print("    {%d, %d, %d, %d, %d, %d}," % item, file=fp)
-    print("};", file=fp)
-    print(file=fp)
+        fp.write("    {%d, %d, %d, %d, %d, %d},\n" % item)
+    fp.write("};\n")
+    fp.write("\n")
 
     # split decomposition index table
     index1, index2, shift = splitbins(index, trace)
 
-    print("/* type indexes */", file=fp)
-    print("#define SHIFT", shift, file=fp)
+    fp.write("/* type indexes */\n")
+    fp.write("#define SHIFT %d\n" % shift)
     Array("index1", index1).dump(fp, trace)
     Array("index2", index2).dump(fp, trace)
 
@@ -464,7 +462,7 @@
 
     FILE = "Modules/unicodename_db.h"
 
-    print("--- Preparing", FILE, "...")
+    print "--- Preparing", FILE, "..."
 
     # collect names
     names = [None] * len(unicode.chars)
@@ -476,7 +474,7 @@
         if name and name[0] != "<":
             names[char] = name + chr(0)
 
-    print(len(list(n for n in names if n is not None)), "distinct names")
+    print len(list(n for n in names if n is not None)), "distinct names"
 
     # collect unique words from names (note that we differ between
     # words inside a sentence, and words ending a sentence. the
@@ -497,7 +495,7 @@
                 else:
                     words[w] = [len(words)]
 
-    print(n, "words in text;", b, "bytes")
+    print n, "words in text;", b, "bytes"
 
     wordlist = list(words.items())
 
@@ -511,19 +509,19 @@
     escapes = 0
     while escapes * 256 < len(wordlist):
         escapes = escapes + 1
-    print(escapes, "escapes")
+    print escapes, "escapes"
 
     short = 256 - escapes
 
     assert short > 0
 
-    print(short, "short indexes in lexicon")
+    print short, "short indexes in lexicon"
 
     # statistics
     n = 0
     for i in range(short):
         n = n + len(wordlist[i][1])
-    print(n, "short indexes in phrasebook")
+    print n, "short indexes in phrasebook"
 
     # pick the most commonly used words, and sort the rest on falling
     # length (to maximize overlap)
@@ -592,29 +590,29 @@
 
     codehash = Hash("code", data, 47)
 
-    print("--- Writing", FILE, "...")
+    print "--- Writing", FILE, "..."
 
     fp = open(FILE, "w")
-    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
-    print(file=fp)
-    print("#define NAME_MAXLEN", 256, file=fp)
-    print(file=fp)
-    print("/* lexicon */", file=fp)
+    fp.write("/* this file was generated by %s %s */\n" % (SCRIPT, VERSION))
+    fp.write("\n")
+    fp.write("#define NAME_MAXLEN 256")
+    fp.write("\n")
+    fp.write("/* lexicon */\n")
     Array("lexicon", lexicon).dump(fp, trace)
     Array("lexicon_offset", lexicon_offset).dump(fp, trace)
 
     # split decomposition index table
     offset1, offset2, shift = splitbins(phrasebook_offset, trace)
 
-    print("/* code->name phrasebook */", file=fp)
-    print("#define phrasebook_shift", shift, file=fp)
-    print("#define phrasebook_short", short, file=fp)
+    fp.write("/* code->name phrasebook */\n")
+    fp.write("#define phrasebook_shift %d\n" % shift)
+    fp.write("#define phrasebook_short %d\n" % short)
 
     Array("phrasebook", phrasebook).dump(fp, trace)
     Array("phrasebook_offset1", offset1).dump(fp, trace)
     Array("phrasebook_offset2", offset2).dump(fp, trace)
 
-    print("/* name->code dictionary */", file=fp)
+    fp.write("/* name->code dictionary */\n")
     codehash.dump(fp, trace)
 
     fp.close()
@@ -868,7 +866,7 @@
         else:
             raise AssertionError("ran out of polynomials")
 
-        print(size, "slots in hash table")
+        print size, "slots in hash table"
 
         table = [None] * size
 
@@ -900,7 +898,7 @@
                 if incr > mask:
                     incr = incr ^ poly
 
-        print(n, "collisions")
+        print n, "collisions"
         self.collisions = n
 
        for i in range(len(table)):
@@ -931,8 +929,6 @@
     def dump(self, file, trace=0):
         # write data to file, as a C array
         size = getsize(self.data)
-        if trace:
-            print(self.name+":", size*len(self.data), "bytes", file=sys.stderr)
         file.write("static ")
         if size == 1:
             file.write("unsigned char")
@@ -980,12 +976,6 @@
|
|
"""
|
|
|
|
import sys
|
|
- if trace:
|
|
- def dump(t1, t2, shift, bytes):
|
|
- print("%d+%d bins at shift %d; %d bytes" % (
|
|
- len(t1), len(t2), shift, bytes), file=sys.stderr)
|
|
- print("Size of original table:", len(t)*getsize(t), \
|
|
- "bytes", file=sys.stderr)
|
|
n = len(t)-1 # last valid index
|
|
maxshift = 0 # the most we can shift n and still have something left
|
|
if n > 0:
|
|
@@ -993,7 +983,7 @@
         n >>= 1
         maxshift += 1
     del n
-    bytes = sys.maxsize  # smallest total size so far
+    bytes_size = 2**31 - 1  # smallest total size so far
     t = tuple(t)    # so slices can be dict keys
     for shift in range(maxshift + 1):
         t1 = []
@@ -1010,15 +1000,10 @@
             t1.append(index >> shift)
         # determine memory size
         b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
-        if trace > 1:
-            dump(t1, t2, shift, b)
-        if b < bytes:
+        if b < bytes_size:
             best = t1, t2, shift
-            bytes = b
+            bytes_size = b
     t1, t2, shift = best
-    if trace:
-        print("Best:", end=' ', file=sys.stderr)
-        dump(t1, t2, shift, bytes)
     if __debug__:
         # exhaustively verify that the decomposition is correct
         mask = ~((~0) << shift)   # i.e., low-bit mask of shift bits
--- Lib/token.py	2012-04-10 09:25:36.000000000 +1000
+++ Lib/token.py	2022-07-13 14:13:37.893821468 +1000
@@ -93,11 +93,7 @@
     outFileName = "Lib/token.py"
     if len(args) > 1:
         outFileName = args[1]
-    try:
-        fp = open(inFileName)
-    except IOError as err:
-        sys.stdout.write("I/O error: %s\n" % str(err))
-        sys.exit(1)
+    fp = open(inFileName)
     lines = fp.read().split("\n")
     fp.close()
     prog = re.compile(
@@ -114,7 +110,7 @@
     # load the output skeleton from the target:
     try:
         fp = open(outFileName)
-    except IOError as err:
+    except IOError:
         sys.stderr.write("I/O error: %s\n" % str(err))
         sys.exit(2)
     format = fp.read().split("\n")
@@ -131,7 +127,7 @@
     format[start:end] = lines
     try:
         fp = open(outFileName, 'w')
-    except IOError as err:
+    except IOError:
         sys.stderr.write("I/O error: %s\n" % str(err))
         sys.exit(4)
     fp.write("\n".join(format))