summaryrefslogtreecommitdiffstats
path: root/tdemarkdown/md4c/scripts/build_punct_map.py
diff options
context:
space:
mode:
Diffstat (limited to 'tdemarkdown/md4c/scripts/build_punct_map.py')
-rw-r--r--tdemarkdown/md4c/scripts/build_punct_map.py66
1 files changed, 66 insertions, 0 deletions
diff --git a/tdemarkdown/md4c/scripts/build_punct_map.py b/tdemarkdown/md4c/scripts/build_punct_map.py
new file mode 100644
index 000000000..13102f26f
--- /dev/null
+++ b/tdemarkdown/md4c/scripts/build_punct_map.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import textwrap
+
+
+self_path = os.path.dirname(os.path.realpath(__file__));
+f = open(self_path + "/unicode/DerivedGeneralCategory.txt", "r")
+
+codepoint_list = []
+category_list = [ "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" ]
+
+# Filter codepoints falling in the right category:
+for line in f:
+ comment_off = line.find("#")
+ if comment_off >= 0:
+ line = line[:comment_off]
+ line = line.strip()
+ if not line:
+ continue
+
+ char_range, category = line.split(";")
+ char_range = char_range.strip()
+ category = category.strip()
+
+ if not category in category_list:
+ continue
+
+ delim_off = char_range.find("..")
+ if delim_off >= 0:
+ codepoint0 = int(char_range[:delim_off], 16)
+ codepoint1 = int(char_range[delim_off+2:], 16)
+ for codepoint in range(codepoint0, codepoint1 + 1):
+ codepoint_list.append(codepoint)
+ else:
+ codepoint = int(char_range, 16)
+ codepoint_list.append(codepoint)
+f.close()
+
+
+codepoint_list.sort()
+
+
+index0 = 0
+count = len(codepoint_list)
+
+records = list()
+while index0 < count:
+ index1 = index0 + 1
+ while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1:
+ index1 += 1
+
+ if index1 - index0 > 1:
+ # Range of codepoints
+ records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
+ else:
+ # Single codepoint
+ records.append("S(0x{:04x})".format(codepoint_list[index0]))
+
+ index0 = index1
+
+sys.stdout.write("static const unsigned PUNCT_MAP[] = {\n")
+sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
+ initial_indent = " ", subsequent_indent=" ")))
+sys.stdout.write("\n};\n\n")