aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools
diff options
context:
space:
mode:
authorpaulsnar <paulsnar@paulsnar.lv>2022-09-01 15:14:53 +0300
committerpaulsnar <paulsnar@paulsnar.lv>2022-09-01 15:14:53 +0300
commita51b43b46ad701bebc89d498eb48126dc871d1f6 (patch)
tree4f4e5b8f740d900534bb945887f24243c2ef622a /tools
parentutf8: Move module to C (diff)
getline: Use a width lookup table for rune-monowidth
Diffstat (limited to 'tools')
-rw-r--r--tools/wchar_genlist.c22
-rw-r--r--tools/wchar_proclist.janet110
2 files changed, 132 insertions, 0 deletions
diff --git a/tools/wchar_genlist.c b/tools/wchar_genlist.c
new file mode 100644
index 0000000..90386e2
--- /dev/null
+++ b/tools/wchar_genlist.c
@@ -0,0 +1,22 @@
+#ifndef COSMOPOLITAN
+#define _XOPEN_SOURCE
+#include <wchar.h>
+#include <stdio.h>
+#include <stddef.h>
+#endif
+
+/*
+ * Print every maximal run of consecutive code points in 0..0x10FFFF whose
+ * wcwidth() result is not 1. Each run is written to stdout as one line:
+ * first code point (hex), last code point (hex) and the width (decimal),
+ * separated by tabs. Runs of width 1 are treated as the default and omitted.
+ */
+int main(void) {
+    const unsigned max_codepoint = 0x10FFFF;
+    int run_width = 1;
+    unsigned run_start = 0;
+    for (unsigned i = 0; i <= max_codepoint; i += 1) {
+        int width = wcwidth((wchar_t)i);
+        if (width != run_width) {
+            /* A new run starts at i, so the previous one ended at i - 1. */
+            if (run_width != 1) {
+                printf("%5x\t%5x\t%i\n", run_start, i - 1, run_width);
+            }
+            run_width = width;
+            run_start = i;
+        }
+    }
+    /*
+     * The loop only reports a run once the following run begins, so a run
+     * that extends to the last code point has to be reported here.
+     */
+    if (run_width != 1) {
+        printf("%5x\t%5x\t%i\n", run_start, max_codepoint, run_width);
+    }
+    return 0;
+}
diff --git a/tools/wchar_proclist.janet b/tools/wchar_proclist.janet
new file mode 100644
index 0000000..cb25fa4
--- /dev/null
+++ b/tools/wchar_proclist.janet
@@ -0,0 +1,110 @@
+###
+### Generate the width_classes table for src/getline.c.
+###
+# Usage:
+#
+# 1. Compile tools/wchar_genlist.c. For the checked-in version, Cosmopolitan
+# libc was used to provide a reliable wcwidth.
+# 2. Pipe the output of the created binary into this script.
+# The generated output should be pasted into the k_width_classes[] array
+# in src/getline.c.
+
+# Read all of stdin and parse each non-empty line as a tab-separated row of
+# start code point (hex), end code point (hex) and width (decimal).
+# Returns an array of [start end width] tuples of numbers.
+(defn ingest-triples! []
+  (def text (:read stdin :all))
+  # Skip empty lines instead of blindly dropping the last element, so the
+  # final row survives when the input does not end with a newline.
+  (def rows
+    (filter (fn [line] (not (empty? line)))
+            (string/split "\n" text)))
+  (map
+    (fn parse-row [row]
+      (def [start end width] (string/split "\t" row))
+      # The hex columns are space-padded on the left; strip that first.
+      [(scan-number (string/triml start) 16)
+       (scan-number (string/triml end) 16)
+       (scan-number width 10)])
+    rows))
+
+# Create a fresh 8-byte mask buffer: every bit clear except the top bit of
+# the first byte, which is pre-set.
+(defn bitset/new []
+  (def mask (buffer/new-filled 8 0))
+  (put mask 0 0x80))
+# Translate a logical position (0..62) into a [byte-index bit-index] pair
+# for an 8-byte mask. Positions are shifted by one because the very first
+# bit (byte 0, bit 7) is reserved; bits are numbered from the most
+# significant end of each byte. Raises an error for any other position.
+(defn bitset/pos [pos]
+  # Validate the caller's value, so the message names what was passed in and
+  # a negative position cannot land on the reserved bit.
+  (unless (<= 0 pos 62) (errorf "pos %d out of range" pos))
+  (def slot (inc pos)) # top bit is masked
+  [(brshift slot 3) (- 7 (band slot 7))])
+# Return true when the bit for logical position `pos` is set in `mask`.
+(defn bitset/test [mask pos]
+  (def [index shift] (bitset/pos pos))
+  (def probe (blshift 1 shift))
+  (not (zero? (band (mask index) probe))))
+# Set the bit for logical position `pos` in `mask` (modified in place).
+(defn bitset/set [mask pos]
+  (let [[index shift] (bitset/pos pos)
+        updated (bor (mask index) (blshift 1 shift))]
+    (put mask index updated)))
+# Clear the bit for logical position `pos` in `mask` (modified in place).
+(defn bitset/clear [mask pos]
+  (def [index shift] (bitset/pos pos))
+  # Every bit of the byte except the one being cleared.
+  (def keep (bxor 0xFF (blshift 1 shift)))
+  (put mask index (band (mask index) keep)))
+# Copy the bytes of `buf` into a new array of integers, in order.
+(defn buffer->array [buf]
+  (seq [byte :in buf] byte))
+
+# Merge [start end width] runs into table entries and print each entry as a
+# C initializer for:
+#
+#   struct width_table_entry {
+#     uint32_t start_point; // always a codepoint, used as a sorting key
+#     uint32_t width;
+#     // if the top bit is set, the remaining 63 bits indicate which
+#     // codepoints from start_point onwards this applies to
+#     // otherwise the value is the literal end point
+#     uint64_t end_point_or_bitmask;
+#   };
+#
+# Neighbouring runs of equal width are folded into one bitmask entry as long
+# as they all lie within 63 code points of the entry's start. A run that was
+# not merged with anything is printed with its literal end point instead.
+# Bit placement inside the mask is whatever bitset/pos dictates.
+(defn coalesce! [triples]
+  # State of the entry currently being built; -width is nil until the first
+  # run has been seen.
+  (var -start nil)
+  (var -end nil)
+  (var -mask nil)
+  (var -width nil)
+  (var -coalesced nil)
+  (def entries @[])
+
+  # Mark code points lo..hi (inclusive) in the mask, relative to -start.
+  (defn mark [lo hi]
+    (for cp lo (inc hi)
+      (bitset/set -mask (- cp -start))))
+  (defn begin [start end width]
+    (set -start start)
+    (set -end end)
+    (set -width width)
+    (set -coalesced 1)
+    # A run too long for the bitmask gets no mask at all, rather than
+    # keeping the one left over from the previous entry.
+    (set -mask nil)
+    (when (< (- end start) 63)
+      (set -mask (bitset/new))
+      (mark start end)))
+  # Append the entry under construction, if there is one.
+  (defn emit-pending []
+    (when -width
+      (array/push entries
+                  (if (> -coalesced 1)
+                    {:start -start :mask -mask :width -width}
+                    {:start -start :end -end :width -width}))))
+  (defn flush [start end width]
+    (emit-pending)
+    (begin start end width))
+  # Fold the run into the current entry when it still fits in the bitmask;
+  # returns a truthy value only on success.
+  (defn try-coalesce [start end]
+    (when (< (- end -start) 63)
+      (mark start end)
+      (++ -coalesced)
+      true))
+
+  (each [start end width] triples
+    (cond
+      (nil? -width) (begin start end width)
+      (not= width -width) (flush start end width)
+      (try-coalesce start end) nil # merged, nothing else to do
+      (flush start end width)))
+  # An entry is otherwise only emitted when its successor begins, so the
+  # last one has to be emitted explicitly or it would be lost.
+  (emit-pending)
+
+  (each {:start start :end end :mask mask :width width} entries
+    (if mask
+      (printf "{ %6d, %2d, 0x%02x%02x%02x%02x%02x%02x%02x%02xULL },"
+              start width ;(buffer->array mask))
+      (printf "{ %6d, %2d, %18d }," start width end))))
+
+# Script entry point: print the banner comment, then the table entries built
+# from the rows read on stdin.
+(defn main [&]
+  (print "/* AUTO-GENERATED BY tools/wchar_proclist.janet */")
+  (-> (ingest-triples!) (coalesce!)))