diff options
| author | 2022-09-01 15:14:53 +0300 | |
|---|---|---|
| committer | 2022-09-01 15:14:53 +0300 | |
| commit | a51b43b46ad701bebc89d498eb48126dc871d1f6 (patch) | |
| tree | 4f4e5b8f740d900534bb945887f24243c2ef622a /tools | |
| parent | utf8: Move module to C (diff) | |
getline: Use a width lookup table for rune-monowidth
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/wchar_genlist.c | 22 | ||||
| -rw-r--r-- | tools/wchar_proclist.janet | 110 |
2 files changed, 132 insertions, 0 deletions
diff --git a/tools/wchar_genlist.c b/tools/wchar_genlist.c
new file mode 100644
index 0000000..90386e2
--- /dev/null
+++ b/tools/wchar_genlist.c
@@ -0,0 +1,42 @@
+#ifndef COSMOPOLITAN
+#define _XOPEN_SOURCE
+#include <wchar.h>
+#include <stdio.h>
+#include <stddef.h>
+#endif
+
+/* Last code point scanned; the loop covers 0..MAX_CODEPOINT inclusive. */
+#define MAX_CODEPOINT 0x10FFFFu
+
+/*
+ * Print the run [start, end] as "<start hex>\t<end hex>\t<width>".
+ * Runs whose width is 1 are not printed.
+ */
+static void emit_run(unsigned start, unsigned end, int width) {
+    if (width != 1) {
+        printf("%5x\t%5x\t%i\n", start, end, width);
+    }
+}
+
+/*
+ * Walk every code point, group consecutive code points that share a
+ * wcwidth() result into runs, and print each run via emit_run().
+ */
+int main(void) {
+    int run_width = 1;
+    unsigned run_start = 0;
+    for (unsigned i = 0; i <= MAX_CODEPOINT; i += 1) {
+        int width = wcwidth((wchar_t)i);
+        if (width != run_width) {
+            /* When i == 0 the pending run is the width-1 placeholder, so
+             * the wrapped "i - 1" end value is never printed. */
+            emit_run(run_start, i - 1, run_width);
+            run_width = width;
+            run_start = i;
+        }
+    }
+    /* The loop only emits a run when the width changes, so the run that
+     * reaches MAX_CODEPOINT has to be emitted here. */
+    emit_run(run_start, MAX_CODEPOINT, run_width);
+    return 0;
+}
diff --git a/tools/wchar_proclist.janet b/tools/wchar_proclist.janet
new file mode 100644
index 0000000..cb25fa4
--- /dev/null
+++ b/tools/wchar_proclist.janet
@@ -0,0 +1,140 @@
+###
+### Generate the width_classes table for src/getline.c.
+###
+# Usage:
+#
+# 1. Compile tools/wchar_genlist.c. For the checked-in version, Cosmopolitan
+#    libc was used to provide a reliable wcwidth.
+# 2. Pipe the output of the created binary into this script.
+#    The generated output should be pasted into the k_width_classes[] array
+#    in src/getline.c.
+
+# Read "<start hex>\t<end hex>\t<width>" lines from stdin and return them as
+# an array of [start end width] tuples of numbers.
+(defn ingest-triples!
+  []
+  (def wchars (:read stdin :all))
+  (def wchars (string/split "\n" wchars))
+  (array/pop wchars) # empty string left after the final newline
+  (def wchars
+    (map
+      (fn map-wchars [line]
+        (def triple (string/split "\t" line))
+        # wchar_genlist pads the hex columns with spaces ("%5x"); trim them.
+        [(-> (triple 0) (string/triml) (scan-number 16))
+         (-> (triple 1) (string/triml) (scan-number 16))
+         (-> (triple 2) (scan-number 10))])
+      wchars))
+  wchars)
+
+# A bitset is an 8-byte buffer that is later printed as one 64-bit hex
+# literal, first byte first. The top bit of the first byte is always set, so
+# positions 0..62 are usable.
+(defn bitset/new [] @"\x80\0\0\0\0\0\0\0")
+# Translate a position into [byte-index bit-index] within the buffer.
+(defn bitset/pos [pos]
+  (def pos (inc pos)) # top bit is masked
+  (when (>= pos 64) (errorf "pos %d out of range" pos))
+  [(brshift pos 3) (- 7 (band pos 7))])
+# True when the bit for `pos` is set.
+(defn bitset/test [mask pos]
+  (def [byte bit] (bitset/pos pos))
+  (def byte-val (mask byte))
+  (def bit-val (band byte-val (blshift 1 bit)))
+  (not= bit-val 0))
+# Set the bit for `pos` in place.
+(defn bitset/set [mask pos]
+  (def [byte bit] (bitset/pos pos))
+  (put mask byte
+       (bor (mask byte) (blshift 1 bit))))
+# Clear the bit for `pos` in place.
+(defn bitset/clear [mask pos]
+  (def [byte bit] (bitset/pos pos))
+  (put mask byte
+       (band (mask byte) (bxor 0xFF (blshift 1 bit)))))
+# Copy the bytes of a buffer into a new array.
+(defn buffer->array [buf]
+  (def a (array/new (length buf)))
+  (each x buf
+    (array/push a x))
+  a)
+
+# Merge [start end width] triples into table entries and print them as C
+# initializers. Consecutive triples with the same width are packed into one
+# bitmask entry while they fit in the 63 positions after the entry's start;
+# anything else becomes a plain start/end entry.
+(defn coalesce!
+  [triples]
+  (comment
+    ```
+    struct width_table_entry {
+      uint32_t start_point; // always a codepoint, used as a sorting key
+      uint32_t width;
+      // if top bit is set, bits 62..0 indicate for which codepoints after
+      // start_point this applies to
+      // otherwise the value is the literal end point
+      uint64_t end_point_or_bitmask;
+    };
+    ```)
+  # State of the pending entry that later triples may still be merged into.
+  (var -start nil)
+  (var -end nil)
+  (var -mask nil)
+  (var -width nil)
+  (var -coalesced nil)
+  (def entries @[])
+
+  # Start a new pending entry from one triple.
+  (defn begin [start end width]
+    (set -start start)
+    (set -end end)
+    (set -width width)
+    (set -coalesced 1)
+    # Runs too long for a bitmask carry no mask instead of a stale one.
+    (set -mask nil)
+    (when (< (- end start) 63)
+      (set -mask (bitset/new))
+      (for i start (inc end)
+        (bitset/set -mask (- i start)))))
+  # Record the pending entry: a mask once triples were merged, else a range.
+  (defn push-pending []
+    (if (> -coalesced 1)
+      (array/push entries {:start -start :mask -mask :width -width})
+      (array/push entries {:start -start :end -end :width -width})))
+  # Record the pending entry and start the next one.
+  (defn flush [start end width]
+    (push-pending)
+    (begin start end width))
+  # Merge a triple into the pending mask if it still fits; report success.
+  (defn try-coalesce [start end]
+    (if (< (- end -start) 63)
+      (do
+        (for i start (inc end)
+          (bitset/set -mask (- i -start)))
+        (++ -coalesced)
+        true)
+      false))
+
+  (each [start end width] triples
+    (if -width
+      (if (not= width -width)
+        (flush start end width)
+        (if (try-coalesce start end)
+          (do) # noop
+          (flush start end width)))
+      (begin start end width)))
+  # The loop only records an entry when the next one starts, so the last
+  # pending entry has to be recorded here or it would be lost.
+  (when -width
+    (push-pending))
+
+  (each {:start start :end end :mask mask :width width} entries
+    (if mask
+      (printf "{ %6d, %2d, 0x%02x%02x%02x%02x%02x%02x%02x%02xULL },"
+              start width ;(buffer->array mask))
+      (printf "{ %6d, %2d, %18d }," start width end))))
+
+# Entry point: print the header comment, then the generated table rows.
+(defn main [&]
+  (print `/* AUTO-GENERATED BY tools/wchar_proclist.janet */`)
+  (coalesce! (ingest-triples!)))
