From eeefb27f3e9c9b151e798ddffea0bfc636a790e4 Mon Sep 17 00:00:00 2001
From: Philipp Stadler <a51820432@unet.univie.ac.at>
Date: Thu, 21 Mar 2024 13:21:24 +0100
Subject: [PATCH] =?UTF-8?q?fix:=20Strichtypen=20f=C3=BCr=20einige=20tradit?=
 =?UTF-8?q?ionelle=20Zeichen=20mit=20Gras-Radikal=20hinzugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Strichtypen hinzugefügt unter anderem für:

* `葉`,
* `蓮`,
* `夢`,
* `觀`,
* `歡`,
* `莊`,
* `藥`,
* `蓋`,
* `藝`,
* `華`,
* `蔭`,
* `舊`,
* `蘭`,
* `蘋`,
* `蕩`,
* `蔣`,
* `蕭`,
* `薑`.
---
 build/gen-hanzi-data/stroke-type.ts | 52 +++++++++++++++++++++++++----
 test/data/stroke-type.test.ts       | 38 +++++++++++++++++++++
 2 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/build/gen-hanzi-data/stroke-type.ts b/build/gen-hanzi-data/stroke-type.ts
index 6e35b99..e6e3b93 100644
--- a/build/gen-hanzi-data/stroke-type.ts
+++ b/build/gen-hanzi-data/stroke-type.ts
@@ -3,7 +3,7 @@ import cncharOrder from 'cnchar-order'
 import cncharTrad from 'cnchar-trad'
 import {
   type StrokeTypeMaybeUnclean,
-  type StrokeType
+  StrokeType
 } from '../../src-common/stroke-encodings'
 import { strokeTypeOverrides } from './stroke-type-overrides'
 import { compat } from './blocks/index'
@@ -34,7 +34,11 @@ export function queryStrokeTypes (
   if (override !== undefined) {
     return override
   }
-  return queryCncharStrokeTypes(compatChar)
+  let strokes = queryCncharStrokeTypes(compatChar)
+  if (expectedLen !== undefined) {
+    strokes = addGrassHengIfMissing(expectedLen, strokes)
+  }
+  return strokes
 }
 
 function queryCncharStrokeTypes (char: string): StrokeType[] {
@@ -42,9 +46,43 @@ function queryCncharStrokeTypes (char: string): StrokeType[] {
   if (!Array.isArray(strokeShapes)) {
     return []
   }
-  // if cnchar has multiple stroke types, only keep first, and map to a closed
-  // set of strokes without duplicate stroke kinds
-  return strokeShapes.map(t => normalizeStroke(
-    t.split('|')[0] as StrokeTypeMaybeUnclean
-  ))
+  return strokeShapes.map(t =>
+    // if cnchar has multiple stroke types, only keep first, and map to a closed
+    // set of strokes without duplicate stroke kinds
+    normalizeStroke(
+      t.split('|')[0] as StrokeTypeMaybeUnclean
+    )
+  )
+}
+
+const traditionalGrassStrokes = [ // prefix spliced in by addGrassHengIfMissing
+  StrokeType.SHU,
+  StrokeType.HENG,
+  StrokeType.SHU,
+  StrokeType.HENG
+]
+
+/**
+ * If the character has exactly one stroke fewer than expected, and its first
+ * three strokes match a simplified grass radical (HENG, SHU, SHU), return a
+ * new array that starts with the four strokes of a traditional grass radical
+ * and continues with the given strokes from the fourth stroke onwards.
+ *
+ * If the length is not short by exactly one, or if the start does not look
+ * like a simplified grass radical, return the given strokes array unchanged.
+ */
+function addGrassHengIfMissing (
+  expectedLen: number,
+  strokes: StrokeType[]
+): StrokeType[] {
+  const applyPatch =
+    // HanziWriter has one more stroke
+    (strokes.length + 1) === expectedLen &&
+    // and the strokes look like the simplified version
+    strokes[0] === StrokeType.HENG &&
+    strokes[1] === StrokeType.SHU &&
+    strokes[2] === StrokeType.SHU
+  return applyPatch
+    ? [...traditionalGrassStrokes, ...strokes.slice(3)]
+    : strokes
 }
diff --git a/test/data/stroke-type.test.ts b/test/data/stroke-type.test.ts
index c8da210..14cbb7c 100644
--- a/test/data/stroke-type.test.ts
+++ b/test/data/stroke-type.test.ts
@@ -1,6 +1,9 @@
 import { StrokeType } from '../../src-common/stroke-encodings'
 import { queryStrokeTypes } from '../../build/gen-hanzi-data/stroke-type'
 import { cjkRadicals, kangxiRadicals } from './radicals'
+import {
+  queryPatchedHanziWriterData
+} from '../../build/gen-hanzi-data/patched-hanzi-writer'
 
 describe('Kangxi radicals', () => {
   describe('have non-empty strokes', () => {
@@ -53,3 +56,38 @@ describe('CJK Radicals Supplement radicals', () => {
     expect(queryStrokeTypes(char)).toEqual(strokes)
   }
 })
+
+describe('Traditional characters with grass radical at the start', () => {
+  const chars = [ // characters whose first four strokes are checked below
+    '葉',
+    '蓮',
+    '夢',
+    '觀',
+    '歡',
+    '莊',
+    '藥',
+    '蓋',
+    '藝',
+    '華',
+    '蔭',
+    '舊',
+    '蘭',
+    '蘋',
+    '蕩',
+    '蔣',
+    '蕭',
+    '薑'
+  ]
+  for (const char of chars) { // registers one test case per character
+    test(char, async () => {
+      const hanziWriterCount = (await queryPatchedHanziWriterData(char))
+        .strokes.length
+      expect(queryStrokeTypes(char, hanziWriterCount).slice(0, 4)).toEqual([
+        StrokeType.SHU,
+        StrokeType.HENG,
+        StrokeType.SHU,
+        StrokeType.HENG
+      ])
+    })
+  }
+})
-- 
GitLab