From eeefb27f3e9c9b151e798ddffea0bfc636a790e4 Mon Sep 17 00:00:00 2001 From: Philipp Stadler <a51820432@unet.univie.ac.at> Date: Thu, 21 Mar 2024 13:21:24 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20Strichtypen=20f=C3=BCr=20einige=20tradit?= =?UTF-8?q?ionelle=20Zeichen=20mit=20Gras-Radikal=20hinzugef=C3=BCgt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strichtypen hinzugefügt unter anderem für: * `葉`, * `蓮`, * `夢`, * `觀`, * `歡`, * `莊`, * `藥`, * `蓋`, * `藝`, * `華`, * `蔭`, * `舊`, * `蘭`, * `蘋`, * `蕩`, * `蔣`, * `蕭`, * `薑`. --- build/gen-hanzi-data/stroke-type.ts | 52 +++++++++++++++++++++++++---- test/data/stroke-type.test.ts | 38 +++++++++++++++++++++ 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/build/gen-hanzi-data/stroke-type.ts b/build/gen-hanzi-data/stroke-type.ts index 6e35b99..e6e3b93 100644 --- a/build/gen-hanzi-data/stroke-type.ts +++ b/build/gen-hanzi-data/stroke-type.ts @@ -3,7 +3,7 @@ import cncharOrder from 'cnchar-order' import cncharTrad from 'cnchar-trad' import { type StrokeTypeMaybeUnclean, - type StrokeType + StrokeType } from '../../src-common/stroke-encodings' import { strokeTypeOverrides } from './stroke-type-overrides' import { compat } from './blocks/index' @@ -34,7 +34,11 @@ export function queryStrokeTypes ( if (override !== undefined) { return override } - return queryCncharStrokeTypes(compatChar) + let strokes = queryCncharStrokeTypes(compatChar) + if (expectedLen !== undefined) { + strokes = addGrassHengIfMissing(expectedLen, strokes) + } + return strokes } function queryCncharStrokeTypes (char: string): StrokeType[] { @@ -42,9 +46,43 @@ function queryCncharStrokeTypes (char: string): StrokeType[] { if (!Array.isArray(strokeShapes)) { return [] } - // if cnchar has multiple stroke types, only keep first, and map to a closed - // set of strokes without duplicate stroke kinds - return strokeShapes.map(t => normalizeStroke( - t.split('|')[0] as StrokeTypeMaybeUnclean - )) + return 
strokeShapes.map(t => + // if cnchar has multiple stroke types, only keep first, and map to a closed + // set of strokes without duplicate stroke kinds + normalizeStroke( + t.split('|')[0] as StrokeTypeMaybeUnclean + ) + ) +} + +const traditionalGrassStrokes = [ + StrokeType.SHU, + StrokeType.HENG, + StrokeType.SHU, + StrokeType.HENG +] + +/** + * If the character has one stroke fewer than expected, and the first strokes + * are consistent with a simplified grass radical, then return a new array that + * starts with the four strokes of a traditional grass radical and then + * continues with the given strokes starting from the fourth stroke. + * + * If the length is not short by exactly one, or if the start does not look like a + * simplified grass radical, then return the given strokes array unchanged. + */ +function addGrassHengIfMissing ( + expectedLen: number, + strokes: StrokeType[] +): StrokeType[] { + const applyPatch = + // HanziWriter has one more stroke + (strokes.length + 1) === expectedLen && + // and the strokes look like the simplified version + strokes[0] === StrokeType.HENG && + strokes[1] === StrokeType.SHU && + strokes[2] === StrokeType.SHU + return applyPatch + ? 
[...traditionalGrassStrokes, ...strokes.slice(3)] + : strokes } diff --git a/test/data/stroke-type.test.ts b/test/data/stroke-type.test.ts index c8da210..14cbb7c 100644 --- a/test/data/stroke-type.test.ts +++ b/test/data/stroke-type.test.ts @@ -1,6 +1,9 @@ import { StrokeType } from '../../src-common/stroke-encodings' import { queryStrokeTypes } from '../../build/gen-hanzi-data/stroke-type' import { cjkRadicals, kangxiRadicals } from './radicals' +import { + queryPatchedHanziWriterData +} from '../../build/gen-hanzi-data/patched-hanzi-writer' describe('Kangxi radicals', () => { describe('have non-empty strokes', () => { @@ -53,3 +56,38 @@ describe('CJK Radicals Supplement radicals', () => { expect(queryStrokeTypes(char)).toEqual(strokes) } }) + +describe('Traditional characters with grass radical at the start', () => { + const chars = [ + '葉', + '蓮', + '夢', + '觀', + '歡', + '莊', + '藥', + '蓋', + '藝', + '華', + '蔭', + '舊', + '蘭', + '蘋', + '蕩', + '蔣', + '蕭', + '薑' + ] + for (const char of chars) { + test(char, async () => { + const hanziWriterCount = (await queryPatchedHanziWriterData(char)) + .strokes.length + expect(queryStrokeTypes(char, hanziWriterCount).slice(0, 4)).toEqual([ + StrokeType.SHU, + StrokeType.HENG, + StrokeType.SHU, + StrokeType.HENG + ]) + }) + } +}) -- GitLab