···11+import { describe, expect, it } from 'vitest';
22+import { graphemeLength } from './grapheme';
// Non-ASCII inputs are written as \u escapes so the literals survive any
// re-encoding of this file; a mis-decoded emoji silently turns into several
// unrelated characters and invalidates the expectations below.
describe( 'graphemeLength', () => {
	it( 'counts ASCII characters one-for-one', () => {
		expect( graphemeLength( 'hello' ) ).toBe( 5 );
	} );

	it( 'counts an emoji as a single grapheme', () => {
		// U+1F600 is 2 UTF-16 code units / 4 UTF-8 bytes but one grapheme.
		expect( graphemeLength( '\u{1F600}' ) ).toBe( 1 );
	} );

	it( 'counts a multi-codepoint emoji cluster as one grapheme', () => {
		// Family emoji = several codepoints (man, woman, girl) joined by ZWJ
		// (U+200D), still one grapheme.
		expect( graphemeLength( '\u{1F468}\u200D\u{1F469}\u200D\u{1F467}' ) ).toBe( 1 );
	} );

	it( 'returns 0 for the empty string', () => {
		expect( graphemeLength( '' ) ).toBe( 0 );
	} );
} );
+13
src/lib/publish/grapheme.ts
/** Builds the grapheme-granularity segmenter used by `graphemeLength`. */
function createGraphemeSegmenter() {
	return new Intl.Segmenter( undefined, { granularity: 'grapheme' } );
}

// The segmenter's configuration never changes, so it is built once on first
// use and reused instead of being reconstructed on every call.
let graphemeSegmenter: ReturnType< typeof createGraphemeSegmenter > | undefined;

/**
 * Count graphemes (user-perceived characters) the way Bluesky counts toward its
 * 300-character post limit — emoji and ZWJ clusters count as one, not as their
 * UTF-16 length or byte length.
 *
 * @param text - The string to measure.
 * @returns The number of grapheme segments in `text`; 0 for the empty string.
 */
export function graphemeLength( text: string ): number {
	graphemeSegmenter ??= createGraphemeSegmenter();

	// Count while iterating rather than spreading into an array, so no
	// intermediate list of segment objects is retained.
	let count = 0;
	for ( const _segment of graphemeSegmenter.segment( text ) ) {
		count++;
	}
	return count;
}