unicodeNormalization

This tag normalizes every Unicode character above U+007F using each normalization form (NFKC, NFC, NFD and NFKD). It then compares each normalized result with the characters of the input and outputs every Unicode character that normalizes to one of them.

Created by: hackvertor
Installed 1 times

Category: Charsets

Created on: Thursday, September 26, 2024 at 7:22:20 AM

Updated on: Friday, December 20, 2024 at 8:08:04 PM

This is a built-in tag
Tag arguments
[]
Code
/**
 * Tag that lists the Unicode characters which normalize to the characters of
 * the input.
 */
class unicodeNormalization {
  /**
   * Collects every code point from U+0080 to U+10FFFF that has a normalization
   * form (NFKC, NFC, NFD or NFKD) equal to a character of `input`.
   *
   * Matches are emitted in ascending code point order. A candidate is emitted
   * once per matching form and per occurrence of the matched character in
   * `input`, so it can appear several times in the result.
   *
   * @param {string} input - Text whose characters are looked up.
   * @returns {string} Concatenation of all matching characters, or an empty
   *   string when nothing matches.
   */
  encode(input) {
    // Nothing can match an empty input; skip the scan of the whole code space.
    if (input.length === 0) {
      return "";
    }

    // Count the input by code point rather than by UTF-16 code unit, so that
    // characters above U+FFFF are compared as a whole instead of as two
    // surrogate halves.
    const occurrences = new Map();
    for (const symbol of input) {
      occurrences.set(symbol, (occurrences.get(symbol) || 0) + 1);
    }

    const normalizationForms = ["NFKC", "NFC", "NFD", "NFKD"];
    const firstCodePoint = 0x80;
    const lastCodePoint = 0x10ffff;
    const output = [];

    for (let code = firstCodePoint; code <= lastCodePoint; code++) {
      const chr = String.fromCodePoint(code);

      // NFKD applies every canonical and compatibility decomposition. A
      // character it leaves untouched has no decomposition at all, so the
      // other three forms leave it untouched too and need not be computed.
      if (chr.normalize("NFKD") === chr) {
        const count = occurrences.get(chr);
        if (count !== undefined) {
          output.push(chr.repeat(count * normalizationForms.length));
        }
        continue;
      }

      for (const form of normalizationForms) {
        const count = occurrences.get(chr.normalize(form));
        if (count !== undefined) {
          output.push(chr.repeat(count));
        }
      }
    }

    return output.join("");
  }
}