unicodeNormalization
This tag normalizes every Unicode character above U+007F using each normalization form (NFKC, NFC, NFD and NFKD). It then compares each normalized result with the characters of the input and outputs every Unicode character whose normalized form matches an input character.
Created by: hackvertor
Installed 1 time
Category: Charsets
Created on: Thursday, September 26, 2024 at 7:22:20 AM
Updated on: Thursday, September 26, 2024 at 7:22:20 AM
Tag arguments
[]
Code
/**
 * Tag that lists the Unicode characters which normalize to the characters
 * of the input.
 */
class unicodeNormalization {
  /**
   * Scans every code point from U+0080 to U+10FFFF and, for each of the
   * normalization forms NFKC, NFC, NFD and NFKD (in that order), checks
   * whether the normalized code point equals a character of the input.
   * Every match appends the un-normalized character to the result once per
   * occurrence of the matched character in the input.
   *
   * The result is therefore ordered by candidate code point, then by
   * normalization form; it does not follow the order of the input.
   *
   * @param {string} input - Text whose characters should be matched.
   * @returns {string} Concatenation of all matching characters; "" when the
   *   input is empty, missing, or has no positive length.
   */
  encode(input) {
    // Nothing can match an empty input, so skip the ~1.1M code point scan.
    if (input == null || !(input.length > 0)) {
      return "";
    }

    const firstCode = 0x80;
    const lastCode = 0x10ffff;
    const normalizationForms = ["NFKC", "NFC", "NFD", "NFKD"];

    // Count how often each input character occurs. Array.from() splits a
    // string by code point, so a supplementary-plane character stays whole
    // instead of being compared as two unrelated surrogate halves.
    const occurrences = new Map();
    for (const symbol of Array.from(input)) {
      occurrences.set(symbol, (occurrences.get(symbol) || 0) + 1);
    }

    const output = [];
    for (let code = firstCode; code <= lastCode; code++) {
      const chr = String.fromCodePoint(code);
      for (const form of normalizationForms) {
        // A normalized form that expands to several characters is never a
        // key of the map, so only single-character results can match.
        const count = occurrences.get(chr.normalize(form));
        if (count !== undefined) {
          output.push(chr.repeat(count));
        }
      }
    }
    return output.join("");
  }
}