JavaScript - Transform patterns of code points into a binary pattern
I need the code points of different categories, and I need to transform them into a binary pattern. I'm using the UnicodeSet site for getting the code points, using the set [:age=5.0:]&[[:gc=l:][:gc=nl:][:gc=mn:][:gc=mc:][:gc=nd:][:gc=pc:]\$_].
I've written a little thing for transforming the result returned by the UnicodeSet site into a binary pattern. I've tested it with simple things like [$a-z], and the results are what I expect:
01 24 00 // $
00 61 00 // a
7a 00 // z
It worked fine for non-BMP characters too. However, when using the set mentioned first, [:age=5.0:]&[[:gc=l:][:gc=nl:][:gc=mn:][:gc=mc:][:gc=nd:][:gc=pc:]\$_], the code points after U+2FA1D don't appear when I copy-paste from the site. However, if I check the "Escape" checkbox, they do appear, but the results are in a different form.
Is there a way to solve that? Or maybe there is a more appropriate browser? The final part of the result shown in the browser is 𪘀-], that is, U+2FA1D, - and ].
'use strict';

/*
 * Generates a binary pattern matching any code point in a
 * list generated by:
 *
 * https://unicode.org/cldr/utility/list-unicodeset.jsp
 *
 * Usage: node <this-script> <directory> <file>
 *
 * The input file holds a bracketed set such as `[$a-z]`; its first and last
 * UTF-16 code units (the brackets) are skipped. The result is written to a
 * file named `output` next to this script, as a sequence of records:
 *
 *   0x01 X      a single character X
 *   0x00 X Y    the interval X-Y
 *
 * Each character is stored as one little-endian UTF-16 code unit, or as two
 * (high surrogate, then low surrogate) when it is encoded as a surrogate pair.
 *
 * Only single-character backslash escapes (e.g. `\$`, `\-`) are understood:
 * the backslash is dropped and the following character is taken literally.
 * `\uXXXX`-style output (the site's "Escape" option) is NOT decoded.
 */

const fs = require('fs');
const path = require('path');

const BACKSLASH = 0x5c;
const HYPHEN = 0x2d;

/**
 * Tells whether a UTF-16 code unit is a high (leading) surrogate.
 *
 * @param {number} ch - UTF-16 code unit.
 * @returns {boolean} true when `ch` is in 0xD800..0xDBFF.
 */
function usingSurrogates(ch) {
  return ch >= 0xd800 && ch <= 0xdbff;
}

/**
 * Reads one (possibly backslash-escaped) character starting at `offset`.
 *
 * @param {string} input - Text being parsed.
 * @param {number} offset - Index of the character, or of its escaping backslash.
 * @returns {{high: number, low: (number|null), next: number}} The first code
 *   unit, the low surrogate (or null for a single unit), and the index just
 *   past the character.
 */
function readCharacter(input, offset) {
  let start = offset;
  // Skips the escape; the escaped character itself is taken literally.
  if (input.charCodeAt(start) === BACKSLASH) {
    ++start;
  }

  const high = input.charCodeAt(start);
  if (usingSurrogates(high)) {
    return { high, low: input.charCodeAt(start + 1), next: start + 2 };
  }
  return { high, low: null, next: start + 1 };
}

/**
 * Writes a character read by readCharacter() as little-endian UTF-16.
 *
 * @param {Buffer} result - Destination buffer.
 * @param {{high: number, low: (number|null)}} character - Character to write.
 * @param {number} resultOffset - Index in `result` to write at.
 * @returns {number} The index just past the written bytes.
 */
function writeCharacter(result, character, resultOffset) {
  result.writeUInt16LE(character.high, resultOffset);
  if (character.low === null) {
    return resultOffset + 2;
  }
  result.writeUInt16LE(character.low, resultOffset + 2);
  return resultOffset + 4;
}

/**
 * Converts a bracketed set (e.g. `[$a-z]`) to its binary pattern.
 *
 * @param {string} input - Set text, including the enclosing brackets.
 * @returns {Buffer} The encoded records.
 */
function encodeUnicodeSet(input) {
  // No record needs more than 3 bytes per UTF-16 code unit consumed
  // (1 tag byte + 2 bytes for a lone BMP character is the worst ratio),
  // so this bound replaces the previous hard-coded buffer size.
  const result = Buffer.alloc(input.length * 3);
  let resultOffset = 0;

  // Index of the closing bracket; parsing starts after the opening one.
  const end = input.length - 1;
  let offset = 1;

  while (offset < end) {
    // Gets the left character.
    const x = readCharacter(input, offset);
    offset = x.next;

    if (input.charCodeAt(offset) === HYPHEN) {
      // Handles the x-y interval. The upper bound is read AFTER stepping over
      // the hyphen, so an escaped upper bound is unescaped as well.
      const y = readCharacter(input, offset + 1);
      offset = y.next;

      result.writeUInt8(0, resultOffset);
      resultOffset = writeCharacter(result, x, resultOffset + 1);
      resultOffset = writeCharacter(result, y, resultOffset);
    } else {
      result.writeUInt8(1, resultOffset);
      resultOffset = writeCharacter(result, x, resultOffset + 1);
    }
  }

  return result.subarray(0, resultOffset);
}

const input = fs.readFileSync(
  path.join(process.argv[2], process.argv[3]),
  'utf8',
);

fs.writeFileSync(path.join(__dirname, 'output'), encodeUnicodeSet(input));
Comments
Post a Comment