DEV Community

Matt Ellen-Tsivintzeli
Matt Ellen-Tsivintzeli

Posted on • Edited on

2

Parse me a numeric html entity

I saw the following question:

I am using wordpress rest api and am getting encoded title strings from the server. I want to decode the string before I use it to replace the document.title.

Wordpress api

{
 "id": 698,
 "title": {
  "rendered": "Ludovico Einaudi – “Divenire”"
 },
}

actions.js

export default {
  updateDocTitle ({ state,

It's been closed as a dupe, but that didn't discourage me from wanting to figure it out for myself.

So, the challenge is: for any given string input, replace any numerically represented html entities with the correct character.

Remember, the largest codepoint is 0x10ffff.

Some test cases:

'&#246;_&#246;' // expected 'ö_ö'
'Hello &&&#x;'  // expected 'Hello &&&#x;'
'&#x123 &#x123;'  // expected '&#x123 ģ'
'&#x110000;'    // expected '&#x110000;'
Enter fullscreen mode Exit fullscreen mode

I'll post my attempt below!

Top comments (2)

Collapse
 
mellen profile image
Matt Ellen-Tsivintzeli • Edited

This is how I solved it:

const NORMAL = 0;
const FOUNDAMP = 1;
const FOUNDHASH = 2;
const FOUNDX = 3;
const FOUNDHEX = 4;
const GOTENTITY = 5;
const FOUNDDEC = 6;
function replaceNumericEntities(input)
{
const hexdigits = '0123456789abcdefABCDEF';
const digits = '0123456789';
let output = '';
let entity = '';
let isHex = false;
let state = NORMAL;
for(let c of input+' ')
{
switch(state)
{
case NORMAL:
if(c === '&')
{
state = FOUNDAMP;
entity += c;
}
else
{
output += c;
}
break;
case FOUNDAMP:
if(c === '#')
{
state = FOUNDHASH;
entity += c
}
else
{
({output, entity, state} = reset(c, entity, output));
}
break;
case FOUNDHASH:
{
if(c === 'x')
{
state = FOUNDX;
isHex = true;
entity += c;
}
else if(digits.indexOf(c) === -1)
{
({output, entity, state} = reset(c, entity, output));
}
else
{
state = FOUNDDEC;
isHex = false;
entity += c;
}
}
break;
case FOUNDX:
{
if(hexdigits.indexOf(c) === -1)
{
({output, entity, state} = reset(c, entity, output));
}
else
{
state = FOUNDHEX;
entity += c;
}
}
break;
case FOUNDHEX:
{
if(c === ';')
{
state = GOTENTITY;
}
else if(hexdigits.indexOf(c) === -1)
{
({output, entity, state} = reset(c, entity, output));
}
else
{
state = FOUNDHEX;
entity += c;
}
}
break;
case GOTENTITY:
{
if((isHex && withinHexRange(entity)) || (!isHex && withinDecRange(entity)))
{
output += getCharacter(entity, isHex);
}
else
{
output += entity;
}
({output, entity, state} = reset(c, '', output));
}
break;
case FOUNDDEC:
{
if (c === ';')
{
state = GOTENTITY;
}
else if(digits.indexOf(c) === -1)
{
({output, entity, state} = reset(c, entity, output));
}
else
{
state = FOUNDDEC;
entity += c;
}
}
break;
}
}
return output.slice(0, -1);
}
function reset(c, entity, output)
{
let state = 0;
output += entity;
entity = '';
if(c === '&')
{
state = FOUNDAMP;
entity = c
}
else
{
state = NORMAL;
output += c;
}
return {output: output, entity: entity, state: state};
}
function withinHexRange(entity)
{
let value = Number.parseInt(entity.slice(3), 16);
return value <= 0x10ffff;
}
function withinDecRange(entity)
{
let value = Number.parseInt(entity.slice(2), 10);
return value <= 0x10ffff;
}
function getCharacter(entity, isHex)
{
let value = 0;
if(isHex)
{
value = Number.parseInt(entity.slice(3), 16);
}
else
{
value = Number.parseInt(entity.slice(2), 10);
}
let cha = String.fromCodePoint(value);
return cha;
}

There must be a less verbose way :D

Collapse
 
mellen profile image
Matt Ellen-Tsivintzeli • Edited

Created a little Grease Monkey script to use it:

// ==UserScript==
// @name     HTML Entity Replacer
// @version  1
// @include  http*
// @grant    none
// @require  https://gist.githubusercontent.com/Mellen/ce5d247587dcb7b49e5145a6f49328be/raw/16871d379ea7afe4063ac3646851ae35cbc010df/replaceNumericEntities.js
// @run-at   document-end
// ==/UserScript==


inputs = Array.from(document.querySelectorAll('input[type=text], textarea'));
inputs.forEach(input => 
               {
                 input.addEventListener('keyup', e =>      
                                                 {
                                                   e.target.value = replaceNumericEntities(e.target.value);
                                                 });
               });

A Workflow Copilot. Tailored to You.

Pieces.app image

Our desktop app, with its intelligent copilot, streamlines coding by generating snippets, extracting code from screenshots, and accelerating problem-solving.

Read the docs