I’m making a code editor, and I use a regular expression to colorize certain words. I want to accept all characters, but I want some of them split into separate tokens: numbers, {}, (), and keywords (int, bool, …). So I tried this regular expression:
/([a-zA-Z_]\w*|\d+|[(){}#\[\]!"$%&\/=?*+-;,:.<>@#\|€÷פ߸¨'˝´˙`˛°˘^ˇ~])|(\s+)/g
And output with it is:
["#", "include", " ", "int", "(", "bool", ")", "01", "{", "while", "}", "0", "/", "/"]
So as I mentioned, I want every number divided: ["0", "1"]
,
keywords: ["int", "bool"]
,
brackets: ["(", ")", "{", "}"]
,
includes: ["#include", "#ifdef", "#ifndef", "#define"]
The first problem, with the includes, is that a regex maybe cannot match a full word like #include. If anyone can show me how to capture the # together with the text that follows it, such as include, please prove me wrong;
if it can’t be done, I will color them as separate tokens, like this: ["#", "include"].
And last, the second problem is with comments: I don’t want the slashes marked one by one on the line, because / is also the division operator
, and I want to detect it like this:
["//", "/*", "*/"]
And I want to color the whole line for //
, and the whole text until end of "BIG" comment like this:
/*text text text
text text text
text text text*/
Also very important, IT NEEDS TO ACCEPT SPACE. Sorry for different topics!
With my regular expression:
Expected:
JavaScript, HTML, CSS, cpp.ge.js:
// Editing surface: the element whose id is "editor".
var editor = document.getElementById("editor");
// Run handleInput on every "input" event fired by the editor.
editor.addEventListener("input", handleInput);
/**
 * Measures how many characters lie between the start of the editor's
 * contents and the focus point of the current selection.
 *
 * @returns {number} Character offset of the caret, or 0 when the selection
 *   holds no range.
 */
function getCaretPosition() {
  var sel = window.getSelection();
  if (!(sel.rangeCount > 0)) {
    return 0;
  }
  // Work on a clone so the live selection range is not modified.
  var probe = sel.getRangeAt(0).cloneRange();
  probe.selectNodeContents(editor);
  // Cut the range off at the focus point; its text length is the offset.
  probe.setEnd(sel.focusNode, sel.focusOffset);
  return probe.toString().length;
}
/**
 * Places a collapsed caret at the given character offset inside the editor.
 *
 * The offset is resolved against the editor's text nodes in order: the first
 * node whose end is at or beyond `position` receives the caret. If no node
 * reaches that far, the selection is left untouched.
 *
 * @param {number} position - Character offset counted from the start of the
 *   editor's text.
 */
function setCaretPosition(position) {
  var sel = window.getSelection();
  var caretRange = document.createRange();
  var nodes = getTextNodes(editor);
  var consumed = 0; // characters in the nodes already skipped
  var target = null;
  var idx = 0;
  while (target === null && idx < nodes.length) {
    var candidate = nodes[idx];
    var len = candidate.textContent.length;
    if (position <= consumed + len) {
      target = candidate;
    } else {
      consumed += len;
    }
    idx += 1;
  }
  if (!target) {
    return;
  }
  caretRange.setStart(target, position - consumed);
  caretRange.collapse(true);
  sel.removeAllRanges();
  sel.addRange(caretRange);
}
/**
 * Collects every text node at or below `node`, in document order.
 *
 * @param {Node} node - Root of the subtree to scan.
 * @returns {Node[]} The text nodes found, earliest first.
 */
function getTextNodes(node) {
  var found = [];
  var pending = [node];
  while (pending.length > 0) {
    var current = pending.pop();
    if (current.nodeType === Node.TEXT_NODE) {
      found.push(current);
      continue;
    }
    // Push children last-to-first so the first child is popped next,
    // which keeps the result in document order.
    var kids = current.childNodes;
    for (var k = kids.length - 1; k >= 0; k--) {
      pending.push(kids[k]);
    }
  }
  return found;
}
/**
 * Input handler: re-renders the editor's text as highlighted HTML and puts
 * the caret back where it was.
 */
function handleInput() {
  var rawText = editor.textContent;
  // Capture the caret before innerHTML is replaced below.
  var caret = getCaretPosition();
  editor.innerHTML = formatContent(rawText);
  setCaretPosition(caret);
}
/**
 * Converts plain source text into HTML with syntax-highlighting spans.
 *
 * The text is split into tokens by a single regular expression whose final
 * alternative matches any one character, so every character of the input
 * appears in the output. Tokens are classified as follows:
 *   - line comments (// to end of line) and block comments (slash-star to
 *     star-slash, or to the end of the input while the comment is still
 *     open) are wrapped whole in class 'comNum';
 *   - runs of digits are wrapped in class 'comNum';
 *   - '#' immediately followed by an identifier is wrapped in class 'defs'
 *     when it is listed in the global `defs` array;
 *   - identifiers listed in the global `keywords` array are wrapped in
 *     class 'keywords';
 *   - single characters listed in the global `brackets` array are wrapped
 *     in class 'brackets';
 *   - double-quoted string literals are recognised only so that comment
 *     markers inside them are not treated as comments; they are emitted
 *     without a span.
 * Everything else is emitted as plain text. All token text is HTML-escaped
 * because the result is assigned to innerHTML.
 *
 * @param {string} input - Raw text of the editor; null/undefined is treated
 *   as the empty string.
 * @returns {string} HTML markup whose text content equals `input`.
 */
function formatContent(input) {
  var text = input == null ? "" : String(input);

  // Capture groups: 1 comment, 2 string literal, 3 preprocessor directive,
  // 4 identifier, 5 number, 6 any other single character (incl. whitespace).
  var tokenPattern = /(\/\/[^\n]*|\/\*[\s\S]*?(?:\*\/|$))|("(?:[^"\\\n]|\\.)*"?)|(#[A-Za-z_]\w*)|([A-Za-z_]\w*)|(\d+)|([\s\S])/g;

  // Escape the characters that would otherwise be parsed as markup.
  function escapeHtml(raw) {
    return raw
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
  }

  // Wrap an escaped token in a span carrying the given CSS class.
  function wrap(cssClass, raw) {
    return "<span class='" + cssClass + "'>" + escapeHtml(raw) + "</span>";
  }

  return text.replace(
    tokenPattern,
    function (token, comment, string, directive, word, number) {
      if (comment !== undefined || number !== undefined) {
        return wrap("comNum", token);
      }
      if (directive !== undefined && defs.indexOf(token) > -1) {
        return wrap("defs", token);
      }
      if (word !== undefined && keywords.indexOf(token) > -1) {
        return wrap("keywords", token);
      }
      if (brackets.indexOf(token) > -1) {
        return wrap("brackets", token);
      }
      return escapeHtml(token);
    }
  );
}
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Declare the document encoding explicitly instead of relying on server or browser defaults. -->
  <meta charset="utf-8">
  <title>ge - The Graphical Editor</title>
  <link rel="stylesheet" href="style.css">
</head>
<body>
  <!-- Editing surface; the script looks this element up by its id. -->
  <div id="editor" contenteditable></div>
  <!-- Scripts belong inside <body> (or <head>), not between </body> and </html>.
       Placed after the editor element so it exists when the scripts run. -->
  <script src="cpp.ge.js"></script>
  <script src="script.js"></script>
</body>
</html>
/* Full-viewport editing surface with a dark theme. */
#editor {
  position: fixed;
  padding: 0;
  margin: 0;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  background-color: #1e1e1e;
  color: white;
  font-size: 14px;
  font-family: monospace;
  /* Keep runs of spaces and line breaks in the text visible; with the
     default white-space handling they are collapsed when rendered. */
  white-space: pre-wrap;
}

/* Token colours, one rule per span class. */
.keywords {
  color: #569cd6;
}

.brackets {
  color: #ffd700;
}

.comNum {
  color: #608b4e;
}

.defs {
  color: #da70d6;
}
//START CPP
// Words coloured with the 'keywords' class. formatContent looks tokens up
// here by exact string match (indexOf), so entries are case-sensitive.
// NOTE(review): the underscore-prefixed entries look like compiler-specific
// extensions rather than standard C++ keywords — confirm the intended source.
var keywords = [
'abstract',
'amp',
'array',
'auto',
'bool',
'break',
'case',
'catch',
'char',
'class',
'const',
'constexpr',
'const_cast',
'continue',
'cpu',
'decltype',
'default',
'delegate',
'delete',
'do',
'double',
'dynamic_cast',
'each',
'else',
'enum',
'event',
'explicit',
'export',
'extern',
'false',
'final',
'finally',
'float',
'for',
'friend',
'gcnew',
'generic',
'goto',
'if',
'in',
'initonly',
'inline',
'int',
'interface',
'interior_ptr',
'internal',
'literal',
'long',
'mutable',
'namespace',
'new',
'noexcept',
'nullptr',
'__nullptr',
'operator',
'override',
'partial',
'pascal',
'pin_ptr',
'private',
'property',
'protected',
'public',
'ref',
'register',
'reinterpret_cast',
'restrict',
'return',
'safe_cast',
'sealed',
'short',
'signed',
'sizeof',
'static',
'static_assert',
'static_cast',
'struct',
'switch',
'template',
'this',
'thread_local',
'throw',
'tile_static',
'true',
'try',
'typedef',
'typeid',
'typename',
'union',
'unsigned',
'using',
'virtual',
'void',
'volatile',
'wchar_t',
'where',
'while',
'_asm', // reserved words with one leading underscore
'_based',
'_cdecl',
'_declspec',
'_fastcall',
'_if_exists',
'_if_not_exists',
'_inline',
'_multiple_inheritance',
'_pascal',
'_single_inheritance',
'_stdcall',
'_virtual_inheritance',
'_w64',
'__abstract', // reserved words with two leading underscores
'__alignof',
'__asm',
'__assume',
'__based',
'__box',
'__builtin_alignof',
'__cdecl',
'__clrcall',
'__declspec',
'__delegate',
'__event',
'__except',
'__fastcall',
'__finally',
'__forceinline',
'__gc',
'__hook',
'__identifier',
'__if_exists',
'__if_not_exists',
'__inline',
'__int128',
'__int16',
'__int32',
'__int64',
'__int8',
'__interface',
'__leave',
'__m128',
'__m128d',
'__m128i',
'__m256',
'__m256d',
'__m256i',
'__m512',
'__m512d',
'__m512i',
'__m64',
'__multiple_inheritance',
'__newslot',
'__nogc',
'__noop',
'__nounwind',
'__novtordisp',
'__pascal',
'__pin',
'__pragma',
'__property',
'__ptr32',
'__ptr64',
'__raise',
'__restrict',
'__resume',
'__sealed',
'__single_inheritance',
'__stdcall',
'__super',
'__thiscall',
'__try',
'__try_cast',
'__typeof',
'__unaligned',
'__unhook',
'__uuidof',
'__value',
'__virtual_inheritance',
'__w64',
'__wchar_t'
];
// Bracket tokens (single characters and empty pairs) coloured with the 'brackets' class.
var brackets = ["(", ")", "()", "{", "}", "{}"];
// Comment delimiters and decimal digits coloured with the 'comNum' class.
// The list is dense: a stray double comma previously left an empty slot after "*/".
var comNum = ["//", "/*", "*/", "/**/", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"];
// Preprocessor directives coloured with the 'defs' class.
var defs = ["#include", "#ifdef", "#ifndef", "#define", "#endif"];
//END CPP
2
Answers
Thanks to Debby Sinkalu for the answer; I implemented his comment classification system. I made a regular expression that looks like this:
To achieve the desired coloring and formatting of your code editor, you can update the formatContent function to handle comments and special characters as well. Additionally, you can modify the regular expression to match the comment patterns and special characters. Here’s an updated version of the formatContent function:
The updated function handles single-line comments (//) and multi-line comments (/* … */). When the editor encounters //, it will apply the comment class to color the whole line. Similarly, when it encounters /*, it will continue marking the text until it finds */, treating everything in between as a multi-line comment.
Remember to update your CSS to style the new comment class accordingly:
With these changes, your code editor should now properly handle comments, special characters, and keywords with the desired color and formatting.