Regular Expressions: The Guide I Always Wanted (2026)

#javascript #beginners #programming #tutorial

Regular Expressions: The Guide I Always Wanted (2026)

Regex doesn't have to be intimidating. Here's how I actually use it day to day.

The Mindset

Regex is a pattern matching language.
It's not code you write once and understand.
It's a tool you build up muscle memory for.

Start with simple patterns.
Use a visualizer (I'll share my favorites).
Gradually add complexity.

You don't need to memorize everything — just know what's possible.

The Essentials

Character Classes

// Literal characters
/abc/.test('abcdef')        // true — exact match
/hello/.test('Hello')       // false — case sensitive!

// Dot = any single character (except newline)
/a.c/.test('abc')           // true
/a.c/.test('aXc')           // true
/a.c/.test('ac')            // false — dot matches exactly one char

// Character classes
/[aeiou]/.test('hello')     // true — contains any vowel
/[0-9]/.test('price: $42')  // true — contains any digit
/[a-zA-Z]/.test('abc123')   // true — contains any letter
/[a-zA-Z0-9_]/.test('var_1') // true — word character (\w)

// Shorthand classes
/\d/.test('42')             // digit [0-9]
/\D/.test('hello')          // non-digit [^0-9]
/\w/.test('hello_123')      // word char [a-zA-Z0-9_]
/\W/.test('!@#$')           // non-word char
/\s/.test('hello world')    // whitespace (space, tab, newline)
/\S/.test('hello')          // non-whitespace

Anchors

/^hello/.test('hello world')  // true — starts with hello
/world$/.test('hello world')  // true — ends with world
/^hello$/.test('hello')       // true — exactly "hello"
/^hello$/.test('hello world') // false

// Word boundaries
/\bword\b/.test('a word here')      // true — standalone word
/\bword\b/.test('swordfish')        // false — not at boundary

Quantifiers

// ? = zero or one
/colou?r/.test('color')   // true
/colou?r/.test('colour')  // true

// * = zero or more
/ab*c/.test('ac')         // true (zero b's)
/ab*c/.test('abbc')       // true (two b's)

// + = one or more
/ab+c/.test('ac')         // false (need at least one b)
/ab+c/.test('abc')        // true

// {n} = exactly n times
/\d{4}/.test('2026')      // true — 4 digits
/\d{4}/.test('26')        // false

// {n,m} = between n and m times (inclusive)
/\d{2,4}/.test('26')      // true (2 digits)
/\d{4,8}/.test('20260523') // true (8 digits)

// Greedy vs Lazy (important!)
// .test() returns true for both patterns below; the difference is WHAT gets matched.
/<.+>/.test('<div><p>')   // true — the match is the ENTIRE string '<div><p>' (greedy)
/<.+?>/.test('<div><p>')  // true — the match is only '<div>' (lazy)
// Add ? after a quantifier to make it lazy

Practical Examples You'll Actually Use

Email Validation

// Practical (not RFC-compliant, but good enough)
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

emailRegex.test('user@example.com');     // true
emailRegex.test('user.name+tag@domain.co.uk'); // true
emailRegex.test('user@');                // false
emailRegex.test('@example.com');         // false
emailRegex.test('user example.com');     // false (has space)

// Note: For production, use a library like validator.js
// Regex email validation always has edge cases

Password Strength Check

/**
 * Scores a password against five independent regex rules
 * (minimum length, uppercase, lowercase, digit, special character).
 *
 * @param {string} password - Candidate password.
 * @returns {{valid: boolean, score: number, details: Object<string, boolean>, strength: string}}
 *   `details` maps each rule name to whether it passed, `score` counts the
 *   passed rules (0-5), `valid` is true only when every rule passed, and
 *   `strength` is a label derived from `score`.
 */
function checkPassword(password) {
  // Rule name → pattern, evaluated in this order.
  const rules = [
    ['length', /.{8,}/],
    ['uppercase', /[A-Z]/],
    ['lowercase', /[a-z]/],
    ['digit', /\d/],
    ['special', /[!@#$%^&*()_+\-=[\]{};':"\\|,.<>\/?]/],
  ];

  const details = Object.fromEntries(
    rules.map(([label, rule]) => [label, rule.test(password)]),
  );
  const outcomes = Object.values(details);
  const score = outcomes.filter(Boolean).length;

  // Index = number of rules passed (0-5).
  const labelByScore = ['weak', 'weak', 'weak', 'medium', 'strong', 'very strong'];

  return {
    valid: outcomes.every(Boolean),
    score, // 0-5
    details,
    strength: labelByScore[score],
  };
}

checkPassword('password');
// { valid: false, score: 2, strength: 'weak', ... }

checkPassword('P@ssw0rd!');
// { valid: true, score: 5, strength: 'very strong', ... }

URL Parsing

// Groups: 1 = scheme (http or https), 2 = host (stops at whitespace, '/', '?' or '#'),
// 3 = optional remainder beginning with '/', '?' or '#' and containing no whitespace.
const urlRegex = /^(https?):\/\/([^\/\s?#]+)([\/?#]\S*)?$/i;

/**
 * Splits an http(s) URL into protocol, hostname and path.
 *
 * The query string and fragment are not separated out; they stay part of
 * `path`. A port or userinfo section, if present, stays part of `hostname`.
 *
 * @param {string} url - URL to parse.
 * @returns {{protocol: string, hostname: string, path: string} | null}
 *   The parts, or null when `url` is not a string or does not match.
 */
function parseUrl(url) {
  if (typeof url !== 'string') return null;

  const match = urlRegex.exec(url);
  if (match === null) return null;

  // An unmatched optional group is undefined, so `rest` defaults to ''.
  const [, protocol, hostname, rest = ''] = match;

  return {
    protocol,
    hostname,
    // Always report a path that starts with '/', even for 'host?query' or a bare host.
    path: rest.startsWith('/') ? rest : `/${rest}`,
  };
}

parseUrl('https://dev.to/armorbreak/article');
// { protocol: 'https', hostname: 'dev.to', path: '/armorbreak/article' }

Extract Data from Strings

// Extract numbers from text
const text = 'Order #12345 for $299.99 shipped on 2026-05-23';
const numbers = text.match(/\d+(\.\d+)?/g);
console.log(numbers); // ['12345', '299.99', '2026', '05', '23']

// Extract hashtags
const tweet = 'Learning #regex and #javascript today!';
const hashtags = tweet.match(/#\w+/g);
console.log(hashtags); // ['#regex', '#javascript']

// Extract words in quotes
const str = 'He said "hello world" and left';
const quotes = str.match(/"([^"]*)"/g)?.map(s => s.slice(1, -1));
console.log(quotes); // ['hello world']

// Parse log line format
const logLine = '[2026-05-23 10:30:15] ERROR: Connection failed (code: 503)';
const logPattern = /^\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\] (\w+): (.+)/;
const parsed = logLine.match(logPattern);
if (parsed) {
  console.log({ timestamp: parsed[1], level: parsed[2], message: parsed[3] });
}
// { timestamp: '2026-05-23 10:30:15', level: 'ERROR', message: 'Connection failed (code: 503)' }

Search & Replace

// CamelCase to snake_case
/**
 * Converts a camelCase or PascalCase identifier to snake_case.
 *
 * Every ASCII capital is lower-cased and prefixed with `_`, except a capital
 * at the very start of the string, which is only lower-cased.
 *
 * @param {string} str - Identifier to convert.
 * @returns {string} The snake_case form.
 */
function camelToSnake(str) {
  return str.replace(/[A-Z]/g, (letter, offset) => {
    const lower = letter.toLowerCase();
    // A leading capital (PascalCase) must not produce a leading underscore.
    return offset === 0 ? lower : `_${lower}`;
  });
}
camelToSnake('myVariableName'); // 'my_variable_name'

// snake_case to camelCase
/**
 * Converts a snake_case identifier to camelCase by dropping each underscore
 * that is directly followed by a lowercase ASCII letter and upper-casing
 * that letter. Underscores followed by anything else are left untouched.
 *
 * @param {string} str - Identifier to convert.
 * @returns {string} The converted identifier.
 */
function snakeToCamel(str) {
  const underscoreThenLower = /_([a-z])/g;
  // Replacer receives (fullMatch, capturedLetter); only the letter is needed.
  const upperCaseCaptured = (_fullMatch, captured) => captured.toUpperCase();

  return str.replace(underscoreThenLower, upperCaseCaptured);
}
snakeToCamel('my_variable_name'); // 'myVariableName'

// Format phone number
/**
 * Formats the first standalone run of exactly 10 digits as `(XXX) XXX-XXXX`.
 *
 * @param {string} phone - Text containing a 10-digit phone number.
 * @returns {string} The text with the number formatted, or the input unchanged
 *   when it contains no run of exactly 10 digits.
 */
function formatPhone(phone) {
  // The lookarounds stop the pattern from matching inside a longer digit run
  // (e.g. an 11-digit number), which would otherwise be split in the wrong place.
  return phone.replace(/(?<!\d)(\d{3})(\d{3})(\d{4})(?!\d)/, '($1) $2-$3');
}
formatPhone('5551234567'); // '(555) 123-4567'

// Remove extra whitespace
'  hello   world  '.replace(/\s+/g, ' ').trim(); // 'hello world'

// Mask credit card
'4111111111111111'.replace(/\d(?=\d{4})/g, '*'); // '************1111'

Groups & Capturing

// Capturing groups — extract parts of the match.
// Without the g flag, match() returns [fullMatch, group1, group2, ...] or null.
const date = 'May 23, 2026';
const datePattern = /(\w+) (\d{1,2}), (\d{4})/;
const match = date.match(datePattern);

match[0]; // "May 23, 2026" (full match)
match[1]; // "May" (group 1)
match[2]; // "23" (group 2)
match[3]; // "2026" (group 3)

// Named groups (ES2018+, much more readable!)
const dateNamed = /(?<month>\w+) (?<day>\d{1,2}), (?<year>\d{4})/;
// exec() is a RegExp method: call it on the pattern and pass the string in.
const namedMatch = dateNamed.exec(date);

namedMatch.groups.month; // "May"
namedMatch.groups.day;   // "23"
namedMatch.groups.year;  // "2026"

// Non-capturing group (?:...) — group without capturing
/(?:https?:\/\/)?(?:www\.)?example\.com/
// Groups exist for grouping but don't appear in results

// Lookahead (assert without consuming)
// Positive lookahead: X(?=Y) — X followed by Y
/\d+(?= dollars)/.test('100 dollars'); // true
// Negative lookahead: X(?!Y) — X NOT followed by Y
/\d+(?! dollars)/.test('100 euros');  // true
// Caveat: this pattern is also true for '100 dollars' — \d+ backtracks to '10',
// which is followed by '0' rather than ' dollars'. Pin the end of the number
// to avoid that: /\d+\b(?! dollars)/

// Lookbehind (ES2018+)
// Positive lookbehind: (?<=Y)X — X preceded by Y
/(?<=\$)\d+/.test('$100'); // true
// Negative lookbehind: (?<!Y)X — X NOT preceded by Y
/(?<!\$)\d+/.test('100');  // true (no $ before 100)
// Caveat: this pattern is also true for '$100' — the match can start at the
// second digit, which is preceded by '1' rather than '$'. Exclude a preceding
// digit as well to avoid that: /(?<![$\d])\d+/

Flags

// g — global (find all matches, not just first)
'aaa'.match(/a/g);  // ['a', 'a', 'a']
'aaa'.match(/a/);   // ['a'] (only first)

// i — case insensitive
/HELLO/i.test('hello'); // true

// m — multiline (^ and $ match per-line)
/^line/.test('first\nline two\nthird'); // false
/^line/m.test('first\nline two\nthird'); // true (matches start of line 2)

// s — dotAll (dot matches newlines too)
// Flags go directly after the closing slash; there is no `.s` property to call through.
/multi.+<end>/.test('multi\nline\n<end>');  // false — without s, dot stops at '\n'
/multi.+<end>/s.test('multi\nline\n<end>'); // true

// Combine flags
/pattern/gims

Common Patterns Library

// Reusable patterns.
// Entries with the g flag are meant for String.prototype.match / matchAll / replace.
// A g-flagged regex keeps state in lastIndex, so avoid calling .test() or .exec()
// on those entries repeatedly.
const patterns = {
  // Validation (anchored with ^…$, so the whole string must match)
  email: /^[^\s@]+@[^\s@]+\.[^\s@]+$/,
  url: /^https?:\/\/[^\s]+$/i,
  // Each octet is limited to 0-255 with no leading zeros, so '999.1.1.1' is rejected.
  ipV4: /^(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)$/,
  hexColor: /^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$/,
  macAddress: /^([0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}$/,
  // Version-4 UUIDs only: third group starts with 4, fourth with 8, 9, a or b.
  uuid: /^[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-4[0-9A-Fa-f]{3}-[89ABab][0-9A-Fa-f]{3}-[0-9A-Fa-f]{12}$/i,

  // Extraction (unanchored, so they find the pattern anywhere in the text)
  phoneUS: /\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/,
  hashtag: /#\w+/g,
  mention: /@\w+/g,
  price: /\$\d+(?:\.\d{2})?/g,
  time24h: /(2[0-3]|[01]?[0-9]):([0-5][0-9])/,

  // Code-specific
  // Heuristic: a line not ending in ';', '{' or '}', then a blank line, then a
  // statement keyword. The m flag has no effect here (the pattern has no ^ or $).
  semicolonMissing: /[^;{}]\n\s*\n\s*(let|const|var|return|import|export|function)/m,
  consoleLog: /console\.(log|debug|info|warn|error)/,
  todoComment: /\/\/\s*TODO[:\s]|#\s*TODO[:\s]/i,
};

// Usage
patterns.email.validate = (str) => patterns.email.test(str);
// match() with a g-flagged regex returns null when nothing matches, so fall back to [].
patterns.price.extract = (str) => str.match(patterns.price) ?? [];

Testing & Debugging Regex

// JavaScript's built-in regex methods (`string` below stands for any string value)
const regex = /pattern/;
regex.test('string');  // boolean
string.match(regex);   // array or null
string.replace(regex, 'replacement'); // new string (first match only unless regex has the g flag)
string.split(regex);   // array
string.search(regex);  // index or -1

// Test multiple cases quickly
const testCases = [
  ['user@example.com', true],
  ['', false],
  ['invalid', false],
  ['@no-user.com', false],
];

for (const [input, expected] of testCases) {
  const result = patterns.email.test(input);
  const status = result === expected ? '✅' : '❌';
  console.log(`${status} "${input}" → ${result}`);
}

// Online tools I use:
// → regex101.com (best overall, explains each part)
// → regexr.com (great visual highlighting)
// → debuggex.com (interactive railroad diagram)

Performance Tips

// ❌ Catastrophic backtracking (can freeze your app!)
// This can take exponential time on certain inputs:
 /^(a+)+$/.test('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaX');

// ✅ JavaScript has no possessive quantifiers or atomic groups, so restructure
// the pattern to avoid nested quantifiers, e.g. /^a+$/ instead of /^(a+)+$/

// ❌ Slow: Compiling regex in loops
for (const item of items) {
  const match = item.match(/pattern/); // Recompiles every iteration!
}

// ✅ Fast: Compile once, reuse
const pattern = /pattern/;
for (const item of items) {
  const match = item.match(pattern); // Uses compiled version
}

// ⚠️ Be careful with .match() and global flag
'aaa'.match(/a/g); // ['a', 'a', 'a']
'bbb'.match(/a/g); // null (not empty array!)
// Always null-check or use ?? []

Quick Reference

Pattern	Meaning
`.`	Any char except newline
`\d`, `\D`	Digit / Non-digit
`\w`, `\W`	Word char / Non-word
`\s`, `\S`	Whitespace / Non-whitespace
`^`, `$`	Start / End of string
`\b`	Word boundary
`*`, `+`, `?`	0+, 1+, 0/1
`{n}`, `{n,m}`	Exact / Range count
`(…)`	Capture group
`(?:…)`	Non-capturing group
`(?<name>…)`	Named group
`(?=…)`, `(?!…)`	Lookahead
`(?<=…)`, `(?<!…)`	Lookbehind
`\1`, `\2`	Backreference to group
`g`, `i`, `m`, `s`	Flags