DEV Community

Tech Believers
Tech Believers

Posted on

Building a Heading Tags Analyzer: HTML Parsing and Hierarchy Validation

Introduction

Heading tags (H1-H6) provide semantic structure to web content, but many sites have broken hierarchies that confuse search engines and hurt SEO. In this guide, we'll build a comprehensive heading tags analyzer that extracts, validates, and visualizes heading structure.

Understanding Heading Tag Hierarchy

Proper heading structure follows a logical outline:

<h1>Main Title</h1>
  <h2>Section 1</h2>
    <h3>Subsection 1.1</h3>
    <h3>Subsection 1.2</h3>
  <h2>Section 2</h2>
    <h3>Subsection 2.1</h3>
      <h4>Detail 2.1.1</h4>
Enter fullscreen mode Exit fullscreen mode

This creates a tree structure that both search engines and screen readers can navigate.

Building the Analyzer

Step 1: Extracting Headings

/**
 * Parses an HTML string and collects every h1-h6 element in document order.
 *
 * Uses the global DOMParser, so it must run where that API is available
 * (e.g. a browser).
 *
 * @param {string} html - Markup to analyze.
 * @returns {Array<{level: number, text: string, tag: string, index: number, position: number}>}
 *   One record per heading: numeric level (1-6), trimmed text content,
 *   lowercase tag name, index among all headings, and the value returned by
 *   getElementPosition for that element.
 */
function extractHeadings(html) {
    const doc = new DOMParser().parseFromString(html, 'text/html');
    const headingTags = doc.querySelectorAll('h1, h2, h3, h4, h5, h6');

    return Array.from(headingTags, (heading, index) => ({
        // tagName is "H1".."H6"; the second character is the level digit.
        // Always pass the radix so parsing is explicitly decimal.
        level: Number.parseInt(heading.tagName[1], 10),
        text: heading.textContent.trim(),
        tag: heading.tagName.toLowerCase(),
        index,
        position: getElementPosition(heading),
    }));
}

/**
 * Counts how many element siblings precede the given element.
 *
 * @param {{previousElementSibling: ?object}} element - Node to locate.
 * @returns {number} Zero-based position of the element among its siblings.
 */
function getElementPosition(element) {
    let precedingCount = 0;

    // Walk backwards through the sibling chain until it runs out.
    for (
        let sibling = element.previousElementSibling;
        sibling;
        sibling = sibling.previousElementSibling
    ) {
        precedingCount += 1;
    }

    return precedingCount;
}
Enter fullscreen mode Exit fullscreen mode

Step 2: Hierarchy Validation

/**
 * Runs structural checks over a list of heading records and reports problems.
 *
 * Checks are executed in a fixed order, and issues are appended in that order:
 * H1 count, skipped levels, empty headings, overly long H1s, generic headings.
 *
 * @param {Array<{level: number, text: string, tag: string}>} headings
 * @returns {Array<object>} Issue objects with `type`, `severity`, `message`
 *   and, depending on the check, `headings`, `from`/`to`, or `heading`.
 */
function validateHierarchy(headings) {
    const found = [];
    const report = (type, severity, message, extra = {}) => {
        found.push({ type, severity, message, ...extra });
    };

    // --- H1 count ---
    const topLevel = headings.filter(({ level }) => level === 1);
    if (topLevel.length === 0) {
        report('missing_h1', 'error', 'No H1 tag found. Every page should have exactly one H1.');
    } else if (topLevel.length > 1) {
        report(
            'multiple_h1',
            'warning',
            `Found ${topLevel.length} H1 tags. Best practice is one H1 per page.`,
            { headings: topLevel }
        );
    }

    // --- Skipped levels: compare each heading with the one right before it ---
    headings.slice(1).forEach((current, offset) => {
        const previous = headings[offset];
        const jumpsAhead = current.level > previous.level + 1;
        if (!jumpsAhead) {
            return;
        }
        report(
            'skipped_level',
            'error',
            `Skipped from ${previous.tag} to ${current.tag}. Should not skip heading levels.`,
            { from: previous, to: current }
        );
    });

    // --- Empty headings ---
    for (const heading of headings) {
        if (heading.text.length === 0) {
            report('empty_heading', 'error', `Empty ${heading.tag} tag found.`, { heading });
        }
    }

    // --- Very long H1s ---
    for (const heading of headings) {
        const isLongH1 = heading.level === 1 && heading.text.length > 70;
        if (isLongH1) {
            report(
                'long_h1',
                'warning',
                `H1 is ${heading.text.length} characters (over 70 recommended).`,
                { heading }
            );
        }
    }

    // --- Generic headings: exact match, or the term followed by ":" or " -" ---
    const genericTerms = ['introduction', 'more information', 'details', 'content', 'information', 'conclusion'];
    const looksGeneric = (lowered) =>
        genericTerms.some(
            (term) =>
                lowered === term ||
                lowered.includes(`${term}:`) ||
                lowered.includes(`${term} -`)
        );
    for (const heading of headings) {
        if (looksGeneric(heading.text.toLowerCase())) {
            report(
                'generic_heading',
                'warning',
                `Generic heading "${heading.text}" provides little semantic value.`,
                { heading }
            );
        }
    }

    return found;
}
Enter fullscreen mode Exit fullscreen mode

Step 3: Hierarchy Visualization

/**
 * Renders the headings as an indented plain-text outline.
 *
 * Each heading becomes one newline-terminated line, indented two spaces per
 * level below H1, in the form "H2: Heading text".
 *
 * @param {Array<{level: number, tag: string, text: string}>} headings
 * @returns {string} The outline, or an empty string when there are no headings.
 */
function visualizeHierarchy(headings) {
    const lines = headings.map(({ level, tag, text }) => {
        const padding = '  '.repeat(level - 1);
        return `${padding}${tag.toUpperCase()}: ${text}\n`;
    });

    return lines.join('');
}

/**
 * Converts a flat, document-ordered heading list into a nested tree.
 *
 * Each heading becomes a node (a shallow copy plus a `children` array) that is
 * attached to the nearest preceding heading with a strictly smaller level.
 *
 * @param {Array<{level: number}>} headings
 * @returns {Array<object>} The top-level nodes of the tree.
 */
function buildHeadingTree(headings) {
    // Level-0 sentinel that collects the top-level nodes.
    const sentinel = { children: [], level: 0 };
    const ancestors = [sentinel];

    for (const heading of headings) {
        const node = { ...heading, children: [] };

        // Discard open ancestors that are at the same depth or deeper.
        while (
            ancestors.length > 0 &&
            ancestors[ancestors.length - 1].level >= heading.level
        ) {
            ancestors.pop();
        }

        // Whatever remains on top is the parent.
        if (ancestors.length > 0) {
            const parent = ancestors[ancestors.length - 1];
            parent.children.push(node);
        }

        ancestors.push(node);
    }

    return sentinel.children;
}
Enter fullscreen mode Exit fullscreen mode

Step 4: SEO Analysis

/**
 * Derives simple SEO metrics and recommendations from extracted headings.
 *
 * @param {Array<{level: number, text: string}>} headings
 * @returns {{h1Keywords: Array, questionBasedH2s: number, headingDensity: number,
 *   averageHeadingLength: number, recommendations: string[]}}
 *   - h1Keywords: result of extractKeywords on the H1 text, filled only when
 *     there is exactly one H1.
 *   - questionBasedH2s: number of H2s that start with a question word.
 *   - headingDensity: total number of headings.
 *   - averageHeadingLength: rounded mean text length (0 when there are no headings).
 *   - recommendations: human-readable suggestions.
 */
function analyzeSEO(headings) {
    const analysis = {
        h1Keywords: [],
        questionBasedH2s: 0,
        headingDensity: 0,
        averageHeadingLength: 0,
        recommendations: []
    };

    // Extract H1 keywords (only meaningful when the page has a single H1)
    const h1s = headings.filter(h => h.level === 1);
    if (h1s.length === 1) {
        analysis.h1Keywords = extractKeywords(h1s[0].text);
    }

    // Count question-based H2s. The trailing \b keeps words that merely start
    // with a question word ("However", "Whatever") from being counted.
    const questionOpener = /^(what|why|how|when|where|which|who)\b/i;
    const h2s = headings.filter(h => h.level === 2);
    analysis.questionBasedH2s = h2s.filter(h => questionOpener.test(h.text)).length;

    // Heading density is simply the number of headings on the page
    analysis.headingDensity = headings.length;

    // Average heading length; guard the empty case so the result is 0, not NaN
    const totalLength = headings.reduce((sum, h) => sum + h.text.length, 0);
    analysis.averageHeadingLength = headings.length === 0
        ? 0
        : Math.round(totalLength / headings.length);

    // Generate recommendations
    if (h1s.length === 0) {
        analysis.recommendations.push('Add an H1 tag containing your primary keyword');
    } else if (h1s.length > 1) {
        analysis.recommendations.push('Consolidate to one H1 per page for clarity');
    }

    if (analysis.questionBasedH2s === 0 && h2s.length > 0) {
        analysis.recommendations.push('Consider using question-based H2s for featured snippet eligibility');
    }

    if (analysis.headingDensity < 3) {
        analysis.recommendations.push('Add more headings to improve content structure (aim for one heading per 150-300 words)');
    } else if (analysis.headingDensity > 20) {
        analysis.recommendations.push('Too many headings may dilute their value. Consolidate where appropriate.');
    }

    return analysis;
}

/**
 * Naive keyword picker: lowercases the text, splits it on whitespace, and
 * keeps the first five tokens longer than three characters.
 * (Could be swapped for a proper NLP approach.)
 *
 * @param {string} text - Source text, e.g. an H1.
 * @returns {string[]} Up to five lowercase tokens, in their original order.
 */
function extractKeywords(text) {
    const tokens = text.toLowerCase().split(/\s+/);
    const keywords = [];

    for (const token of tokens) {
        if (token.length > 3) {
            keywords.push(token);
        }
    }

    return keywords.slice(0, 5);
}
Enter fullscreen mode Exit fullscreen mode

Complete Analyzer Class

/**
 * Bundles the whole analysis pipeline for one HTML document: heading
 * extraction, hierarchy validation, tree construction, and SEO analysis all
 * run eagerly in the constructor.
 */
class HeadingTagsAnalyzer {
    /**
     * @param {string} html - Markup to analyze.
     */
    constructor(html) {
        const headings = extractHeadings(html);

        this.html = html;
        this.headings = headings;
        this.issues = validateHierarchy(headings);
        this.tree = buildHeadingTree(headings);
        this.seoAnalysis = analyzeSEO(headings);
    }

    /**
     * Assembles the full report: summary counts, raw headings, issues, tree,
     * SEO analysis, and a text visualization of the hierarchy.
     *
     * @returns {object} The report object.
     */
    getReport() {
        const { headings, issues, tree, seoAnalysis } = this;
        const countLevel = (level) => headings.filter((h) => h.level === level).length;
        const countSeverity = (severity) => issues.filter((i) => i.severity === severity).length;

        const summary = {
            totalHeadings: headings.length,
            h1Count: countLevel(1),
            h2Count: countLevel(2),
            h3Count: countLevel(3),
            issueCount: issues.length,
            errorCount: countSeverity('error'),
            warningCount: countSeverity('warning')
        };

        return {
            summary,
            headings,
            issues,
            tree,
            seoAnalysis,
            visualization: visualizeHierarchy(headings)
        };
    }

    /**
     * @returns {boolean} True when no issue has severity 'error'.
     */
    isValid() {
        return !this.issues.some((issue) => issue.severity === 'error');
    }
}

// Usage: analyze a document and print each section of the report.
// `htmlContent` is expected to hold the HTML string to analyze.
const analyzer = new HeadingTagsAnalyzer(htmlContent);
const report = analyzer.getReport();

const sections = [
    ['Summary:', report.summary],
    ['Issues:', report.issues],
    ['SEO Analysis:', report.seoAnalysis],
    ['\nHierarchy:\n', report.visualization],
];
for (const [label, value] of sections) {
    console.log(label, value);
}
Enter fullscreen mode Exit fullscreen mode

Conclusion

Building a heading tags analyzer requires HTML parsing, hierarchy validation, and SEO-focused analysis. The analyzer we've built extracts all heading tags, validates structure, and provides actionable recommendations for optimization.

Try it yourself: https://techbelievers.com/tools/heading-tags-analyzer

Top comments (0)