DEV Community

Wincent Balin
Wincent Balin

Posted on • Originally published at ofdigitalwater.postach.io on

Convert German laws from XML to text using XSLT

For a small project, I needed to convert German laws, found at https://www.gesetze-im-internet.de/, from XML format to text format.

The XML format is described here and is defined by this DTD file.

The source code in the following XSL file is pretty straightforward; only adding newlines and indenting definition lists posed an additional challenge.

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Serialize the result as plain text; all line breaks and indentation are written
     explicitly by the templates below. -->
<xsl:output method="text" encoding="utf-8" omit-xml-declaration="yes"/>
<!-- Remove whitespace-only text nodes from every element of the source document. -->
<xsl:strip-space elements="*"/>
<!-- Layout building blocks. The line feed is written as a character reference so that
     re-indenting or reformatting this file cannot alter its value. -->
<xsl:variable name="newline" select="'&#10;'"/>
<xsl:variable name="space" select="' '"/>
<!-- One indentation step: four spaces. -->
<xsl:variable name="tab" select="'    '"/>
<!-- Entry point: renders the document whose root element is dokumente. -->
<xsl:template match="/dokumente">
  <!-- First the langue metadata of every norm. -->
  <xsl:apply-templates select="norm/metadaten/langue"/>
  <!-- Then every norm that has an enbez text node different from 'Inhaltsübersicht', plus
       every norm that has both langue metadata and a textdaten child. The parentheses only
       spell out the grouping XPath already applies ("and" binds tighter than "or"). -->
  <xsl:apply-templates
      select="norm[metadaten/enbez/text() != 'Inhaltsübersicht' or (metadaten/langue and textdaten)]"/>
  <!-- Finally pad the end of the output with 25 line feeds (five groups of five). -->
  <xsl:variable name="fiveNewlines"
                select="concat($newline, $newline, $newline, $newline, $newline)"/>
  <xsl:value-of select="concat($fiveNewlines, $fiveNewlines, $fiveNewlines,
                               $fiveNewlines, $fiveNewlines)"/>
</xsl:template>
<!-- Writes the langue metadata of a norm with its whitespace collapsed, followed by
     three line feeds. -->
<xsl:template match="norm/metadaten/langue">
  <xsl:value-of select="normalize-space(.)"/>
  <xsl:value-of select="concat($newline, $newline, $newline)"/>
</xsl:template>
<!-- Renders a norm that has enbez metadata: a heading line followed by the norm's textdaten.
     The heading is the enbez value; when a titel is present it is appended after two spaces
     with its whitespace collapsed. Two line feeds close the heading. -->
<xsl:template match="norm[metadaten/enbez]">
  <xsl:value-of select="metadaten/enbez"/>
  <xsl:if test="metadaten/titel">
    <xsl:value-of select="concat($space, $space, normalize-space(metadaten/titel))"/>
  </xsl:if>
  <xsl:value-of select="concat($newline, $newline)"/>
  <xsl:apply-templates select="textdaten"/>
</xsl:template>
<!-- Renders a norm that carries langue metadata together with a textdaten child: only the
     textdaten content is written by this rule.
     This pattern and norm[metadaten/enbez] both have the default priority 0.5, so a norm
     that satisfies both is an ambiguous match. XSLT 1.0 lets a processor either report an
     error or fall back to the matching rule that occurs last in the stylesheet, which is
     this one. The explicit priority makes that outcome deterministic on every processor. -->
<xsl:template match="norm[metadaten/langue and textdaten]" priority="1">
  <xsl:apply-templates select="textdaten"/>
</xsl:template>
<!-- Writes the body of a norm: its text/Content first; if a fussnoten child exists, one
     line feed and then fussnoten/Content; finally two line feeds. -->
<xsl:template match="textdaten">
  <xsl:apply-templates select="text/Content"/>
  <xsl:if test="fussnoten">
    <xsl:value-of select="$newline"/>
    <xsl:apply-templates select="fussnoten/Content"/>
  </xsl:if>
  <xsl:value-of select="concat($newline, $newline)"/>
</xsl:template>
<!-- A P element: process its children, then write one line feed. -->
<xsl:template match="P">
  <xsl:apply-templates/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- A DL (definition list): write a line feed, then process the children. One more line
     feed follows unless the list is nested (its great-grandparent is a DL), its parent is
     a P, or it is the last node of the current node list. -->
<xsl:template match="DL">
  <xsl:value-of select="$newline"/>
  <xsl:apply-templates/>
  <xsl:if test="not(name(../../..) = 'DL' or name(..) = 'P' or position() = last())">
    <xsl:value-of select="$newline"/>
  </xsl:if>
</xsl:template>
<!-- A DT: write one indentation step, a second one when the element four levels up is a
     DL (a list nested inside another list), then the DT's string value and a single space. -->
<xsl:template match="DT">
  <xsl:value-of select="$tab"/>
  <xsl:if test="name(../../../..) = 'DL'">
    <xsl:value-of select="$tab"/>
  </xsl:if>
  <xsl:value-of select="concat(., $space)"/>
</xsl:template>
<!-- A DD: process its children, then write a line feed unless this is the last node of
     the current node list. -->
<xsl:template match="DD">
  <xsl:apply-templates/>
  <xsl:if test="position() &lt; last()">
    <xsl:value-of select="$newline"/>
  </xsl:if>
</xsl:template>
<!-- A BR whose parent is an entry element becomes a single space. This pattern has a
     higher default priority (0.5) than the plain BR pattern below (0), so it is the rule
     chosen for such nodes. -->
<xsl:template match="entry/BR">
  <xsl:value-of select="$space"/>
</xsl:template>
<!-- Every other BR becomes a line feed. -->
<xsl:template match="BR">
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- TOC elements are dropped: the empty template writes nothing for them or their content. -->
<xsl:template match="TOC"/>
<!-- A Title element: process its children, then write one line feed. -->
<xsl:template match="Title">
  <xsl:apply-templates/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- A table element adds no text of its own; all of its child nodes are processed. -->
<xsl:template match="table">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- A row: render its entry children in order, then write one line feed. -->
<xsl:template match="row">
  <xsl:apply-templates select="entry"/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- An entry: process its children, then write one indentation step as a separator unless
     this is the last node of the current node list. -->
<xsl:template match="entry">
  <xsl:apply-templates/>
  <xsl:if test="position() &lt; last()">
    <xsl:value-of select="$tab"/>
  </xsl:if>
</xsl:template>
</xsl:stylesheet>
view raw giitotext.xsl hosted with ❤ by GitHub

Top comments (0)

Retry later
Retry later