For a small project, I needed to convert German laws, found at https://www.gesetze-im-internet.de/, from XML format to text format.
The XML format is described here and is defined by this DTD file.
The source code in the following XSL file is pretty straightforward. Only adding newlines and indenting definition lists posed an additional challenge.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
  Converts an XML document whose root element is "dokumente" into plain text.
  The templates below emit headings, paragraphs, definition lists and table
  rows as text, using the three whitespace variables declared here.
-->

<!-- Serialize the result as UTF-8 plain text (no markup, no escaping). -->
<xsl:output method="text" encoding="utf-8" omit-xml-declaration="yes"/>

<!-- Discard whitespace-only text nodes of the input so that source
     indentation never leaks into the output. -->
<xsl:strip-space elements="*"/>

<!-- A single line feed. Written as a character reference so the value does
     not depend on how the stylesheet file itself is wrapped or formatted. -->
<xsl:variable name="newline"><xsl:text>&#10;</xsl:text></xsl:variable>

<!-- A single space character. -->
<xsl:variable name="space"><xsl:text> </xsl:text></xsl:variable>

<!-- One indentation step: four spaces (not a real tab character). -->
<xsl:variable name="tab" select="concat($space, $space, $space, $space)"/>
<!--
  Entry point: matches the root "dokumente" element and drives the conversion.
  Output order:
    1. every norm/metadaten/langue element,
    2. every norm that has an "enbez" text node different from
       'Inhaltsübersicht', or that has both "langue" metadata and "textdaten",
    3. 25 trailing line feeds.
-->
<xsl:template match="/dokumente">
  <xsl:apply-templates select="norm/metadaten/langue"/>
  <!-- In XPath "and" binds tighter than "or"; the parentheses only make the
       existing grouping visible. -->
  <xsl:apply-templates select="norm[metadaten/enbez/text() != 'Inhaltsübersicht' or (metadaten/langue and textdaten)]"/>
  <!-- 5 x 5 = 25 line feeds of trailing padding. -->
  <xsl:value-of select="concat($newline, $newline, $newline, $newline, $newline,
                               $newline, $newline, $newline, $newline, $newline,
                               $newline, $newline, $newline, $newline, $newline,
                               $newline, $newline, $newline, $newline, $newline,
                               $newline, $newline, $newline, $newline, $newline)"/>
</xsl:template>
<!--
  "langue" metadata element: emits its whitespace-normalized text followed by
  three line feeds.
-->
<xsl:template match="norm/metadaten/langue">
  <xsl:value-of select="concat(normalize-space(.), $newline, $newline, $newline)"/>
</xsl:template>
<!--
  A norm that carries an "enbez" metadata element.
  Emits a heading line and then the norm body ("textdaten"):
    - with a "titel":    enbez, two spaces, whitespace-normalized titel
    - without a "titel": enbez only
  In both cases the heading is followed by two line feeds.
-->
<xsl:template match="norm[metadaten/enbez]">
  <xsl:choose>
    <xsl:when test="metadaten/titel">
      <xsl:value-of select="concat(metadaten/enbez, $space, $space,
                                   normalize-space(metadaten/titel),
                                   $newline, $newline)"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="concat(metadaten/enbez, $newline, $newline)"/>
    </xsl:otherwise>
  </xsl:choose>
  <xsl:apply-templates select="textdaten"/>
</xsl:template>
<!--
  A norm that has both "langue" metadata and "textdaten": emits only the body,
  without a heading line.
  NOTE(review): a norm that also has "enbez" matches the norm[metadaten/enbez]
  rule with the same default priority. XSLT 1.0 treats that as a recoverable
  conflict in which the rule occurring last in the stylesheet (this one) is
  chosen; confirm such norms do not occur or that this is the intended result.
-->
<xsl:template match="norm[metadaten/langue and textdaten]">
  <xsl:apply-templates select="textdaten"/>
</xsl:template>
<!--
  Body of a norm. Emits text/Content; if a "fussnoten" child exists, a line
  feed followed by fussnoten/Content; and finally two line feeds.
-->
<xsl:template match="textdaten">
  <xsl:apply-templates select="text/Content"/>
  <xsl:if test="fussnoten">
    <xsl:value-of select="$newline"/>
    <xsl:apply-templates select="fussnoten/Content"/>
  </xsl:if>
  <xsl:value-of select="concat($newline, $newline)"/>
</xsl:template>
<!-- Paragraph: processes its children, then ends the line. -->
<xsl:template match="P">
  <xsl:apply-templates/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!--
  Definition list: starts on a fresh line and processes its children.
  A closing line feed is added only when all three conditions hold:
    - the element three levels up is not a DL (presumably: this list is not
      nested inside another list's item; verify against the DTD),
    - the parent is not a P (the P template already ends the line),
    - this DL is not the last node in the node list currently being processed.
-->
<xsl:template match="DL">
  <xsl:value-of select="$newline"/>
  <xsl:apply-templates/>
  <xsl:if test="name(../../..) != 'DL' and name(..) != 'P' and position() != last()">
    <xsl:value-of select="$newline"/>
  </xsl:if>
</xsl:template>
<!--
  Definition term: emits one indentation step, the term's string value and a
  space. When the element four levels up is a DL, one extra indentation step
  is emitted first.
  NOTE(review): only this one extra level is handled; deeper nesting gets no
  further indentation. Confirm whether deeper nesting occurs in the input.
-->
<xsl:template match="DT">
  <xsl:if test="name(../../../..) = 'DL'">
    <xsl:value-of select="$tab"/>
  </xsl:if>
  <xsl:value-of select="concat($tab, ., $space)"/>
</xsl:template>
<!--
  Definition description: processes its children and ends the line, except
  when it is the last node in the node list currently being processed.
-->
<xsl:template match="DD">
  <xsl:apply-templates/>
  <xsl:if test="position() != last()">
    <xsl:value-of select="$newline"/>
  </xsl:if>
</xsl:template>
<!--
  Line break: becomes a single space when its parent is an "entry" element,
  so that the enclosing row stays on one output line, and a line feed
  everywhere else.
-->
<xsl:template match="BR">
  <xsl:choose>
    <xsl:when test="name(..) = 'entry'">
      <xsl:value-of select="$space"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$newline"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- TOC elements are suppressed: neither they nor their descendants produce
     any output. -->
<xsl:template match="TOC">
  <!-- Do nothing -->
</xsl:template>
<!-- Title: processes its children, then ends the line. -->
<xsl:template match="Title">
  <xsl:apply-templates/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!-- Table: adds no text of its own and simply processes its children. -->
<xsl:template match="table">
  <xsl:apply-templates/>
</xsl:template>
<!-- Table row: processes its "entry" children and then ends the line, so
     each row becomes one line of text. -->
<xsl:template match="row">
  <xsl:apply-templates select="entry"/>
  <xsl:value-of select="$newline"/>
</xsl:template>
<!--
  Table cell: processes its children and appends one indentation step
  (four spaces) as a column separator, except after the last cell in the
  node list currently being processed.
-->
<xsl:template match="entry">
  <xsl:apply-templates/>
  <xsl:if test="position() != last()">
    <xsl:value-of select="$tab"/>
  </xsl:if>
</xsl:template>
</xsl:stylesheet>
Top comments (0)