DEV Community

Masui Masanori
Masui Masanori

Posted on


[ASP.NET Core] Try reading a word processing file by OpenXML 2


This time, I will try getting text styles and fonts.

Getting specified styles and fonts

First, I will try getting the text styles and fonts that I have specified myself.

Image description


using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace OfficeFileAccessor.OfficeFiles.Readers;

/// <summary>
/// Reads a word processing file through the Open XML SDK and logs its text, styles and fonts.
/// </summary>
public class DocFileReader : IOfficeFileReader
    // NLog logger used for all output of this reader.
    private readonly NLog.Logger logger;
    public DocFileReader()
        this.logger = NLog.LogManager.GetCurrentClassLogger();
    /// <summary>
    /// Opens the uploaded file as a WordprocessingDocument and logs the text and run-level
    /// font information of the paragraphs directly under the document body.
    /// </summary>
    /// <param name="file">Uploaded file whose read stream is opened as the document.</param>
    public void Read(IFormFile file)
        // The second argument (false) opens the document as not editable.
        using WordprocessingDocument wordDoc = WordprocessingDocument.Open(file.OpenReadStream(), false);
        Body? body = wordDoc.MainDocumentPart?.Document?.Body;
        if (body == null)
            logger.Warn("Failed reading the document");
        // NOTE(review): an early return presumably followed the warning above; it is not
        // visible in this listing, and without it body is dereferenced below while null.
        foreach (OpenXmlElement elm in body.Elements())
            // NOTE(review): the body of the Table branch is not visible in this listing.
            if (elm is Table table)
            else if (elm is Paragraph paragraph)
                // Tests for paragraphs whose trimmed text is empty; the statement that
                // followed (presumably a skip) is not visible in this listing.
                if (elm.InnerText.Trim().Length <= 0)
                // Get full text from paragraph.InnerText
                logger.Info($"Paragraph Text: {paragraph.InnerText}");
                PrintFontInfoFromParagraph(wordDoc.MainDocumentPart, paragraph);
    /// <summary>
    /// Logs the text of each Run directly under the paragraph and, when the run has
    /// RunProperties, the font name, color, bold value and font size set on it.
    /// </summary>
    /// <param name="mainPart">Main document part; passed through to GetFontName.</param>
    /// <param name="paragraph">Paragraph whose runs are inspected.</param>
    private void PrintFontInfoFromParagraph(MainDocumentPart? mainPart, Paragraph paragraph)
        // One paragraph is separated as multiple Run elements by styles and fonts
        foreach (Run run in paragraph.Elements<Run>())
            logger.Info($"Run Text: {run.InnerText}");
            // Get text style and font from RunProperties.
            RunProperties? runProperties = run.RunProperties;
            if (runProperties != null)
                logger.Info($"RunProperties found:");
                var fonts = runProperties.RunFonts;
                if (fonts != null)
                    logger.Info($"Font Name: {GetFontName(fonts, mainPart)}");
                // Color and Bold are logged only when the element is present on the run.
                if (runProperties.Color != null)
                    logger.Info($"Color: {runProperties.Color.Val}");
                if (runProperties.Bold != null)
                    logger.Info($"Bold: {runProperties.Bold.Val}");
                if (runProperties.FontSize == null)
                    logger.Info($"FontSize was null");
                // Nothing is logged when FontSize is present but its value does not parse as an int.
                else if(int.TryParse(runProperties.FontSize.Val, out var size))
                    // runProperties.FontSize.Val represents half-points
                    // size is an int, so the division truncates: an odd half-point value loses its .5 pt.
                    logger.Info($"FontSize: {size / 2}");
    /// <summary>
    /// Picks a font name from the run fonts, trying Ascii, then HighAnsi, then EastAsia.
    /// </summary>
    /// <param name="runFonts">Run fonts to read; may be null.</param>
    /// <param name="mainPart">Not used by the visible part of this method.</param>
    /// <returns>The chosen font name, or "No font set" when the result is null or empty.</returns>
    private string GetFontName(RunFonts? runFonts, MainDocumentPart? mainPart)
        string? result = runFonts?.Ascii ??
            runFonts?.HighAnsi ??
            runFonts?.EastAsia ??
        // NOTE(review): the final operand of the ?? chain above is missing from this listing.
        if (string.IsNullOrEmpty(result))
            result = "No font set";
        return result;
Enter fullscreen mode Exit fullscreen mode


Paragraph Text: カスタムfontに設定した場合
Run Text: カス
RunProperties found:
Font Name: No font set
FontSize was null
Run Text: タムfo
RunProperties found:
Font Name: Noto Sans JP Black
FontSize was null
Run Text: ntに
RunProperties found:
Font Name: No font set
FontSize was null
Run Text: 設定
RunProperties found:
Font Name: No font set
FontSize was null
Run Text: した
RunProperties found:
Font Name: Meiryo UI
FontSize: 16
Run Text: 場合
RunProperties found:
Font Name: No font set
Color: 60CAF3
FontSize: 22
Enter fullscreen mode Exit fullscreen mode

Getting default styles and fonts

Unless I change the font, color, size, etc. myself, the above code can't get that information.

Image description

So I have to get them from the base style or the theme fonts.
I can get the style information of "見出し(Headline)".
But some styles like "標準(Normal)" don't have any style information, so I set default values if I can't get "ParagraphStyleId" from the paragraph.


/// <summary>
/// Reads a word processing file through the Open XML SDK and logs text, styles and fonts,
/// falling back to paragraph styles and theme fonts when a run does not set them itself.
/// </summary>
public class DocFileReader : IOfficeFileReader
    private readonly NLog.Logger logger;
    // Identifies which font slot a font name was taken from.
    // NOTE(review): only the first member is visible in this listing; other methods of this
    // class also reference HighAnsi, EastAsia and Latin.
    private enum FontType
        Ascii = 0,
    // NOTE(review): only Major is visible here, and no use of FontPriority appears in this listing.
    private enum FontPriority {
        Major = 0,
    // East Asian and Latin typefaces of the theme's major and minor font schemes; each may be null.
    private record ThemeFont(string? EastAsiaMajorFont, string? EastAsiaMinorFont, string? LatinMajorFont, string? LatinMinorFont);
    // A font name paired with the slot it was found in.
    private record TextFont (FontType FontType, string FontName);
    // Resolved text properties; the initializers are the values used when nothing overrides them.
    private class TextProps
        public List<TextFont> Fonts { get; set; } = [];
        // In points: GetTextPropsFromRunProperties halves the half-point value before assigning.
        public int FontSize { get; set; } = 11;
        public bool Bold { get; set; } = false;
        // Color string as read from the Color element's Val; defaults to "000000".
        public string Color { get; set; } = "000000";
    /// <summary>
    /// Opens the uploaded file as a WordprocessingDocument, resolves the theme fonts once, and
    /// logs text and font information for the paragraphs directly under the document body.
    /// </summary>
    /// <param name="file">Uploaded file whose read stream is opened as the document.</param>
    public void Read(IFormFile file)
        // The second argument (false) opens the document as not editable.
        using WordprocessingDocument wordDoc = WordprocessingDocument.Open(file.OpenReadStream(), false);

        Body? body = wordDoc.MainDocumentPart?.Document?.Body;
        // NOTE(review): no null check on body is visible in this version before it is enumerated below.
        // Theme fonts are read once here and reused for every paragraph.
        ThemeFont themeFont = GetThemeFont(wordDoc.MainDocumentPart);
        foreach (OpenXmlElement elm in body.Elements())
            // NOTE(review): the body of the Table branch is not visible in this listing.
            if (elm is Table table)
            else if (elm is Paragraph paragraph)
                // Get full text from paragraph.InnerText
                logger.Info($"Paragraph Text: {paragraph.InnerText}");
                PrintFontInfoFromParagraph(wordDoc.MainDocumentPart, paragraph, themeFont);
    /// <summary>
    /// Get fonts from Theme: the East Asian and Latin typefaces of the major and minor font schemes.
    /// </summary>
    /// <param name="mainPart">Main document part whose ThemePart is read; may be null.</param>
    /// <returns>
    /// A ThemeFont holding the four typefaces. All four are null when the theme part, the theme
    /// elements, or both font schemes are missing; individual entries may also be null.
    /// </returns>
    private ThemeFont GetThemeFont(MainDocumentPart? mainPart)
        if (mainPart?.ThemePart == null)
            return new(null, null, null, null);
        var theme = mainPart.ThemePart.Theme;
        var themeElements = theme.ThemeElements;
        if (themeElements == null)
            return new(null, null, null, null);
        var majorFontScheme = themeElements.FontScheme?.MajorFont;
        var minorFontScheme = themeElements.FontScheme?.MinorFont;
        // Only bail out when both schemes are absent; one missing scheme just yields null entries.
        if(majorFontScheme == null && minorFontScheme == null)
            return new(null, null, null, null);
        return new ThemeFont(EastAsiaMajorFont: majorFontScheme?.EastAsianFont?.Typeface,
            EastAsiaMinorFont: minorFontScheme?.EastAsianFont?.Typeface,
            LatinMajorFont: majorFontScheme?.LatinFont?.Typeface,
            LatinMinorFont: minorFontScheme?.LatinFont?.Typeface);
    /// <summary>
    /// Logs each run's text together with its fonts, color, bold value and font size, using the
    /// paragraph-level properties from GetTextProps for anything the run does not set itself.
    /// </summary>
    /// <param name="mainPart">Main document part; passed to GetTextProps.</param>
    /// <param name="paragraph">Paragraph whose runs are inspected.</param>
    /// <param name="themeFont">Theme fonts; passed to GetTextProps.</param>
    /// <remarks>
    /// NOTE(review): several lines (apparently "else" keywords and a skip after the null check)
    /// are not visible in this listing; the comments below describe the apparent intent only.
    /// </remarks>
    private void PrintFontInfoFromParagraph(MainDocumentPart? mainPart, Paragraph paragraph, ThemeFont themeFont)
        // Resolved once per paragraph and reused for every run.
        TextProps? props = GetTextProps(mainPart, paragraph, themeFont); 

        // One paragraph is separated as multiple Run elements by styles and font types
        foreach (Run run in paragraph.Elements<Run>())
            logger.Info($"Run Text: {run.InnerText}");
            RunProperties? runProperties = run.RunProperties;
            // NOTE(review): presumably the run is skipped after this message; as shown,
            // runProperties is dereferenced right after being found null.
            if (runProperties == null)
                logger.Info("runProperties was null");
                var fonts = GetFonts(runProperties.RunFonts);                
                // Fonts set on the run win; otherwise the paragraph-level fonts are logged.
                if(fonts.Count > 0)
                    foreach(var f in fonts)
                        logger.Info($"Font Name: {f.FontName} Type: {f.FontType}");
                else if(props?.Fonts != null)
                    foreach(var f in props.Fonts)
                        logger.Info($"Font Name: {f.FontName} Type: {f.FontType}");

                // For Color, Bold and FontSize: when the run has no such element the value from
                // props is logged; the run's own value is presumably logged in an else branch
                // whose keyword is not visible here.
                if (runProperties.Color == null)
                    if(props?.Color != null)
                        logger.Info($"Color: {props.Color}");
                    logger.Info($"Color: {runProperties.Color.Val}");
                if (runProperties.Bold == null)
                    if(props?.Bold != null)
                        logger.Info($"Bold: {props.Bold}");
                    logger.Info($"Bold: {runProperties.Bold.Val}");
                if (runProperties.FontSize == null)
                    if(props?.FontSize != null)
                        logger.Info($"FontSize: {props.FontSize}");
                else if(int.TryParse(runProperties.FontSize.Val, out var size))
                    // runProperties.FontSize.Val represents half-points
                    // size is an int, so the division truncates: an odd half-point value loses its .5 pt.
                    logger.Info($"FontSize: {size / 2}");
    /// <summary>
    /// Get style and font from paragraph: looks up the paragraph's style by ParagraphStyleId and
    /// reads its run properties, then falls back to a base style or to the defaults.
    /// </summary>
    /// <param name="mainPart">Main document part used to look up style definitions; may be null.</param>
    /// <param name="paragraph">Paragraph whose ParagraphStyleId is read.</param>
    /// <param name="themeFont">Theme fonts used for defaults and for theme-based run fonts.</param>
    /// <returns>
    /// Default properties when no style is found; otherwise the style's own properties, or,
    /// when those carry no fonts, properties from an inherited base style or the defaults.
    /// </returns>
    private TextProps? GetTextProps(MainDocumentPart? mainPart, Paragraph paragraph, ThemeFont themeFont)
        string? styleId = paragraph.ParagraphProperties?.ParagraphStyleId?.Val?.Value;
        Style? style = GetStyleById(mainPart, styleId);

        TextProps? result = GetTextPropsFromRunProperties(style?.StyleRunProperties, themeFont);
        // No style for this paragraph: use the defaults built from the theme fonts.
        if(style == null)
            return GenerateDefaultProps(themeFont);
        // The style exists but yields no fonts: look at the styles it is based on.
        else if(result == null || result.Fonts == null || result.Fonts.Count <= 0)
            StyleRunProperties? inheritedRunProperties = GetInheritedRunProperties(style, mainPart);
            if (inheritedRunProperties == null)
                return GenerateDefaultProps(themeFont);
                // NOTE(review): the next two lines presumably belong to an else branch whose
                // keyword is not visible in this listing.
                logger.Info("Inherited from Base Style:");
                return GetTextPropsFromRunProperties(inheritedRunProperties, themeFont);
        return result;
    /// <summary>
    /// Follows the BasedOn reference of a style and returns the StyleRunProperties of the first
    /// base style that has them.
    /// </summary>
    /// <param name="style">Style whose BasedOn reference is followed.</param>
    /// <param name="mainPart">Main document part used to look up the base style; may be null.</param>
    /// <returns>
    /// The base style's run properties, or null when the style has no BasedOn or the base
    /// style cannot be found.
    /// </returns>
    private static StyleRunProperties? GetInheritedRunProperties(Style style, MainDocumentPart? mainPart)
        if (style.BasedOn != null)
            string? baseStyleId = style.BasedOn.Val?.Value;
            Style? baseStyle = mainPart?.StyleDefinitionsPart?.Styles?.Elements<Style>()
                .FirstOrDefault(s => s.StyleId == baseStyleId);
            if (baseStyle != null)
                if (baseStyle.StyleRunProperties != null)
                    return baseStyle.StyleRunProperties;
                    // Recurses into the base style (presumably the else branch; the keyword is not
                    // visible here). There is no guard against styles that reference each other in a cycle.
                    return GetInheritedRunProperties(baseStyle, mainPart);
        return null;
    /// <summary>
    /// Collects the non-empty Ascii, HighAnsi and EastAsia font names of a run, in that order.
    /// </summary>
    /// <param name="runFonts">Run fonts to read; may be null.</param>
    /// <returns>One TextFont per non-empty slot; an empty list when none is set or runFonts is null.</returns>
    private static List<TextFont> GetFonts(RunFonts? runFonts)
        List<TextFont> results = [];
        if(string.IsNullOrEmpty(runFonts?.Ascii?.Value) == false)
            results.Add(new TextFont(FontType.Ascii, runFonts.Ascii.Value));
        if(string.IsNullOrEmpty(runFonts?.HighAnsi?.Value) == false)
            results.Add(new TextFont(FontType.HighAnsi, runFonts.HighAnsi.Value));
        if(string.IsNullOrEmpty(runFonts?.EastAsia?.Value) == false)
            results.Add(new TextFont(FontType.EastAsia, runFonts.EastAsia.Value));
        return results;
    /// <summary>
    /// Builds the fallback text properties from the theme's minor fonts.
    /// </summary>
    /// <param name="themeFont">Theme fonts whose Latin and East Asian minor typefaces are used.</param>
    /// <returns>A new TextProps whose Fonts holds the non-empty minor typefaces.</returns>
    private static TextProps GenerateDefaultProps(ThemeFont themeFont)
        // If the style cannot be gotten, return the default font information.
        List<TextFont> fonts = [];
        if(string.IsNullOrEmpty(themeFont.LatinMinorFont) == false)
            fonts.Add(new(FontType.Latin, themeFont.LatinMinorFont));
        if(string.IsNullOrEmpty(themeFont.EastAsiaMinorFont) == false)
            fonts.Add(new(FontType.EastAsia, themeFont.EastAsiaMinorFont));
        // Only Fonts is visibly set here; other members would keep their TextProps initializers.
        // NOTE(review): the rest of the initializer is not visible in this listing.
        return new ()
            Fonts = fonts,
    /// <summary>
    /// Finds the style definition whose StyleId equals the given id.
    /// </summary>
    /// <param name="mainPart">Main document part whose StyleDefinitionsPart is searched; may be null.</param>
    /// <param name="styleId">Style id to look for; may be null.</param>
    /// <returns>The matching style, or null when the styles are unavailable or none matches.</returns>
    private static Style? GetStyleById(MainDocumentPart? mainPart, string? styleId)
            // NOTE(review): the condition guarding this early return is not visible in this
            // listing (presumably a null/empty check on styleId).
            return null;
        IEnumerable<Style>? styles = mainPart?.StyleDefinitionsPart?.Styles?.Elements<Style>();
        if (styles != null)
            return styles.FirstOrDefault(s => s.StyleId == styleId);
        return null;
    /// <summary>
    /// Converts a style's run properties into a TextProps, starting from the TextProps defaults
    /// and overriding fonts, color, bold and font size with whatever the style sets.
    /// </summary>
    /// <param name="runProperties">Style run properties to read; may be null.</param>
    /// <param name="themeFont">Theme fonts used when the run fonts name no font directly.</param>
    /// <returns>The resolved properties, or null when runProperties is null.</returns>
    private TextProps? GetTextPropsFromRunProperties(StyleRunProperties? runProperties, ThemeFont themeFont)
        if (runProperties == null)
            return null;
        TextProps? result = new();
        var runFonts = runProperties.RunFonts;
        if (runFonts != null)
            // Explicit font names first; fall back to the theme fonts when there are none.
            result.Fonts = GetTextFonts(runFonts);
            if(result.Fonts.Count <= 0)
                result.Fonts = GetTextFonts(themeFont, runFonts);
        if (runProperties.Color?.Val != null)
            result.Color = runProperties.Color.Val!;
        // Presence of the Bold element is treated as bold; its Val is not inspected.
        if (runProperties.Bold != null)
            result.Bold = true;
        // runProperties.FontSize.Val represents half-points
        // size is an int, so the division below truncates an odd half-point value.
        if (string.IsNullOrEmpty(runProperties.FontSize?.Val) == false &&
            int.TryParse(runProperties.FontSize?.Val, out var size))
            result.FontSize = size / 2;
        return result;
    /// <summary>
    /// Get font name from RunFonts: collects the Ascii, HighAnsi and EastAsia values that are set, in that order.
    /// </summary>
    /// <param name="runFonts">Run fonts whose explicit font names are read.</param>
    /// <returns>One TextFont per slot that has a value; an empty list when none has.</returns>
    private static List<TextFont> GetTextFonts(RunFonts runFonts)
        List<TextFont> results = [];
        // Unlike GetFonts, these checks test only for a value being present, not for it being non-empty.
        if (runFonts.Ascii?.Value != null && runFonts.Ascii.HasValue)
            results.Add(new TextFont(FontType.Ascii, runFonts.Ascii.Value));
        if (runFonts.HighAnsi?.Value != null && runFonts.HighAnsi.HasValue)
            results.Add(new TextFont(FontType.HighAnsi, runFonts.HighAnsi.Value));
        if (runFonts.EastAsia?.Value != null && runFonts.EastAsia.HasValue)
            results.Add(new TextFont(FontType.EastAsia, runFonts.EastAsia.Value));
        return results;
    /// <summary>
    /// Get font name from ThemeFonts: uses the major typefaces when the run fonts' EastAsiaTheme
    /// is MajorEastAsia.
    /// </summary>
    /// <param name="themeFont">Theme typefaces to choose from.</param>
    /// <param name="runFonts">Run fonts whose EastAsiaTheme selects the font scheme.</param>
    /// <returns>The non-empty Latin and East Asian typefaces that were selected.</returns>
    /// <remarks>
    /// NOTE(review): the minor-font lines presumably sit in an else branch whose keyword is not
    /// visible in this listing.
    /// </remarks>
    private static List<TextFont> GetTextFonts(ThemeFont themeFont, RunFonts runFonts)
        List<TextFont> results = [];
        // ThemeFont is divided into MajorFont and MinorFont.
        // Only EastAsiaTheme is consulted to decide which scheme applies.
        if(runFonts.EastAsiaTheme?.Value == ThemeFontValues.MajorEastAsia)
            if(string.IsNullOrEmpty(themeFont.LatinMajorFont) == false)
                results.Add(new(FontType.Latin, themeFont.LatinMajorFont));
            if(string.IsNullOrEmpty(themeFont.EastAsiaMajorFont) == false)
                results.Add(new(FontType.EastAsia, themeFont.EastAsiaMajorFont));
            if(string.IsNullOrEmpty(themeFont.LatinMinorFont) == false)
                results.Add(new(FontType.Latin, themeFont.LatinMinorFont));
            if(string.IsNullOrEmpty(themeFont.EastAsiaMinorFont) == false)
                results.Add(new(FontType.EastAsia, themeFont.EastAsiaMinorFont));
        return results;
Enter fullscreen mode Exit fullscreen mode


Found a Paragraph with text: This is みだし1
Paragraph Text: This is みだし1
Run Text: This is みだし1
Font Name: 游ゴシック Light Type: Latin
Color: 000000
Bold: False
FontSize: 16
Found a Paragraph with text: あいう
Paragraph Text: あいう
Run Text: あいう
Font Name: 游明朝 Type: Latin
Color: 000000
Bold: False
FontSize: 11
Found a Paragraph with text: 見出し2
Paragraph Text: 見出し2
Run Text: 見出し2
Font Name: 游ゴシック Light Type: Latin
Color: 000000
Bold: False
FontSize: 14
Found a Paragraph with text: えおか
Paragraph Text: えおか
Run Text: えおか
Font Name: 游明朝 Type: Latin
Color: 000000
Bold: False
FontSize: 11
Found a Paragraph with text: きくけ
Paragraph Text: きくけ
Run Text: きくけ
Font Name: 游明朝 Type: Latin
Color: 000000
Bold: False
FontSize: 11
Found a Paragraph with text: こさし
Paragraph Text: こさし
Run Text: こさし
Font Name: 游明朝 Type: Latin
Color: 000000
Bold: False
FontSize: 11
Enter fullscreen mode Exit fullscreen mode

Image of Docusign

Bring your solution into Docusign. Reach over 1.6M customers.

Docusign is now extensible. Overcome challenges with disconnected products and inaccessible data by bringing your solutions into Docusign and publishing to 1.6M customers in the App Center.

Learn more

Top comments (0)

A Workflow Copilot. Tailored to You. image

Our desktop app, with its intelligent copilot, streamlines coding by generating snippets, extracting code from screenshots, and accelerating problem-solving.

Read the docs