Hi @Oscar Vagle ,
The styleIds like "Heading1", "Heading2", etc. are set by Word, so as long as you aren't doing anything that could change them, you can use those to determine the heading level. This won't tell you what their heading number should be (e.g. sections 1.1.1.1 and 2.1.1.1 would both be <h4>). You could keep track of the level in the heading tree and add your own numbering if you want. The code below will read in a document and create the HTML for the headings and turn everything else into a paragraph. If you want to handle lists and other styles, you could extend it to handle those situations as well. Please let me know if this answers your question.
// Converts the body paragraphs of a Word document into simple HTML:
// paragraphs whose style id is "Heading1".."Heading6" become <h1>..<h6>,
// every other paragraph becomes <p>. Non-paragraph body children are skipped.
HtmlContentBuilder htmlContentBuilder = new HtmlContentBuilder();

// Nothing is modified, so open the package read-only (isEditable: false) and
// dispose it as soon as the paragraphs have been read to release the file.
using (WordprocessingDocument doc = WordprocessingDocument.Open(filePath, false))
{
    var body = doc.MainDocumentPart?.Document?.Body;
    if (body != null)
    {
        // Elements<Paragraph>() yields only the direct Paragraph children of the body.
        foreach (Paragraph p in body.Elements<Paragraph>())
        {
            // Style id is null when the paragraph has no properties or no explicit style.
            string? styleId = p.ParagraphProperties?.ParagraphStyleId?.Val?.Value;

            string tag = styleId switch
            {
                "Heading1" => "h1",
                "Heading2" => "h2",
                "Heading3" => "h3",
                "Heading4" => "h4",
                "Heading5" => "h5",
                "Heading6" => "h6",
                _ => "p",
            };

            // AppendHtml/AppendHtmlLine write the tags verbatim; Append marks the
            // paragraph text as unencoded so WriteTo HTML-encodes it. Using
            // AppendLine(string) for the whole element would escape the tags too
            // and produce "&lt;h1&gt;..." instead of markup.
            htmlContentBuilder
                .AppendHtml(string.Concat("<", tag, ">"))
                .Append(p.InnerText)
                .AppendHtmlLine(string.Concat("</", tag, ">"));
        }
    }
}

using (StringWriter writer = new StringWriter())
{
    htmlContentBuilder.WriteTo(writer, HtmlEncoder.Default);
    // The html string for the document
    string htmlString = writer.ToString();
    Console.WriteLine(htmlString);
}
Mike Bowen
Microsoft Open Specifications Support