[.Net] 將Markdown轉Excel

Markdown轉Excel其實Adobe就有線上工具能作得很好, 自己寫程式可以這樣作:

將PDF(含圖表)轉Excel的步驟:

  1. [AI] 使用Azure Document Intelligence將文件轉Markdown
  2. [AI] 搭配AI Vision進一步補強OCR結果
  3. 將Markdown轉Excel

以下WriteExcel是將指定目錄下的.md檔案讀入, 寫成Excel的每Sheet內的作法; 要先在nuget安裝ClosedXML

using ClosedXML.Excel;
using System.Globalization;
using System.Text;

namespace Abbee
{
    internal class Md2Excel
    {
        public static void WriteExcel(string FromDir, string ToFile)
        {
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
            using var workbook = new XLWorkbook();
            workbook.Style.Font.FontName = "Microsoft JhengHei";
            foreach (var mdFile in Directory.GetFiles(FromDir, "*.md"))
            {
                var lines = File.ReadAllLines(mdFile);
                var sheetName = Path.GetFileNameWithoutExtension(mdFile).TrimStart('0');
                var worksheet = workbook.Worksheets.Add(sheetName);
                MdToCell(lines, worksheet);
                worksheet.Columns().Width = size;
            }
            var outputDir = Path.GetDirectoryName(ToFile);
            if (!string.IsNullOrWhiteSpace(outputDir))
            {
                Directory.CreateDirectory(outputDir);
            }
            workbook.SaveAs(ToFile);
            Console.WriteLine($"Excel已完成: {ToFile}");
        }

        private static void MdToCell(string[] lines, IXLWorksheet worksheet)
        {
            var row = 1;
            var i = 0;
            while (i < lines.Length)
            {
                var line = lines[i];
                if (IsTableHeader(lines, i))
                {
                    var separatorLine = lines[++i];
                    var tableLines = new List<string> { line };
                    while (++i < lines.Length && IsTableRow(lines[i]))
                        tableLines.Add(lines[i]);

                    row = WriteTable(tableLines, separatorLine, worksheet, row);
                    continue;
                }

                if (string.IsNullOrWhiteSpace(line)
                    || WriteHeading(line, worksheet, row))
                {
                    row++;
                    i++;
                    continue;
                }

                var cell = worksheet.Cell(row, 1);
                CellStyles(cell, line);
                colSpan(cell);
                row++;
                i++;
            }
        }

        #region set style
        private const int size = 16;//設定長和寬都是16,其實寬16很長
        private static void CellStyles(IXLCell cell, string text)
        {
            if (text.Contains("~~"))
            {
                text = text.Replace("~~", string.Empty);
                //cell.Style.Font.Strikethrough = true;
                cell.Style.Fill.BackgroundColor = XLColor.LightPink;
            }
            cell.Style.Font.FontSize = 11;
            cell.Value = text;
            cell.Style.Alignment.WrapText = true;
        }

        private static void colSpan(IXLCell cell)
        {
            var ws = cell.Worksheet;
            var row = cell.Address.RowNumber;
            var col = cell.Address.ColumnNumber;
            int colSpan = 6;
            ws.Range(row, col, row, colSpan).Merge();
            SetHeight(ws, row, cell.GetString(), colSpan);
        }

        private static void SetHeight(IXLWorksheet ws, int row, string text, int colSpan = 1)
        {
            var len = Encoding.GetEncoding("big5").GetByteCount(text);
            //每格14個半形字
            var lineCount = Math.Ceiling(len / 14d / colSpan);
            ws.Row(row).Height = Math.Max(ws.Row(row).Height, lineCount * size);
        }
        #endregion

        private static bool WriteHeading(string line, IXLWorksheet worksheet, int row)
        {
            var level = line.TakeWhile(ch => ch == '#').Count();
            if (level == 0 || level > 6)
                return false;

            var text = line[level..].Trim();
            if (string.IsNullOrEmpty(text))
                return false;

            var cell = worksheet.Cell(row, 1);
            CellStyles(cell, text);
            colSpan(cell);
            cell.Style.Font.Bold = true;
            cell.Style.Font.FontSize = 16 - level;
            worksheet.Row(row).Height = size + (6 - level) * 2;
            return true;
        }

        #region table
        private static bool IsTableRow(string line)
        {
            return line.Contains('|');
        }

        private static bool IsTableHeader(string[] lines, int idx)
        {
            return IsTableRow(lines[idx])
                && idx + 1 < lines.Length
                && lines[idx + 1].Contains("---");
        }

        private static int WriteTable(List<string> tableLines, string separatorLine, IXLWorksheet worksheet, int startRow)
        {
            var row = startRow;
            var columnAligns = (from c in SplitTableColumns(separatorLine)
                                select c.EndsWith(':')).ToList();
            for (var index = 0; index < tableLines.Count; index++)
            {
                var columns = SplitTableColumns(tableLines[index]);
                for (var col = 0; col < columns.Count; col++)
                {
                    var cell = worksheet.Cell(row, col + 1);
                    var rightAligned = columnAligns?[col] ?? false;
                    TableCellStyles(cell, columns[col], rightAligned);
                    cell.Style.Font.Bold = (index == 0);
                }
                row++;
            }

            return row;
        }

        private static List<string> SplitTableColumns(string line)
        {
            var trimmed = line.Trim().Trim('|');
            return trimmed.Split('|').Select(item => item.Trim()).ToList();
        }

        private static void TableCellStyles(IXLCell cell, string text, bool rightAligned)
        {
            if (rightAligned)
            {
                cell.Style.Alignment.Horizontal = XLAlignmentHorizontalValues.Right;
                var normalized = text.Replace("$", string.Empty)
                                    .Replace(",", string.Empty)
                                    .Replace("%", string.Empty)
                                    .Trim();
                var hasDecimal = text.Contains('.');
                var hasDollar = text.Contains('$');
                if (decimal.TryParse(normalized, NumberStyles.Number, CultureInfo.InvariantCulture, out var number))
                {
                    var red = ";[Red]-";
                    var dollar = hasDollar ? "$" : "";
                    var dec = hasDecimal ? ".00" : "";
                    var format = $"{dollar}#,##0{dec}";
                    if (text.Contains("%"))
                    {
                        number *= 0.01m;
                        format += "%";
                    }
                    cell.Value = number;
                    cell.Style.NumberFormat.Format = $"{format}{red}{format}";
                    return;
                }
            }
            CellStyles(cell, text);
            SetHeight(cell.Worksheet, cell.Address.RowNumber, text);
        }
        #endregion
    }
}

Taiwan is a country. 臺灣是我的國家