[Office 2010 開發 ] 利用 OpenXML 來分析您的 Word 文件的格式有哪些

  • 6536
  • 0
  • 2010-05-20

[Office 2010 開發 ] 利用 OpenXML 來了解您的 Word 文件的格式有哪些

在本文中您將會知道/學到:

  1. 如何依樣式擷取其內容
  2. 如何透過 Open XML SDK 來擴充方法

 

而在本文中將會主要介紹下方幾項實作內容:

  1. 透過 Open XML SDK 開啟 Word 文件
  2. 找尋樣式的 ID 及其名稱。包括 段落、表格等
  3. 在找到段落、表格後可針對該項目進行了解是否有特定的樣式名稱
  4. 範例展示並呈現其結果

 

 

image

>> 這是範本檔案內容。 有 段落、表格、粗體等….

 

 

image

>> 標題部份為標題1

 

image

>> 顯示表格。

 

 

image

>> 這是針對上述 Word 文件的分析結果統計。

 

 

 

☆ 程式部份

◇ 請先建立一個 Console Application (主控台應用程式)

◇ 再把 Program.cs 檔更改成如下的程式碼

   1:  using System;
   2:  using System.Collections.Generic;
   3:  using System.Linq;
   4:  using System.Text;
   5:  using DocumentFormat.OpenXml.Packaging;
   6:  using DocumentFormat.OpenXml.Wordprocessing;
   7:   
   8:  namespace QueryContentBasedOnStyles
   9:  {
  10:      /// <summary>
  11:      /// Console sample that reports how many paragraphs, runs and tables
  12:      /// in a Word document use three given style names.
  13:      /// </summary>
  14:      class Program
  15:      {
  16:          /// <summary>
  17:          /// Opens the document read-only and prints one count per style.
  18:          /// </summary>
  19:          /// <param name="args">
  20:          /// Optional. args[0] is the path of the .docx file to analyse;
  21:          /// when omitted, "input.docx" is used.
  22:          /// </param>
  23:          static void Main(string[] args)
  24:          {
  25:              // Style names (not style IDs) to count.
  26:              string paraStyle = "heading 1";
  27:              string runStyle = "Intense Emphasis";
  28:              string tableStyle = "Light List Accent 1";
  29:   
  30:              // Allow another document to be analysed without recompiling.
  31:              string path = (args != null && args.Length > 0)
  32:                  ? args[0]
  33:                  : "input.docx";
  34:   
  35:              // The document is only queried, so it is opened read-only
  36:              // (second argument false). This also works for
  37:              // write-protected files and never writes to the file.
  38:              using (WordprocessingDocument myDoc =
  39:                  WordprocessingDocument.Open(path, false))
  40:              {
  41:                  MainDocumentPart mainPart = myDoc.MainDocumentPart;
  42:   
  43:                  // Report the number of matches for each style.
  44:                  Console.WriteLine("Number of paragraphs with "
  45:                      + paraStyle + " styles: "
  46:                      + mainPart.ParagraphsByStyleName(paraStyle)
  47:                      .Count());
  48:                  Console.WriteLine("Number of runs with "
  49:                      + runStyle + " styles: "
  50:                      + mainPart.RunsByStyleName(runStyle).Count());
  51:                  Console.WriteLine("Number of tables with "
  52:                      + tableStyle + " styles: "
  53:                      + mainPart.TablesByStyleName(tableStyle)
  54:                      .Count());
  55:              }
  56:   
  57:              // Keep the console window open until a key is pressed.
  58:              Console.ReadKey();
  59:          }
  60:      }
  61:  }

 

 

 

 

 

 

◇ 新增一個 WordStyleExtensions.cs 檔做為本次的 Open XML SDK Extension Methods (擴充方法),其程式碼如下:

   1:  using System;
   2:  using System.Collections.Generic;
   3:  using System.Linq;
   4:  using System.Text;
   5:  using DocumentFormat.OpenXml.Packaging;
   6:  using DocumentFormat.OpenXml.Wordprocessing;
   7:   
   8:  namespace QueryContentBasedOnStyles
   9:  {
  10:      /// <summary>
  11:      /// Extension methods for <see cref="MainDocumentPart"/> that select
  12:      /// paragraphs, runs and tables by the name of the style they
  13:      /// reference.
  14:      /// </summary>
  15:      /// <remarks>
  16:      /// Only content under <c>mainPart.Document</c> is searched, and only
  17:      /// elements that carry an explicit style reference can match.
  18:      /// </remarks>
  19:      public static class WordStyleExtensions
  20:      {
  21:          /// <summary>
  22:          /// Looks up the style ID that belongs to a style name.
  23:          /// </summary>
  24:          /// <param name="mainPart">
  25:          /// Part whose style definitions are read.
  26:          /// </param>
  27:          /// <param name="styleName">
  28:          /// Style name to find (ordinal, case-sensitive match).
  29:          /// </param>
  30:          /// <returns>
  31:          /// The ID of the first matching style, or
  32:          /// <paramref name="styleName"/> unchanged when no style with
  33:          /// that name is defined.
  34:          /// </returns>
  35:          private static string GetStyleIdFromStyleName(
  36:              MainDocumentPart mainPart, string styleName)
  37:          {
  38:              StyleDefinitionsPart stylePart =
  39:                  mainPart.StyleDefinitionsPart;
  40:   
  41:              // No styles part (or an empty one): nothing to translate,
  42:              // so fall back to the name instead of dereferencing null.
  43:              if (stylePart == null || stylePart.Styles == null)
  44:              {
  45:                  return styleName;
  46:              }
  47:   
  48:              foreach (StyleName name in
  49:                  stylePart.Styles.Descendants<StyleName>())
  50:              {
  51:                  Style style = name.Parent as Style;
  52:   
  53:                  // Ignore name elements that are not directly inside a
  54:                  // style, or whose style carries no ID.
  55:                  if (style == null || style.StyleId == null)
  56:                  {
  57:                      continue;
  58:                  }
  59:   
  60:                  if (name.Val != null
  61:                      && OrdinalEquals(name.Val.Value, styleName))
  62:                  {
  63:                      return style.StyleId.Value ?? styleName;
  64:                  }
  65:              }
  66:   
  67:              return styleName;
  68:          }
  69:   
  70:          /// <summary>
  71:          /// Null-safe ordinal comparison of two strings.
  72:          /// </summary>
  73:          /// <returns>
  74:          /// false when <paramref name="actual"/> is null; otherwise the
  75:          /// result of <c>actual.Equals(expected)</c>.
  76:          /// </returns>
  77:          private static bool OrdinalEquals(string actual, string expected)
  78:          {
  79:              return actual != null && actual.Equals(expected);
  80:          }
  81:   
  82:          /// <summary>
  83:          /// Returns the paragraphs that reference the named style.
  84:          /// </summary>
  85:          /// <param name="mainPart">Main document part to search.</param>
  86:          /// <param name="styleName">Style name, e.g. "heading 1".</param>
  87:          /// <returns>
  88:          /// A lazily evaluated sequence; empty when nothing matches or
  89:          /// the part has no document element.
  90:          /// </returns>
  91:          /// <exception cref="ArgumentNullException">
  92:          /// <paramref name="mainPart"/> is null.
  93:          /// </exception>
  94:          public static IEnumerable<Paragraph> ParagraphsByStyleName(
  95:              this MainDocumentPart mainPart, string styleName)
  96:          {
  97:              if (mainPart == null)
  98:              {
  99:                  throw new ArgumentNullException("mainPart");
 100:              }
 101:   
 102:              if (mainPart.Document == null)
 103:              {
 104:                  return Enumerable.Empty<Paragraph>();
 105:              }
 106:   
 107:              string styleId = GetStyleIdFromStyleName(mainPart, styleName);
 108:   
 109:              return mainPart.Document
 110:                  .Descendants<Paragraph>()
 111:                  .Where(p => IsParagraphInStyle(p, styleId));
 112:          }
 113:   
 114:          /// <summary>
 115:          /// Tells whether a paragraph's properties reference
 116:          /// <paramref name="styleId"/>.
 117:          /// </summary>
 118:          /// <returns>
 119:          /// false when the paragraph has no properties, no style
 120:          /// reference, or a reference to a different style.
 121:          /// </returns>
 122:          private static bool IsParagraphInStyle(Paragraph p,
 123:              string styleId)
 124:          {
 125:              ParagraphProperties pPr = p
 126:                  .GetFirstChild<ParagraphProperties>();
 127:   
 128:              if (pPr == null)
 129:              {
 130:                  return false;
 131:              }
 132:   
 133:              ParagraphStyleId paraStyle = pPr.ParagraphStyleId;
 134:   
 135:              return paraStyle != null
 136:                  && paraStyle.Val != null
 137:                  && OrdinalEquals(paraStyle.Val.Value, styleId);
 138:          }
 139:   
 140:          /// <summary>
 141:          /// Returns the runs that reference the named style.
 142:          /// </summary>
 143:          /// <param name="mainPart">Main document part to search.</param>
 144:          /// <param name="styleName">
 145:          /// Style name, e.g. "Intense Emphasis".
 146:          /// </param>
 147:          /// <returns>
 148:          /// A lazily evaluated sequence; empty when nothing matches or
 149:          /// the part has no document element.
 150:          /// </returns>
 151:          /// <exception cref="ArgumentNullException">
 152:          /// <paramref name="mainPart"/> is null.
 153:          /// </exception>
 154:          public static IEnumerable<Run> RunsByStyleName(
 155:              this MainDocumentPart mainPart, string styleName)
 156:          {
 157:              if (mainPart == null)
 158:              {
 159:                  throw new ArgumentNullException("mainPart");
 160:              }
 161:   
 162:              if (mainPart.Document == null)
 163:              {
 164:                  return Enumerable.Empty<Run>();
 165:              }
 166:   
 167:              string styleId = GetStyleIdFromStyleName(mainPart, styleName);
 168:   
 169:              return mainPart.Document
 170:                  .Descendants<Run>()
 171:                  .Where(r => IsRunInStyle(r, styleId));
 172:          }
 173:   
 174:          /// <summary>
 175:          /// Tells whether a run's properties reference
 176:          /// <paramref name="styleId"/>.
 177:          /// </summary>
 178:          /// <returns>
 179:          /// false when the run has no properties, no style reference,
 180:          /// or a reference to a different style.
 181:          /// </returns>
 182:          private static bool IsRunInStyle(Run r, string styleId)
 183:          {
 184:              RunProperties rPr = r.GetFirstChild<RunProperties>();
 185:   
 186:              if (rPr == null)
 187:              {
 188:                  return false;
 189:              }
 190:   
 191:              RunStyle runStyle = rPr.RunStyle;
 192:   
 193:              return runStyle != null
 194:                  && runStyle.Val != null
 195:                  && OrdinalEquals(runStyle.Val.Value, styleId);
 196:          }
 197:   
 198:          /// <summary>
 199:          /// Returns the tables that reference the named style.
 200:          /// </summary>
 201:          /// <param name="mainPart">Main document part to search.</param>
 202:          /// <param name="styleName">
 203:          /// Style name, e.g. "Light List Accent 1".
 204:          /// </param>
 205:          /// <returns>
 206:          /// A lazily evaluated sequence; empty when nothing matches or
 207:          /// the part has no document element.
 208:          /// </returns>
 209:          /// <exception cref="ArgumentNullException">
 210:          /// <paramref name="mainPart"/> is null.
 211:          /// </exception>
 212:          public static IEnumerable<Table> TablesByStyleName(
 213:              this MainDocumentPart mainPart, string styleName)
 214:          {
 215:              if (mainPart == null)
 216:              {
 217:                  throw new ArgumentNullException("mainPart");
 218:              }
 219:   
 220:              if (mainPart.Document == null)
 221:              {
 222:                  return Enumerable.Empty<Table>();
 223:              }
 224:   
 225:              string styleId = GetStyleIdFromStyleName(mainPart, styleName);
 226:   
 227:              return mainPart.Document
 228:                  .Descendants<Table>()
 229:                  .Where(t => IsTableInStyle(t, styleId));
 230:          }
 231:   
 232:          /// <summary>
 233:          /// Tells whether a table's properties reference
 234:          /// <paramref name="styleId"/>.
 235:          /// </summary>
 236:          /// <returns>
 237:          /// false when the table has no properties, no style reference,
 238:          /// or a reference to a different style.
 239:          /// </returns>
 240:          private static bool IsTableInStyle(Table tbl, string styleId)
 241:          {
 242:              TableProperties tblPr = tbl.GetFirstChild<TableProperties>();
 243:   
 244:              if (tblPr == null)
 245:              {
 246:                  return false;
 247:              }
 248:   
 249:              TableStyle tblStyle = tblPr.TableStyle;
 250:   
 251:              return tblStyle != null
 252:                  && tblStyle.Val != null
 253:                  && OrdinalEquals(tblStyle.Val.Value, styleId);
 254:          }
 255:      }
 256:  }

 

 

 

 

  

>> 檔案下載:點我下載

--->  本文預設於 2010.05.20 登入「Office/SharePoint 開發組」

 

 

 

 

 

 

 

 

 

 

>> 參考翻譯及引用:Retrieving Word Content Based on Styles