[Office 2010 開發 ] 利用 OpenXML 來了解您的 Word 文件的格式有哪些
在本文中您將會知道/學到:
- 如何依樣式擷取其內容
- 如何為 Open XML SDK 撰寫擴充方法 (Extension Methods)
而在本文中將會主要介紹下方幾項實作內容:
- 透過 Open XML SDK 開啟 Word 文件
- 找尋樣式的 ID 及其名稱。包括 段落、表格等
- 找到段落、表格後,可進一步判斷該項目是否套用了特定的樣式名稱
- 範例展示並呈現其結果
>> 這是範本檔案內容。 有 段落、表格、粗體等….
>> 標題部份為標題1
>> 顯示表格。
>> 這是針對上述 Word 文件的分析結果統計。
☆ 程式部份
◇ 請先建立一個 Console Application (主控台)
◇ 再把 Program.cs 檔更改成如下的程式碼
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace QueryContentBasedOnStyles
{
    /// <summary>
    /// Console entry point that reports how many paragraphs, runs and tables
    /// of a Word document reference a given style name.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Opens the Word document and prints one count per queried style.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments. When present, <c>args[0]</c> is the
        /// path of the document to analyse; otherwise "input.docx" is used.
        /// </param>
        static void Main(string[] args)
        {
            // Style names to search for, one per element kind.
            string paraStyle = "heading 1";
            string runStyle = "Intense Emphasis";
            string tableStyle = "Light List Accent 1";

            // Keep the original default so running without arguments behaves as before.
            string path = (args != null && args.Length > 0) ? args[0] : "input.docx";

            // The document is only queried, never modified, so it is opened
            // read-only (isEditable = false) instead of read-write.
            using (WordprocessingDocument myDoc =
                WordprocessingDocument.Open(path, false))
            {
                MainDocumentPart mainPart = myDoc.MainDocumentPart;

                if (mainPart == null)
                {
                    // Nothing to query: the package carries no main document part.
                    Console.WriteLine("The document has no main document part.");
                }
                else
                {
                    // Report the number of matches for each of the styles above.
                    Console.WriteLine("Number of paragraphs with "
                        + paraStyle + " styles: "
                        + mainPart.ParagraphsByStyleName(paraStyle).Count());
                    Console.WriteLine("Number of runs with "
                        + runStyle + " styles: "
                        + mainPart.RunsByStyleName(runStyle).Count());
                    Console.WriteLine("Number of tables with "
                        + tableStyle + " styles: "
                        + mainPart.TablesByStyleName(tableStyle).Count());
                }
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
◇ 新增一個 WordStyleExtensions.cs 檔做為本次的 Open XML SDK 的 Extension Methods (延伸方法),其程式碼如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace QueryContentBasedOnStyles
{
    /// <summary>
    /// Extension methods on <see cref="MainDocumentPart"/> that select
    /// paragraphs, runs and tables by the name of the style they reference.
    /// </summary>
    /// <remarks>
    /// Elements that carry no explicit style reference never match.
    /// All string comparisons are ordinal (case-sensitive).
    /// </remarks>
    public static class WordStyleExtensions
    {
        /// <summary>
        /// Translates a style name into the style ID used by style references.
        /// </summary>
        /// <param name="mainPart">Main document part whose style definitions are searched.</param>
        /// <param name="styleName">Style name to look up.</param>
        /// <returns>
        /// The ID of the first style whose name equals <paramref name="styleName"/>;
        /// <paramref name="styleName"/> itself when the document has no style
        /// definitions, no style of that name, or the matching style has no ID.
        /// </returns>
        private static string GetStyleIdFromStyleName(
            MainDocumentPart mainPart, string styleName)
        {
            StyleDefinitionsPart stylePart = mainPart.StyleDefinitionsPart;

            // A document may lack a styles part (or its root element);
            // fall back to the name instead of throwing.
            if (stylePart == null || stylePart.Styles == null)
            {
                return styleName;
            }

            Style match = stylePart.Styles
                .Elements<Style>()
                .FirstOrDefault(s => s.StyleName != null
                    && HasValue(s.StyleName.Val, styleName));

            if (match == null || object.ReferenceEquals(match.StyleId, null))
            {
                return styleName;
            }

            return match.StyleId.Value ?? styleName;
        }

        /// <summary>
        /// Returns the paragraphs of the main document that reference the named style.
        /// </summary>
        /// <param name="mainPart">Main document part to search.</param>
        /// <param name="styleName">Name of the paragraph style.</param>
        /// <returns>A lazily evaluated sequence of matching paragraphs (possibly empty).</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="mainPart"/> or <paramref name="styleName"/> is null.
        /// </exception>
        public static IEnumerable<Paragraph> ParagraphsByStyleName(
            this MainDocumentPart mainPart, string styleName)
        {
            ValidateArguments(mainPart, styleName);
            string styleId = GetStyleIdFromStyleName(mainPart, styleName);

            Document document = mainPart.Document;
            if (document == null)
            {
                return Enumerable.Empty<Paragraph>();
            }

            return document
                .Descendants<Paragraph>()
                .Where(p => IsParagraphInStyle(p, styleId));
        }

        /// <summary>
        /// Tells whether a paragraph explicitly references the given style ID.
        /// </summary>
        private static bool IsParagraphInStyle(Paragraph p, string styleId)
        {
            ParagraphProperties pPr = p.GetFirstChild<ParagraphProperties>();
            ParagraphStyleId paraStyle = pPr == null ? null : pPr.ParagraphStyleId;

            return paraStyle != null && HasValue(paraStyle.Val, styleId);
        }

        /// <summary>
        /// Returns the runs of the main document that reference the named style.
        /// </summary>
        /// <param name="mainPart">Main document part to search.</param>
        /// <param name="styleName">Name of the run (character) style.</param>
        /// <returns>A lazily evaluated sequence of matching runs (possibly empty).</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="mainPart"/> or <paramref name="styleName"/> is null.
        /// </exception>
        public static IEnumerable<Run> RunsByStyleName(
            this MainDocumentPart mainPart, string styleName)
        {
            ValidateArguments(mainPart, styleName);
            string styleId = GetStyleIdFromStyleName(mainPart, styleName);

            Document document = mainPart.Document;
            if (document == null)
            {
                return Enumerable.Empty<Run>();
            }

            return document
                .Descendants<Run>()
                .Where(r => IsRunInStyle(r, styleId));
        }

        /// <summary>
        /// Tells whether a run explicitly references the given style ID.
        /// </summary>
        private static bool IsRunInStyle(Run r, string styleId)
        {
            RunProperties rPr = r.GetFirstChild<RunProperties>();
            RunStyle runStyle = rPr == null ? null : rPr.RunStyle;

            return runStyle != null && HasValue(runStyle.Val, styleId);
        }

        /// <summary>
        /// Returns the tables of the main document that reference the named style.
        /// </summary>
        /// <param name="mainPart">Main document part to search.</param>
        /// <param name="styleName">Name of the table style.</param>
        /// <returns>A lazily evaluated sequence of matching tables (possibly empty).</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="mainPart"/> or <paramref name="styleName"/> is null.
        /// </exception>
        public static IEnumerable<Table> TablesByStyleName(
            this MainDocumentPart mainPart, string styleName)
        {
            ValidateArguments(mainPart, styleName);
            string styleId = GetStyleIdFromStyleName(mainPart, styleName);

            Document document = mainPart.Document;
            if (document == null)
            {
                return Enumerable.Empty<Table>();
            }

            return document
                .Descendants<Table>()
                .Where(t => IsTableInStyle(t, styleId));
        }

        /// <summary>
        /// Tells whether a table explicitly references the given style ID.
        /// </summary>
        private static bool IsTableInStyle(Table tbl, string styleId)
        {
            TableProperties tblPr = tbl.GetFirstChild<TableProperties>();
            TableStyle tblStyle = tblPr == null ? null : tblPr.TableStyle;

            return tblStyle != null && HasValue(tblStyle.Val, styleId);
        }

        /// <summary>
        /// Rejects null arguments up front so callers get an
        /// <see cref="ArgumentNullException"/> naming the offending parameter.
        /// </summary>
        private static void ValidateArguments(
            MainDocumentPart mainPart, string styleName)
        {
            if (mainPart == null)
            {
                throw new ArgumentNullException("mainPart");
            }

            if (styleName == null)
            {
                throw new ArgumentNullException("styleName");
            }
        }

        /// <summary>
        /// Null-safe ordinal comparison of an attribute value with an expected string.
        /// Returns false when the attribute or its value is missing.
        /// </summary>
        private static bool HasValue(
            DocumentFormat.OpenXml.StringValue attribute, string expected)
        {
            // ReferenceEquals avoids going through StringValue's implicit
            // string conversion just to perform a null check.
            return !object.ReferenceEquals(attribute, null)
                && attribute.Value != null
                && string.Equals(attribute.Value, expected, StringComparison.Ordinal);
        }
    }
}
>> 檔案下載:點我下載
---> 本文預設於 2010.05.20 登入「Office/Sharepoint 開發組」
>> 參考翻譯及引用:Retrieving Word Content Based on Styles