使用iTextSharp 5將html檔轉成PDF檔－幸運星-生活日誌

最近在寫網頁轉成PDF的程式，發現iTextSharp的html轉PDF的範例並沒有想像中好找，於是一邊參照別人的範例
一邊改寫測試，目前測試結果如下:

Program.cs

using System;
using System.IO;
using System.Collections;
using System.Collections.Generic;

using iTextSharp.text;
using iTextSharp.text.html;
using iTextSharp.text.html.simpleparser;
using iTextSharp.text.pdf;

namespace SimpleSample
{
    /// <summary>
    /// Console sample that converts an HTML file into a PDF file using
    /// iTextSharp 5's HTMLWorker.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads C:\myproject\Default.htm as UTF-8, parses it into iTextSharp
        /// elements with a CJK-capable style sheet, and writes the result to
        /// C:\myproject\Default.pdf as a landscape A4 document.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(String[] args)
        {
            // Register fonts installed with the operating system. They normally
            // live in C:\WINDOWS\Fonts; the "windir" environment variable gives
            // the Windows directory. To see a font's file name, right-click it
            // in the Fonts folder and open its properties.
            // This is done before any file is opened so that a failure here
            // cannot leave an empty, locked output file behind.
            string fontsDir = System.Environment.GetEnvironmentVariable("windir") + @"\Fonts\";
            FontFactory.Register(fontsDir + "simhei.ttf");  // SimHei
            FontFactory.Register(fontsDir + "MINGLIU.TTC"); // MingLiU & PMingLiU
            FontFactory.Register(fontsDir + "KAIU.TTF");    // DFKai-SB (Kai)

            // Build the style sheet that tells the parser how to render each
            // html tag. According to the author's testing, LoadTagStyle(...)
            // takes:
            //   1st argument: html tag name
            //   2nd argument: attribute name, interpreted so far as:
            //     "face"     font to use (must be registered first)
            //     "encoding" font encoding; Identity-H is the Unicode encoding
            //                with horizontal writing
            //     "leading"  line height
            //   3rd argument: attribute value
            StyleSheet style = new StyleSheet();

            // Known limitation from the author's testing: using face "KAIU"
            // (with encoding "Identity-H" and leading "50,0") on the body tag
            // did not produce any output.
            // NOTE(review): possibly the registered family name differs from
            // the file name - confirm against the registered font names.

            // body tag settings
            style.LoadTagStyle("body", "face", "SIMHEI");
            style.LoadTagStyle("body", "encoding", "Identity-H");
            style.LoadTagStyle("body", "leading", "12,0");

            // td tag settings; these override the body tag settings
            style.LoadTagStyle("td", "face", "MINGLIU");
            style.LoadTagStyle("td", "encoding", "Identity-H");
            style.LoadTagStyle("td", "leading", "18,0");

            // The html file is stored as UTF-8, so the reader is opened with
            // that encoding. Both streams are wrapped in using blocks so they
            // are released even when parsing or writing throws.
            using (StreamReader sr = new StreamReader("C:\\myproject\\Default.htm",
                System.Text.Encoding.UTF8))
            using (FileStream pdfStream = new FileStream("C:\\myproject\\Default.pdf",
                System.IO.FileMode.Create))
            {
                // Landscape A4; with no constructor argument the default is
                // portrait A4.
                Document doc = new Document(PageSize.A4.Rotate());
                PdfWriter.GetInstance(doc, pdfStream);

                doc.Open();

                // iTextSharp turns the html tags into iTextSharp objects.
                // ParseToList(...) takes the html input reader as its first
                // argument and the style sheet as its second.
                List<IElement> htmlElements = HTMLWorker.ParseToList(sr, style);
                for (int i = 0; i < htmlElements.Count; i++)
                {
                    // For this html file the author observed
                    // iTextSharp.text.Paragraph and
                    // iTextSharp.text.pdf.PdfPTable objects.
                    System.Console.WriteLine("第"+(i+1)+"個物件為: "+htmlElements[i]);
                    // Add every parsed object to the document.
                    doc.Add(htmlElements[i]);
                }

                // Close only on the success path: the output stream is already
                // released by the using block if an exception occurs.
                doc.Close();
            }

            System.Console.WriteLine("匯出PDF成功");
            System.Console.ReadLine();
        }
    }
}

Default.htm

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<!-- Sample input page: a single form containing hidden fields, one 4-column table and a submit button. -->
<head><title>
</title></head>
<body>
   網頁的編碼方式: utf-8
   <form name="form1" method="post" action="Default.aspx" id="form1">
<!-- Hidden fields named __VIEWSTATE and __EVENTVALIDATION; the form posts back to Default.aspx. -->
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKLTE0MjgzNDk0M2RkUIaSJB7sEQlvn4wtjWqK+XgUR/M=" />

<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="/wEWAgLS6bSMBAKM54rGBk84uLYPQc79PVySE1ix0BNUh+wu" />
   <div>
   <!-- Test table: the first row mixes Chinese text (including less common characters) with ASCII; the remaining rows hold digits. -->
   <table border="2">
   <tr><td>這是特殊字:俽</td><td>這也是特殊字:潁</td><td>看起來是可以</td><td>Good!</td></tr>
   <tr><td>0</td><td>1</td><td>2</td><td>3</td></tr>
   <tr><td>1</td><td>2</td><td>3</td><td>4</td></tr>
   <tr><td>2</td><td>3</td><td>4</td><td>5</td></tr>
   </table>
   </div>
   <input type="submit" name="Button1" value="aspx to html" id="Button1" />
   </form>
</body>
</html>

文中有我還沒試出來的功能(例如標楷體的輸出)，也希望網友不吝指教喔!

iTextSharp 5 download page
http://sourceforge.net/projects/itextsharp/