1.pdf 轉換 txt
通過 PDFBox 組件,生成txt文件。需要下載PDFBox 組件。
2.word excell 轉換txt
直接調用相應組件,另存為txt。
需要注意:
2.1 word 文檔關閉,需要調用
object SaveChange = false;
app.Quit(ref SaveChange, ref obj, ref obj);
2.2 excell 文檔關閉,需要調用
wbk.Close(Type.Missing, Type.Missing, Type.Missing);
wst = null;
wbk = null;
app.Quit();
在打開excell文檔的時候,賦值2個變量
app.Visible = false;//打開的excell不可見
app.DisplayAlerts = false;//不是顯示彈出對話框
3.下面是實現代碼:
3.1 構建IcDocument接口
/// <summary>
/// Contract shared by the document converters: a single operation that
/// performs the conversion.
/// </summary>
public interface IcDocument
{
    /// <summary>
    /// Runs the conversion for the document the implementing object was created for.
    /// </summary>
    void TransformDocument();
}
3.2 構建操作基類 BaseDocument
/// <summary>
/// Common state for document converters: stores the source file and the
/// output folder, and chooses a GUID-named .txt output path at construction time.
/// </summary>
public abstract class BaseDocument
{
    /// <summary>
    /// Folder that receives the generated text file.
    /// </summary>
    protected string TargetFolder;

    /// <summary>
    /// Path of the file to convert.
    /// </summary>
    protected string source;

    /// <summary>
    /// Full path of the text file to generate.
    /// </summary>
    protected string Target;

    /// <summary>
    /// Creates <see cref="TargetFolder"/> when it does not exist yet and points
    /// <see cref="Target"/> at a new GUID-named .txt file inside it.
    /// </summary>
    protected virtual void GetCurrentTarget()
    {
        if (!Directory.Exists(TargetFolder))
        {
            Directory.CreateDirectory(TargetFolder);
        }

        string fileName = Guid.NewGuid().ToString() + ".txt";

        // Path.Combine adds the directory separator only when it is missing and
        // uses the platform's separator, unlike manual concatenation with "\".
        Target = Path.Combine(TargetFolder, fileName);
    }

    /// <summary>
    /// Stores the source file and output folder, then prepares the output path.
    /// </summary>
    /// <param name="TargetFolder">Folder that receives the generated text file.</param>
    /// <param name="source">Path of the file to convert.</param>
    public BaseDocument(string TargetFolder, string source)
    {
        this.source = source;
        this.TargetFolder = TargetFolder;

        // NOTE(review): this is a virtual call from a constructor; an override in
        // a derived class would run before that class's own constructor body.
        GetCurrentTarget();
    }
}
3.3 構建 工程類 FactoryDocument,根據傳入的轉換文檔後綴,生成不同的子類。
/// <summary>
/// Factory that picks a converter based on the extension of the source file.
/// </summary>
public class FactoryDocument
{
    /// <summary>
    /// Returns the converter matching the extension of <paramref name="source"/>.
    /// </summary>
    /// <param name="TargetFolder">Folder that receives the generated text file.</param>
    /// <param name="source">Path of the file to read.</param>
    /// <returns>
    /// A converter for the file, or <c>null</c> (after writing a message to the
    /// console) when the file does not exist.
    /// </returns>
    public static IcDocument GetDocoment(string TargetFolder, string source)
    {
        FileInfo file = new FileInfo(source);
        if (!file.Exists)
        {
            Console.WriteLine("文件沒有找");
            return null;
        }

        // Extensions are identifiers, not linguistic text, so upper-case them with
        // the invariant culture to keep the match independent of the current locale.
        switch (Path.GetExtension(source).ToUpperInvariant())
        {
            case ".DOC":
            case ".DOCX":
                return new WordDocument(TargetFolder, source);

            case ".XLS":
            case ".XLSX":
                return new EexcelDocument(TargetFolder, source);

            case ".PDF":
            default:
                // Unrecognised extensions are handled by the PDF converter.
                return new PdfDocument(TargetFolder, source);
        }
    }
}
3.4 構建excell操作類 EexcelDocument : BaseDocument, IcDocument
/// <summary>
/// Converts an Excel workbook to a text file by automating Excel and saving
/// the workbook in the Unicode text format.
/// </summary>
public class EexcelDocument : BaseDocument, IcDocument
{
    /// <summary>
    /// Creates a converter for the given workbook.
    /// </summary>
    /// <param name="TargetFolder">Folder that receives the generated text file.</param>
    /// <param name="source">Path of the workbook to convert.</param>
    public EexcelDocument(string TargetFolder, string source)
        : base(TargetFolder, source)
    {
    }

    #region IcDocument members

    /// <summary>
    /// Opens the source workbook in a hidden Excel instance and saves it to
    /// <c>Target</c> as Unicode text. A <see cref="COMException"/> raised while
    /// saving or closing is written to the console; Excel is always shut down,
    /// including when opening the workbook fails.
    /// </summary>
    public void TransformDocument()
    {
        Microsoft.Office.Interop.Excel.Application app = new Microsoft.Office.Interop.Excel.Application();
        Microsoft.Office.Interop.Excel.Workbooks books = null;
        Microsoft.Office.Interop.Excel.Workbook wbk = null;

        try
        {
            app.Visible = false;        // keep the Excel window hidden
            app.DisplayAlerts = false;  // suppress modal prompts

            // Hold the Workbooks collection in a local so it can be released below.
            books = app.Workbooks;
            wbk = books.Open(source,
                System.Type.Missing, System.Type.Missing, System.Type.Missing,
                System.Type.Missing, System.Type.Missing, System.Type.Missing,
                System.Type.Missing, System.Type.Missing, System.Type.Missing,
                System.Type.Missing, System.Type.Missing, System.Type.Missing,
                System.Type.Missing, System.Type.Missing);

            try
            {
                wbk.SaveAs(Target, XlFileFormat.xlUnicodeText,
                    System.Type.Missing, System.Type.Missing, System.Type.Missing,
                    System.Type.Missing, XlSaveAsAccessMode.xlNoChange,
                    System.Type.Missing, System.Type.Missing, System.Type.Missing,
                    System.Type.Missing, System.Type.Missing);
            }
            catch (COMException ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
        finally
        {
            if (wbk != null)
            {
                try
                {
                    // The text file has already been written by SaveAs; pass false
                    // so closing never saves anything further.
                    wbk.Close(false, Type.Missing, Type.Missing);
                }
                catch (COMException ex)
                {
                    Console.WriteLine(ex.Message);
                }

                Marshal.ReleaseComObject(wbk);
            }

            if (books != null)
            {
                Marshal.ReleaseComObject(books);
            }

            // Release the COM wrappers explicitly instead of relying on GC.Collect().
            app.Quit();
            Marshal.ReleaseComObject(app);
        }
    }

    #endregion
}
3.5 構建word 操作類 WordDocument : BaseDocument, IcDocument
/// <summary>
/// Converts a Word document to a text file by automating Word and saving the
/// document in the plain-text format.
/// </summary>
public class WordDocument : BaseDocument, IcDocument
{
    /// <summary>
    /// Creates a converter for the given document.
    /// </summary>
    /// <param name="TargetFolder">Folder that receives the generated text file.</param>
    /// <param name="source">Path of the document to convert.</param>
    public WordDocument(string TargetFolder, string source)
        : base(TargetFolder, source)
    {
    }

    #region IcDocument members

    /// <summary>
    /// Opens the source document in Word and saves it to <c>Target</c> using
    /// <c>WdSaveFormat.wdFormatText</c>. A <see cref="COMException"/> raised while
    /// saving is written to the console; Word is always shut down without saving
    /// changes, including when opening the document fails.
    /// </summary>
    public void TransformDocument()
    {
        Application app = new Application();
        Documents Docs = null;
        Document ad = null;
        object obj = Missing.Value;

        try
        {
            Docs = app.Documents;
            object FileName = source;

            // Use the document returned by Open instead of app.ActiveDocument so the
            // code always works on the file it just opened.
            ad = Docs.Open(ref FileName,
                ref obj, ref obj, ref obj, ref obj, ref obj,
                ref obj, ref obj, ref obj, ref obj, ref obj,
                ref obj, ref obj, ref obj, ref obj, ref obj);

            try
            {
                object targetName = Target;
                object FileFormat = WdSaveFormat.wdFormatText;
                ad.SaveAs(ref targetName, ref FileFormat,
                    ref obj, ref obj, ref obj, ref obj, ref obj,
                    ref obj, ref obj, ref obj, ref obj, ref obj,
                    ref obj, ref obj, ref obj, ref obj);
            }
            catch (COMException ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
        finally
        {
            // Release the COM wrappers explicitly instead of relying on GC.Collect().
            if (ad != null)
            {
                Marshal.ReleaseComObject(ad);
            }

            if (Docs != null)
            {
                Marshal.ReleaseComObject(Docs);
            }

            object SaveChange = false;
            app.Quit(ref SaveChange, ref obj, ref obj);
            Marshal.ReleaseComObject(app);
        }
    }

    #endregion
}
3.6 構建pdf 操作類 PdfDocument : BaseDocument,IcDocument
/// <summary>
/// Converts a PDF file to a text file using PDFBox text extraction.
/// </summary>
public class PdfDocument : BaseDocument, IcDocument
{
    /// <summary>
    /// Creates a converter for the given PDF file.
    /// </summary>
    /// <param name="TargetFolder">Folder that receives the generated text file.</param>
    /// <param name="source">Path of the PDF file to convert.</param>
    public PdfDocument(string TargetFolder, string source)
        : base(TargetFolder, source)
    {
    }

    /// <summary>
    /// Extracts the text of <paramref name="file"/> with <c>PDFTextStripper</c>
    /// and writes it to <c>Target</c> as UTF-8, overwriting any existing file.
    /// </summary>
    /// <param name="file">The PDF file to read.</param>
    public void pdf2txt(FileInfo file)
    {
        PDDocument doc = PDDocument.load(file.FullName);
        try
        {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            string text = pdfStripper.getText(doc);

            // The using block closes the writer even when Write throws.
            using (StreamWriter writer = new StreamWriter(Target, false, Encoding.UTF8))
            {
                writer.Write(text);
            }
        }
        finally
        {
            // Close the loaded PDF on every path, including extraction failures.
            doc.close();
        }
    }

    #region IcDocument members

    /// <summary>
    /// Converts the source PDF when it exists; otherwise writes a message to the console.
    /// </summary>
    public void TransformDocument()
    {
        FileInfo pdffile = new FileInfo(source);
        if (!pdffile.Exists)
        {
            Console.WriteLine("The File is NOT Exist.");
            return;
        }

        pdf2txt(pdffile);
    }

    #endregion
}
3.7 在程序中使用
/// <summary>
/// Sample entry point: converts two hard-coded files into text files under c:\temp.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string[] sources =
    {
        @"C:\Users\Desktop\changes.pdf",
        @"D:\WorkDocuments\201203.xls"
    };

    foreach (string path in sources)
    {
        IcDocument document = FactoryDocument.GetDocoment("c:\\temp", path);

        // GetDocoment returns null when the source file is missing; skip the file
        // instead of failing with a NullReferenceException.
        if (document != null)
        {
            document.TransformDocument();
        }
    }
}