' Parse PDF-documents and scans of documents with Microsoft Excel and Adobe Acrobat Pro ' ' /x86 - run in x86 mode ' /i - parse images ' /s - silent mode Option Explicit Const defaultRunX86Only = False Const defaultParseImages = True Const defaultSilentRun = False Dim ScriptHost: ScriptHost = Mid(WScript.FullName, InStrRev(WScript.FullName, "\") + 1, Len(WScript.FullName)) Dim objWScript: Set objWScript = CreateObject("WScript.Shell") Dim objProcEnv: Set objProcEnv = objWScript.Environment("Process") Dim args: Set args = WScript.Arguments.Named Dim doRunX86: If args.Exists("x86") Then doRunX86 = True Else doRunX86 = defaultRunX86Only ' If running in x64 and need run in x86 If doRunX86 And InStr(LCase(WScript.FullName), LCase(objProcEnv("windir") & "\System32\")) And objProcEnv("PROCESSOR_ARCHITECTURE") = "AMD64" Then If Not WScript.Arguments.Count = 0 Then Dim strArg, Arg strArg = "" For Each Arg In Wscript.Arguments strArg = strArg & " " & """" & Arg & """" Next End If Dim strCmd: strCmd = """" & objProcEnv("windir") & "\SysWOW64\" & ScriptHost & """" & " """ & WScript.ScriptFullName & """" & strArg objWScript.Run strCmd ' Main Else 'Excel.Workbooks.Add XlWBATemplate values Const xlWBATWorksheet = -4167 Const xlWBATChart = -4109 Const xlWBATExcel4IntlMacroSheet = 4 Const xlWBATExcel4MacroSheet = 3 'Excel.XlFileFormat enumeration Const xlWorkbookDefault = 51 Const xlExcel8 = 56 'Excel.XlInsertShiftDirection enumeration Const xlShiftDown = -4121 Const xlShiftToRight = -4161 'Excel.XlInsertFormatOrigin enumeration Const xlFormatFromLeftOrAbove = 0 Const xlFormatFromRightOrBelow = 1 'Excel.Range.HorizontalAlignment Const xlCenter = -4108 Const xlDistributed = -4117 Const xlJustify = -4130 Const xlLeft = -4131 Const xlRight = -4152 Const xlBottom = -4107 Const xlTop = -4160 'Excel Const xlLastCell = 11 Const xlDown = -4121 Const xlToLeft = -4159 Const xlToRight = -4161 Const xlUp = -4162 'Excel.Borders Const xlDiagonalDown = 5 Const xlDiagonalUp = 6 Const xlEdgeBottom = 9 Const xlEdgeLeft = 7 Const xlEdgeRight = 10 Const xlEdgeTop = 8 Const xlInsideHorizontal = 12 Const xlInsideVertical = 11 'Excel.Border.LineStyle Const xlContinuous = 1 Const xlDash = -4115 Const xlDashDot = 4 Const xlDashDotDot = 5 Const xlDot = -4118 Const xlDouble = -4119 Const xlLineStyleNone = -4142 Const xlSlantDashDot = 13 'Excel.Border.Weight XlBorderWeight values Const xlHairline = 1 Const xlMedium = -4138 Const xlThick = 4 Const xlThin = 2 'Excel.PageBreak XlPageBreak enumeration Const xlPageBreakAutomatic = -4105 Const xlPageBreakManual = -4135 Const xlPageBreakNone = -4142 'Excel.PageSetup.Orientation Const xlLandscape = 2 Const xlPortrait = 1 Dim imagesExtensions: imagesExtensions = Array("png", "ps", "eps", "jpg", "jpeg", "jpe", "tif", "tiff") Dim doParseImages: If args.Exists("i") Then doParseImages = True Else doParseImages = defaultParseImages Dim doSilentRun: If args.Exists("s") Then doSilentRun = True Else doSilentRun = defaultSilentRun Dim FileItem, SubFolder, ExcelFile, ExcelFiles, UserSelect Dim PdfFiles(), FilesCount: FilesCount = 0 Dim decSep: decSep = Mid(1.1, 2, 1) Dim objArgs: Set objArgs = WScript.Arguments Dim objShell: Set objShell = CreateObject("Shell.Application") Dim objFSO: Set objFSO = CreateObject("Scripting.FileSystemObject") Dim objRegExp: Set objRegExp = New RegExp objRegExp.IgnoreCase = True Dim ar, doDialogOpen: doDialogOpen = True For ar = 0 To objArgs.Count - 1 If Left(objArgs(ar), 1) <> "/" Then BuildFilesList objArgs(ar) If doDialogOpen Then doDialogOpen = False End If Next If doDialogOpen Then On Error Resume Next Dim objBrowFolder: Set objBrowFolder = objShell.BrowseForFolder(0, "Select folder for export PDF files to Excel", 0, "") Dim objFolder: Set objFolder = objFSO.GetFolder(objBrowFolder.Self.Path) If Err.Number = 0 Then On Error GoTo 0 UserSelect = MsgBox("Export PDF files from folder " & objFolder & vbCrLf & "to XLS and open in Excel? (No - export only)", vbYesNoCancel, "Export PDF files") If UserSelect = vbYes Or UserSelect = vbNo Then For Each FileItem In objFolder.Files Dim fileExt: fileExt = LCase(objFSO.GetExtensionName(FileItem.Name)) If fileExt = "pdf" Or doParseImages And InArray(fileExt, imagesExtensions) Then FilesCount = FilesCount + 1 ReDim Preserve PdfFiles(FilesCount) PdfFiles(FilesCount) = FileItem.Path End If Next End If End If End If If FilesCount > 0 Then ExcelFiles = SavePDFAsOtherFormat(PdfFiles, "xls") If IsNull(ExcelFiles) Then MsgBox "Adobe Acrobat Pro not found!", vbCritical, "Operation aborted" ElseIf IsEmpty(ExcelFiles) Then MsgBox "Wrong export file format!", vbCritical, "Operation aborted" ElseIf Not doDialogOpen Or UserSelect = vbYes Then On Error Resume Next Dim objExcel: Set objExcel = GetObject(, "Excel.Application") If Err.Number <> 0 Or objExcel Is Nothing Then Set objExcel = WScript.CreateObject("Excel.Application") If objExcel Is Object Then On Error GoTo 0 Dim f, i, j, k Dim jj: jj = 2 If doSilentRun Then objExcel.Visible = False Dim wbResult: Set wbResult = objExcel.WorkBooks.Add(xlWBATWorksheet) Dim wsOrders: Set wsOrders = wbResult.Sheets(1) Dim wsData: Set wsData = wbResult.Sheets.Add(, wbResult.Sheets(wbResult.Sheets.Count)) wsOrders.Name = "orders" wsData.Name = "data" wsOrders.Activate wsOrders.Cells(1, 1).Value = "file" wsOrders.Cells(1, 2).Value = "result" wsOrders.Cells(1, 3).Value = "order" wsOrders.Cells(1, 4).Value = "orderNum" wsOrders.Cells(1, 5).Value = "orderDate" wsOrders.Cells(1, 6).Value = "orderDead" wsOrders.Cells(1, 7).Value = "orderSum" wsOrders.Cells(1, 8).Value = "contSum" wsOrders.Cells(1, 9).Value = "control" wsOrders.Range(wsOrders.Cells(1, 1), wsOrders.Cells(1, 9)).Font.Bold = True wsOrders.Columns(1).ColumnWidth = 40 wsOrders.Columns(2).ColumnWidth = 9 wsOrders.Columns(3).ColumnWidth = 40 wsOrders.Columns(4).ColumnWidth = 20 wsOrders.Columns(5).ColumnWidth = 12 wsOrders.Columns(6).ColumnWidth = 12 wsOrders.Columns(7).ColumnWidth = 12 wsOrders.Columns(8).ColumnWidth = 12 wsOrders.Columns(9).ColumnWidth = 12 wsData.Cells(1, 1).Value = "orderRow" wsData.Cells(1, 2).Value = "order" wsData.Cells(1, 3).Value = "container" wsData.Cells(1, 4).Value = "contNum" wsData.Cells(1, 5).Value = "contDate1" wsData.Cells(1, 6).Value = "contDate2" wsData.Cells(1, 7).Value = "contSum" wsData.Range(wsData.Cells(1, 1), wsData.Cells(1, 7)).Font.Bold = True wsData.Columns(1).ColumnWidth = 9 wsData.Columns(2).ColumnWidth = 30 wsData.Columns(3).ColumnWidth = 50 wsData.Columns(4).ColumnWidth = 20 wsData.Columns(5).ColumnWidth = 12 wsData.Columns(6).ColumnWidth = 12 wsData.Columns(7).ColumnWidth = 12 For f = 1 To FilesCount If objFSO.FileExists(ExcelFiles(f)) Then Dim orderStr: orderStr = Empty Dim orderNum: orderNum = Empty Dim orderDate: orderDate = Empty Dim orderDead: orderDead = Empty Dim orderSum: orderSum = Empty Dim tmpStr, orderTmp, containerTmp Dim isHeadParsed: isHeadParsed = False Dim isTableParsed: isTableParsed = False Dim excelWorkBook: Set excelWorkBook = objExcel.WorkBooks.Open(ExcelFiles(f)) Dim excelSheet: Set excelSheet = excelWorkBook.Worksheets(1) Dim lastRow: lastRow = excelSheet.Cells(1, 1).SpecialCells(xlLastCell).Row Dim lastCol: lastCol = excelSheet.Cells(1, 1).SpecialCells(xlLastCell).Column Dim ff: ff = f + 1 For j = 1 To lastRow If isHeadParsed And Not isTableParsed Then tmpStr = DTrim(excelSheet.Cells(j, 1).Value) If Right(tmpStr, 13) = " д.свободных)" Then containerTmp = Split(tmpStr, " ") Dim contSum: contSum = 0 Do While DTrim(excelSheet.Cells(j + 1, 4).Value) = containerTmp(0) j = j + 1 contSum = contSum + CDbl(Replace(Replace(excelSheet.Cells(j, 8).Value, " ", ""), ".", decSep)) Loop wsData.Cells(jj, 1).Value = ff wsData.Cells(jj, 2).FormulaR1C1 = "=INDEX(orders!C3,RC1)" wsData.Cells(jj, 3).Value = tmpStr wsData.Cells(jj, 4).NumberFormat = "@" wsData.Cells(jj, 4).Value = containerTmp(0) wsData.Cells(jj, 5).NumberFormat = "dd.mm.yyyy" wsData.Cells(jj, 5).Value = containerTmp(2) wsData.Cells(jj, 6).NumberFormat = "dd.mm.yyyy" wsData.Cells(jj, 6).Value = containerTmp(4) wsData.Cells(jj, 7).NumberFormat = "0.00" wsData.Cells(jj, 7).Value = contSum jj = jj + 1 ElseIf tmpStr = "ВСЕГО:" Then isTableParsed = True orderSum = CDbl(Replace(Replace(excelSheet.Cells(j, 8).Value, " ", ""), ".", decSep)) End If Else For i = 1 To lastCol Dim doCheck: If excelSheet.Cells(j, i).Value > "" Then doCheck = True Else doCheck = False If doCheck And i > 1 Then If excelSheet.Cells(j, i - 1).Value = excelSheet.Cells(j, i).Value Then doCheck = False If doCheck And j > 1 Then If excelSheet.Cells(j - 1, i).Value = excelSheet.Cells(j, i).Value Then doCheck = False If doCheck Then Dim cellRows: cellRows = Split(Replace(Replace(excelSheet.Cells(j, i).Value, vbCrLf, vbLf), vbCr, vbLf), vbLf) For k = 0 To UBound(cellRows) tmpStr = Replace(LCase(DTrim(cellRows(k))), "ё", "е") If IsEmpty(orderStr) Then If Left(tmpStr, 5) = "счет " And InStr(tmpStr, " от ") Then orderStr = cellRows(k) tmpStr = DTrim(ClearString(tmpStr, Array("счет", "на оплату", "№", "no", "n", "г.", "г"))) orderTmp = Split(tmpStr, " от ") orderNum = orderTmp(0) If UBound(orderTmp) > 0 Then orderDate = orderTmp(1) End If Else If Left(tmpStr, 18) = "приложение к счету" Then If InStr(DTrim(cellRows(k)), DTrim(orderStr)) Then isHeadParsed = True End If End If If IsEmpty(orderDead) Then If Left(tmpStr, 11) = "оплатить до" And InStr(tmpStr, ":") Then orderTmp = Split(tmpStr, ":") orderDead = Trim(orderTmp(1)) End If End If Next End If Next End If Next Dim resultParse: resultParse = "NO" If isTableParsed Then resultParse = "OK" ElseIf isHeadParsed Then resultParse = "OK?" ElseIf Not IsEmpty(orderStr) Then resultParse = "YES" End If wsOrders.Cells(ff, 1).Value = PdfFiles(f) wsOrders.Cells(ff, 2).Value = resultParse wsOrders.Cells(ff, 3).Value = orderStr wsOrders.Cells(ff, 4).NumberFormat = "@" wsOrders.Cells(ff, 4).Value = orderNum wsOrders.Cells(ff, 5).NumberFormat = "dd.mm.yyyy" wsOrders.Cells(ff, 5).Value = orderDate wsOrders.Cells(ff, 6).NumberFormat = "dd.mm.yyyy" wsOrders.Cells(ff, 6).Value = orderDead wsOrders.Cells(ff, 7).NumberFormat = "0.00" wsOrders.Cells(ff, 7).Value = orderSum wsOrders.Cells(ff, 8).NumberFormat = "0.00" wsOrders.Cells(ff, 8).FormulaR1C1 = "=SUMIF(data!C1,ROW(),data!C7)" wsOrders.Cells(ff, 9).FormulaR1C1 = "=IF(RC[-2]-RC[-1]<>0,""!!!"","""")" excelWorkBook.Close False End If Next wbResult.Activate objExcel.Visible = True Dim objLocator: Set objLocator = CreateObject("WbemScripting.SWbemLocator") Dim objServices: Set objServices = objLocator.ConnectServer() Dim objProcess, objProps: Set objProps = objServices.ExecQuery("select * from Win32_Process where name = 'excel.exe'") For Each objProcess in objProps objWScript.AppActivate objProcess.ProcessId Next Else MsgBox "MS Excel not found!", vbCritical, "Operation aborted" End If Else MsgBox FilesCount & " files exported.", vbInformation, "Export complite" End If End If End If Function SavePDFAsOtherFormat(PDFPath, FileExtension) Dim objAcroApp, objAcroAVDoc, objAcroPDDoc, objJSO, boResult, ExportFormat, NewFilePath(), k If IsArray(PDFPath) Then k = UBound(PDFPath) Else k = 1 ReDim NewFilePath(k) NewFilePath(1) = "" On Error Resume Next Set objAcroApp = CreateObject("AcroExch.App") Set objAcroAVDoc = CreateObject("AcroExch.AVDoc") Select Case LCase(FileExtension) Case "eps": ExportFormat = "com.adobe.acrobat.eps" Case "html", "htm": ExportFormat = "com.adobe.acrobat.html" Case "jpeg", "jpg", "jpe": ExportFormat = "com.adobe.acrobat.jpeg" Case "jpf", "jpx", "jp2", "j2k", "j2c", "jpc": ExportFormat = "com.adobe.acrobat.jp2k" Case "docx": ExportFormat = "com.adobe.acrobat.docx" Case "doc": ExportFormat = "com.adobe.acrobat.doc" Case "png": ExportFormat = "com.adobe.acrobat.png" Case "ps": ExportFormat = "com.adobe.acrobat.ps" Case "rft": ExportFormat = "com.adobe.acrobat.rft" Case "xlsx": ExportFormat = "com.adobe.acrobat.xlsx" Case "xls": ExportFormat = "com.adobe.acrobat.spreadsheet" Case "txt": ExportFormat = "com.adobe.acrobat.accesstext" Case "tiff", "tif": ExportFormat = "com.adobe.acrobat.tiff" Case "xml": ExportFormat = "com.adobe.acrobat.xml-1-00" Case Else: ExportFormat = "" End Select If Err.Number = 0 Then On Error GoTo 0 If ExportFormat <> "" Then Dim i, s For i = 1 To k If IsArray(PDFPath) Then s = PDFPath(i) Else s = PDFPath boResult = objAcroAVDoc.Open(s, "") Set objAcroPDDoc = objAcroAVDoc.GetPDDoc Set objJSO = objAcroPDDoc.GetJSObject If LCase(FileExtension) <> "xls" Then NewFilePath(i) = Left(s, Len(s) - 3) & LCase(FileExtension) Else NewFilePath(i) = Left(s, Len(s) - 3) & "xml" boResult = objJSO.SaveAs(NewFilePath(i), ExportFormat) boResult = objAcroAVDoc.Close(True) Next boResult = objAcroApp.Exit Set objAcroPDDoc = Nothing Set objAcroAVDoc = Nothing Set objAcroApp = Nothing If IsArray(PDFPath) Then SavePDFAsOtherFormat = NewFilePath Else SavePDFAsOtherFormat = NewFilePath(1) Else SavePDFAsOtherFormat = Empty End If Else SavePDFAsOtherFormat = Null End If End Function Function DTrim(inText) DTrim = Trim(Replace(inText, Chr(8), " ")) Do While InStr(1, DTrim, " ") > 0 DTrim = Replace(DTrim, " ", " ") Loop End Function Function InArray(checkValue, paramArr()) Dim i InArray = False For i = 0 To UBound(paramArr) If checkValue = paramArr(i) Then InArray = True: Exit For Next End Function Function ClearString(checkValue, paramArr()) Dim i For i = 0 To UBound(paramArr) checkValue = Replace(checkValue, paramArr(i), "") Next ClearString = checkValue End Function Sub BuildFilesList(pathMask) objRegExp.Pattern = EscapePath(pathMask) Dim searchFolder Dim k: k = InStr(pathMask, "?") Dim kk: kk = InStr(pathMask, "*") If kk > 0 And (k > kk Or k = 0) Then k = kk If k > 0 Then k = InStrRev(pathMask, "\", k) Else k = InStrRev(pathMask, "\", -1) If k > 0 Then searchFolder = Left(pathMask, k) Else searchFolder = pathMask SearchFiles objFSO.GetFolder(searchFolder) End Sub Sub SearchFiles(inFolder) Dim i, j, objFile, objSubFolder, objMatches For Each objFile In inFolder.Files Set objMatches = objRegExp.Execute(objFile.Path) If objMatches.Count > 0 Then For i = 0 To objMatches.Count - 1 Dim fileExt: fileExt = LCase(objFSO.GetExtensionName(objMatches.Item(i).Value)) If fileExt = "pdf" Or doParseImages And InArray(fileExt, imagesExtensions) Then FilesCount = FilesCount + 1 ReDim Preserve PdfFiles(FilesCount) PdfFiles(FilesCount) = objMatches.Item(i).Value End If Next End If Next For Each objSubFolder In inFolder.SubFolders SearchFiles objSubFolder Next End Sub Function EscapePath(path) EscapePath = Replace(Replace(Replace(Replace(path, "\", "\\"), ".", "\."), "*", ".*"), "?", ".?") End Function |