[WinActor]CSVの差分出力（高速版） |

前回のスクリプトで試しに大きめのデータを実行してみたら、エラーこそ出ないもののめっちゃ時間かかった。

1500×1500くらいのガチ二重ループだものね・・・。

ってことで、さっくり高速で終わるバージョンのスクリプトも書いてみました。

１．CSV差分出力（高速版）

高速化する為に、ADODB.Connectionっていうの使ってCSVをテーブルとして扱ってSQL実行しています。

前回のスクリプトは端末選びませんが、今回のスクリプトは端末によってはODBCデータソースとかDBドライバーの登録状況によって動かない場合もあるかも。

それでもこっちの方が高速だから、ひとまずこっち試してエラー出た場合だけ前回のスクリプト使うってことでいいかな。

ただ、制約としてこっちのスクリプトの場合は、２つのCSVが同じフォルダに配置されていないと動きません。

いつもの如く、スクリプト実行ステージのスクリプトタブに張り付ければすぐに使えるスクリプトです。

Dim BaseFolder, objFS, SrcFN, MtchFN
Dim CsvPath, CsvName
Dim SrcTxt, lnTXT, ColHeader
Dim KeyCols
Dim OutPutFile, OutPutStr

Set objFS = CreateObject("Scripting.FileSystemObject")

SrcFN = !親CSV!
MtchFN = !間引くCSV!
KeyCols = split(Replace(!判定カラム(カンマ区切り)!, """", ""), ",")
OutPutFile = !出力ファイル名!

BaseFolder = objFS.GetParentFolderName(SrcFN)

If BaseFolder <> objFS.GetParentFolderName(MtchFN) Then
  Err.Raise 501, "", "2つのCSVは同じフォルダに配置して下さい。"
  WScript.Quit
End If

For Each CsvName In Array(SrcFN, MtchFN)
  If Not objFS.FileExists(CsvName) Then
    Err.Raise 404, "", "「" & CsvName & "」ファイルが見つかりません。"
    WScript.Quit
  End If
Next

SrcTxt = Read_TEXT(SrcFN)
lnTXT = split(SrcTxt, vbCrLf)
ColHeader = splitEx(lnTXT(0), NULL, NULL, NULL)

SrcFN = objFS.GetFileName(SrcFN)
MtchFN = objFS.GetFileName(MtchFN)

Set objFS = Nothing

ColSQL = ""
For i=0 To UBound(ColHeader)
  If Len(ColSQL) > 0 Then ColSQL = ColSQL & ", "
  ColSQL = ColSQL & "[t1." & ColHeader(i) & "]"
Next

PartSQL = ""
For i=0 To UBound(KeyCols)
  If Len(PartSQL) > 0 Then PartSQL = PartSQL & " AND "
  PartSQL = PartSQL & "t1." & KeyCols(i) & " = t2." & KeyCols(i)
Next
PartSQL = PartSQL & ") where t2." & KeyCols(0) & " is null"

strSQL = "SELECT " & ColSQL & " FROM [" & SrcFN & "] as t1 left join [" & MtchFN & "] AS t2 on (" & PartSQL

Set aq = New QueryADO
Set rec = aq.GetRecordFromCSV(BaseFolder, strSQL)

For Each fld In rec.Fields
  OutPutStr = OutPutStr & fld.Name & ","
Next
OutPutStr = OutPutStr & vbNewLine

Do While Not rec.EOF
    lnStr = ""
    For Each fld In rec.Fields
      If Len(lnStr) > 0 Then lnStr = lnStr & ","
      lnStr = lnStr & rec(fld.Name).Value
    Next
    OutPutStr = OutPutStr & lnStr & vbNewLine
    rec.MoveNext
Loop
Set aq = Nothing

Write_TEXT OutPutStr, OutPutFile


WScript.Quit

' ADOでCSVにSQL発行
Class QueryADO
  Private objADO
  Private objRS
  
  Public Sub Class_Initialize
    Set objADO = CreateObject("ADODB.Connection")
  End Sub
  
  Public Sub Class_Terminate
    On Error Resume Next
    objRS.Close
    Set objRs = Nothing
    objADO.Close
    Set objADO = Nothing
    On Error GoTo 0
  End Sub
  
  Function GetRecordFromCSV(BaseFolder, pSQL)
    objADO.Open "Driver={Microsoft Text Driver (*.txt; *.csv)};DBQ=" & BaseFolder & ";ReadOnly=1"
    Set GetRecordFromCSV = objADO.Execute(pSQL)
  End Function
End Class


'■ メイン処理ここまで ■ 以下、呼び出し関数 ■

Function Read_TEXT(FilePath)
  Dim objFS, objTXT, Rslt
  Set objFS = CreateObject("Scripting.FileSystemObject")
  If objFS.FileExists(FilePath) Then
    Set objTXT = objFS.OpenTextFile(FilePath, 1)
    Rslt = objTXT.ReadAll()
    objTXT.close
    Set objTXT = Nothing
  Else
    Rslt = ""
  End If
  Set objFS = Nothing
  Read_TEXT = Rslt
End Function

Function Write_TEXT(ContentStr, FilePath)
  Dim objFS, objTXT
  On Error Resume Next
  Set objFS = CreateObject("Scripting.FileSystemObject")
  Set objTXT = objFS.OpenTextFile(FilePath, 2, True)
  objTXT.Write ContentStr
  objTXT.close
  Set objTXT = Nothing
  Set objFS = Nothing
  If Err.Number > 0 Then
    Write_TEXT = False
    Err.Clear
  Else
    Write_TEXT = True
  End If
End Function


' 引用: http://blog.livedoor.jp/tea_cocoa_cake/archives/5356742.html
'! Split()のテキスト区切り対応版
'! テキスト区切り文字(例CSVの")に対応した区切りを行う

'! @param   source        元文字列
'! @param   colDelim      列区切り文字(NULL可、NULLの場合「,」使用)
'! @param   lineDelim     行区切り文字(NULL可、NULLの場合vbCrLfを使用)
'! @param   textDelim     テキスト区切り文字(NULL可、NULLの場合「"」を使用) (textDelim2つでテキスト区切り文字エスケープ)
'! @return  1次元配列 (改行がある場合は配列要素としてvbNullChar単体が格納される
public function splitEx(source, colDelim, lineDelim, textDelim)
    splitEx = NULL
    dim textMode: textMode = False
    
    if (isNull(colDelim) ) Then
        colDelim = ","
    end if
    if (isNull(lineDelim) ) Then
        lineDelim = vbCrLf
    end if
    if (isNull(textDelim) ) Then
        textDelim = """"
    end if
    
    dim ab : set ab = New ArrayBuilder
    dim textBuf : textBuf = "" ' テキストバッファ
    dim char_i  : char_i = 1   ' 文字列のインデックス
    Do while (char_i <= len(source))
        dim curChar : curChar = getChar(source, char_i)
        
        if(textMode = True) Then
	        select case curChar
	        case textDelim
                '! 1文字先読み And エスケープ判定
                if ( getChar(source, char_i + 1) = textDelim ) Then
                    ' エスケープ
                    textBuf = textBuf & getChar(source, char_i + 1)
                    char_i = char_i + 1 ' 先読み分カウンタを加算
                else
                    ' テキストモードOFF
                    textMode = False
                end if
	        case Else
	            textBuf = textBuf & curChar
	        end select
        else
	        select case curChar
	        case colDelim
                ab.add textBuf
                textBuf = ""
	        case lineDelim
                ab.add textBuf
	            ab.add vbNullChar ' 改行を示す
                textBuf = ""
            case vbCr
                '! 1文字先読み And lineDelim=vbCrLf(※2文字)の場合の特殊な判定
                if ( getChar(source, char_i + 1) = vbLf And lineDelim = vbCrLf ) Then
	                ab.add textBuf
	                ab.add vbNullChar ' 改行を示す
	                textBuf = ""
	                char_i = char_i + 1 ' 先読み分カウンタを加算
	            else
	                textBuf = textBuf & curChar
                end if
	        case textDelim
                ' テキストモードON
                textMode = True
	        case Else
	            textBuf = textBuf & curChar
	        end select
        end if
        
        char_i = char_i + 1
    loop
    
    ' 最後にテキストバッファの残りを処理
    ab.add textBuf

    splitEx = ab.toArray()
end function


'! 文字列から1文字取得。文字列終端(VBScripでは通常参照しない)の場合ではvbNullChar(00)を返す
'! @param  source  元文字列
'! @param  index   文字列のインデックス
'! @return 文字
private function getChar(source, index)
    getChar = ""

    if (index <= 0 Or index > (len(source) + 1) ) Then
        err.raise 1025,,"範囲外の参照"
        exit function
    end if
    
    ' 文字列終端の場合
    if (index = (len(source) + 1) ) Then
        getChar = vbNullChar
    end if
    
    getChar = mid(source, index, 1)
end function


'! 配列生成
class ArrayBuilder
private my_lastIndex
private my_array()

Public Sub Class_Initialize
    dim INITIAL_SIZE : INITIAL_SIZE = 8
    my_lastIndex = -1
    redim Preserve my_array(INITIAL_SIZE - 1) ' 注意...配列は(指定サイズ + 1)のサイズで領域が確保される
End Sub

Public Sub Class_Terminate
End Sub

'! 値の参照
'! @param  index  配列インデックス
'! @return 値
public property get item(index)
    if (index < 0 Or index > my_lastIndex) Then
        err.raise 1025,,"範囲外の参照"
        exit property
    end if
    
    item = my_array(index)
end property

'! 値のセット
'! @param  index  配列インデックス
'! @param  value  値
public property let item(index, value)
    if (index < 0) Then
        err.raise 1025,,"範囲外の参照"
        exit property
    end if
    
    ' Expand
    Do While (index >= getSize() )
        call expand()
    Loop
    
    if (index > my_lastIndex) Then
        my_lastIndex    = index
    end if

    my_array(index) = value
end property

'! 最後尾に値の追加
'! @param  value  値
public sub add(value)
    me.item(my_lastIndex + 1) = value
end sub

'! 配列拡張
private sub expand()
    '+ wscript.echo "#Expanded!"
    ' 再確保のオーバヘッド軽減のため大きめにサイズを拡張
    redim Preserve my_array(getSize() * 2 - 1)
end sub

'! 配列サイズ取得
'! @return 現在の配列サイズ
private function getSize()
    '+ wscript.echo "#size:" & UBound(my_array) - LBound(my_array) + 1 
    getSize = UBound(my_array) - LBound(my_array) + 1 
end function

'! 要素に合わせて配列サイズを縮小
'! @param  arr  配列
'! @return 縮小後の配列
private function fit(ByRef arr)
    redim Preserve arr(my_lastIndex)
    fit = arr
end function

'! 配列を返す
'! @return 配列
public function toArray()
    dim tmpArray : tmpArray = my_array
    tmpArray = fit(tmpArray)
    toArray = tmpArray
end function

end class

使い方のイメージは前回の記事を参照して下さい。

ADODB.Connection使うと、ExcelのシートにもSQL実行できるみたいですねぃ。

使ったことないから今度試してみよ。

[WinActor]CSVの差分出力（高速版）

１．CSV差分出力（高速版）

コメントを残す コメントをキャンセル

Related Post

[WinActor]合成音声でしゃべる。[WinActor]合成音声でしゃべる。

[WinActor]ひらがな⇔カタカナのフリガナ変換[WinActor]ひらがな⇔カタカナのフリガナ変換

[WinActor]CSVにSQL実行【抽出・削除】[WinActor]CSVにSQL実行【抽出・削除】

コメントを残すコメントをキャンセル