Python is an excellent tool for scanning and manipulating textual data.
Example: Extract a token from each matching line of certain files in a folder and write the results to a CSV file
import os
class Info(object):
    """Record holding a single ID value.

    Attributes:
        ID: The stored value, or None when nothing was supplied.
    """

    def __init__(self, ID=None):
        self.ID = ID

    def __repr__(self):
        # Readable form for debugging, e.g. Info(ID='abc').
        return "{}(ID={!r})".format(type(self).__name__, self.ID)

    def to_dict(self):
        """Return the record as a dict with the single key ``'ID'``."""
        return {
            'ID': self.ID,
        }
def ParseFile(filepath, keyword, stopword, token):
    """Collect the text found between keyword and stopword on each line.

    Reads ``filepath`` line by line. For every line that contains
    ``keyword`` followed later on the same line by ``stopword``, the text
    between the end of the first ``keyword`` and the start of the next
    ``stopword`` is wrapped in an ``Info`` and appended to the module-level
    ``my_list``. Lines missing either marker are skipped.

    Args:
        filepath: Path of the text file to scan.
        keyword: Marker that precedes the token.
        stopword: Marker that terminates the token.
        token: Never read; the incoming value is ignored. Kept so existing
            call sites keep working.

    Returns:
        None. Results are delivered by appending to ``my_list``.
    """
    # NOTE(review): the original indentation was lost; the append is assumed
    # to belong inside the "stopword found" branch -- confirm.
    with open(filepath) as source:
        for line in source:
            key_pos = line.find(keyword)
            if key_pos < 0:
                continue
            start = key_pos + len(keyword)
            # Search only past the keyword so an earlier stopword is ignored.
            end = line.find(stopword, start)
            if end < 0:
                continue
            my_list.append(Info(line[start:end]))
# Accumulates one Info per token found; ParseFile appends to this list.
my_list = []
# Placeholder handed to ParseFile for its token argument.
token = ''
# Raw string: "\L" is not a valid escape sequence, so the non-raw literal
# only produced the intended path by accident and warns on current Python.
rootdir = r"C:\Log"

# Walk the tree below rootdir and parse every file whose name starts
# with "tfs_".
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        if file.startswith("tfs_"):
            filepath = os.path.join(subdir, file)
            ParseFile(filepath, "/sitecore/", ".item", token)
import pandas as pd
# Turn each collected entry into its dict form, one record per entry.
rows = [entry.to_dict() for entry in my_list]
my_df = pd.DataFrame.from_records(rows)
# Write the values only: no index column and no header row.
my_df.to_csv('out.csv', index=False, header=False)