cmars · florisla · Jul 11, 2018 · Jul 11, 2018 · Jul 11, 2018 · Jul 12, 2018
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,7 @@
 *.swp
 *.swo
 build/
+dist/
 docs/build
+pystdf.egg-info/
+MANIFEST
diff --git a/pystdf/__main__.py b/pystdf/__main__.py
@@ -0,0 +1,55 @@
+
+"""
+Execute the pystdf module: apply a conversion to an STDF file.
+
+Usage:
+    python -m pystdf [conversion] [stdf-file]
+
+    Conversion is either txt, xml, xlsx, slice or count.
+    slice takes two extra arguments after the file: start index and record count.
+"""
+
+import sys
+
+
+def print_help():
+    """Print the command-line usage text to standard output."""
+    print("""pystdf
+
+Usage:
+    python -m pystdf [conversion] [stdf-file]
+
+    Conversion is either txt, xml, xlsx, slice or count.
+    slice takes two extra arguments after the file: start index and record count.
+""")
+
+
+def main():
+    """
+    Run the conversion requested on the command line.
+
+    Reads ``sys.argv``: the conversion name, the STDF file and, for
+    ``slice`` only, a start index and a record count.  Prints the usage
+    text and returns when the arguments do not select a conversion.
+
+    Raises ValueError when the ``slice`` start index or record count is
+    not an integer.
+    """
+    if len(sys.argv) < 3:
+        print_help()
+        return
+
+    conversion, stdf_file = sys.argv[1:3]
+    args = sys.argv[3:]
+
+    # Each script module is imported only in the branch that uses it.
+    if conversion == 'txt':
+        from pystdf.script import totext
+        totext.process_file([stdf_file])
+    elif conversion == 'xml':
+        from pystdf.script import toxml
+        toxml.process_file(stdf_file)
+    elif conversion == 'xlsx':
+        from pystdf.script import toexcel
+        toexcel.to_excel(stdf_file)
+    elif conversion == 'slice':
+        if len(args) != 2:
+            # Unpacking anything but exactly two values would raise
+            # ValueError; show the usage instead.
+            print_help()
+            return
+        # Aliased so the module does not shadow the builtin ``slice``.
+        from pystdf.script import slice as stdf_slice
+        start_index, record_count = args
+        stdf_slice.text_slice(stdf_file, int(start_index), int(record_count))
+    elif conversion == 'count':
+        from pystdf.script import count
+        count.process_file(stdf_file)
+    else:
+        print_help()
+
+
+# Entry point when the package is executed with ``python -m pystdf``.
+if __name__ == '__main__':
+    main()
diff --git a/pystdf/script/__init__.py b/pystdf/script/__init__.py
diff --git a/scripts/rec_index → pystdf/script/count.py b/scripts/rec_index → pystdf/script/count.py
@@ -19,59 +19,51 @@
 #
 
 from __future__ import print_function
-import sys, os, re
+import sys
+import re
 
 try:
     import gzip
-    have_gzip = True
 except ImportError:
-    have_gzip = False
+    gzip = None
 try:
     import bz2
-    have_bz2 = True
 except ImportError:
-    have_bz2 = False
+    bz2 = None
 
 from pystdf.IO import Parser
 from pystdf.Indexing import RecordIndexer
-import pystdf.V4
 
-#def info(type, value, tb):
-#    import traceback, pdb
-#    # You are not in interactive mode; print the exception
-#    traceback.print_exception(type, value, tb)
-#    print
-#    # ... then star the debugger in post-mortem mode
-#    pdb.pm()
-#sys.excepthook = info
 
-gzPattern = re.compile('\.g?z', re.I)
-bz2Pattern = re.compile('\.bz2', re.I)
+GZ_PATTERN = re.compile('\.g?z', re.I)
+BZ2_PATTERN = re.compile('\.bz2', re.I)
-GZ_PATTERN = re.compile('\.g?z', re.I)
-BZ2_PATTERN = re.compile('\.bz2', re.I)
+GZ_PATTERN = re.compile('\.g?z$', re.I)
+BZ2_PATTERN = re.compile('\.bz2$', re.I)
-GZ_PATTERN = re.compile('\.g?z', re.I)
-BZ2_PATTERN = re.compile('\.bz2', re.I)
+# Raw strings keep ``\.`` a regex escape instead of an invalid string escape.
+GZ_PATTERN = re.compile(r'\.g?z$', re.I)
+BZ2_PATTERN = re.compile(r'\.bz2$', re.I)
 
-def process_file(fn):
-    filename, = sys.argv[1:]
 
+def process_file(file_name):
+    """
+    Parse an STDF file and print the record count reported by the indexer.
+
+    ``file_name`` selects the input: ``None`` reads ``sys.stdin``, a name
+    matching ``GZ_PATTERN`` is opened with gzip, one matching ``BZ2_PATTERN``
+    with bz2, and anything else as a plain binary file.  Exits with status 1
+    when the needed compression module could not be imported.
+    """
+    # reopen_fn is handed to Parser; it stays None for stdin and plain files.
+    # NOTE(review): presumably lets the parser reopen compressed streams
+    # instead of seeking -- confirm in pystdf.IO.
     reopen_fn = None
-    if filename is None:
+    if file_name is None:
         f = sys.stdin
-    elif gzPattern.search(filename):
-        if not have_gzip:
+    elif GZ_PATTERN.search(file_name):
+        if not gzip:
             print("gzip is not supported on this system", file=sys.stderr)
             sys.exit(1)
-        reopen_fn = lambda: gzip.open(filename, 'rb')
+        reopen_fn = lambda: gzip.open(file_name, 'rb')
         f = reopen_fn()
-    elif bz2Pattern.search(filename):
-        if not have_bz2:
+    elif BZ2_PATTERN.search(file_name):
+        if not bz2:
             print("bz2 is not supported on this system", file=sys.stderr)
             sys.exit(1)
-        reopen_fn = lambda: bz2.BZ2File(filename, 'rb')
+        reopen_fn = lambda: bz2.BZ2File(file_name, 'rb')
         f = reopen_fn()
     else:
-        f = open(filename, 'rb')
-    p=Parser(inp=f, reopen_fn=reopen_fn)
-    p.addSink(RecordIndexer())
+        f = open(file_name, 'rb')
+    # Keep a reference to the indexer so its record id can be printed below.
+    indexer = RecordIndexer()
+    p = Parser(inp=f, reopen_fn=reopen_fn)
+    p.addSink(indexer)
     p.parse()
     f.close()
+    print("Record count: ", indexer.recid)
+
 
 if __name__ == "__main__":
     if len(sys.argv) < 2:

diff --git a/scripts/stdf_slice → pystdf/script/slice.py b/scripts/stdf_slice → pystdf/script/slice.py
@@ -18,20 +18,27 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 #
 
+import sys
+
 from pystdf.IO import Parser
-from pystdf.Mapping import *
-from pystdf.Writers import *
+from pystdf.Mapping import StreamMapper
+from pystdf.Writers import TextWriter
+
+
+def text_slice(file_name, start_index, record_count):
+    """
+    Write ``record_count`` records of an STDF file through a TextWriter,
+    starting at record ``start_index``.
+
+    The file is parsed twice: a first pass with a StreamMapper sink collects
+    record offsets up to the end of the slice, then the file is positioned
+    at the offset of ``start_index`` and parsed again with a TextWriter sink
+    attached (created without an explicit stream).
+    """
+    # The context manager closes the file even when parsing fails; the
+    # handle was previously never closed.
+    with open(file_name, 'rb') as f:
+        p = Parser(inp=f)
+        record_mapper = StreamMapper()
+        p.addSink(record_mapper)
+        p.parse(count=start_index + record_count)
+        p.addSink(TextWriter())
+        f.seek(record_mapper.indexes[start_index])
+        p.parse(count=record_count)
+
 
 if __name__ == '__main__':
+    # Arguments: <stdf file> <start index> <record count>
     filename, start, count = sys.argv[1:4]
     start = int(start)
     count = int(count)
 
-    f = open(filename, 'rb')
-    p=Parser(inp=f)
-    record_mapper = StreamMapper()
-    p.addSink(record_mapper)
-    p.parse(count=start+count)
-    p.addSink(AtdfWriter())
-    f.seek(record_mapper.indexes[start])
-    p.parse(count=count)
+    text_slice(filename, start, count)
diff --git a/scripts/stdf2excel → pystdf/script/toexcel.py b/scripts/stdf2excel → pystdf/script/toexcel.py
@@ -19,36 +19,37 @@
 #
 # Modified: 2017 Minh-Hai Nguyen
 #
-import sys, os
+
+import sys
 from pystdf.Importer import STDF2DataFrame
-import pystdf.V4
+from pystdf import V4
 import pandas as pd
 
 
-def toExcel(fname,tables):
-    """ Export the tables from toTables to Excel
+def to_excel(stdf_file, xlsx_file=None):
+    """
+    Import an STDF file and export its tables to an Excel workbook.
+
+    stdf_file -- path of the STDF file, read with STDF2DataFrame.
+    xlsx_file -- path of the workbook to write; when None, ``stdf_file``
+                 with its extension replaced by ``.xlsx``.
+
+    Each table whose name matches a V4 record type becomes one sheet;
+    other tables are reported and skipped.
     """
-    writer = pd.ExcelWriter(fname)
-    for k,v in tables.items():
-        # Make sure the order of columns complies the specs
-        record = [r for r in V4.records if r.__class__.__name__.upper()==k]
-        if len(record)==0:
-            print("Ignore exporting table %s: No such record type exists." %k)
+    if xlsx_file is None:
+        # Imported locally: this change drops the module-level ``import os``.
+        import os.path
+        # splitext only inspects the last path component and leaves a name
+        # without an extension intact; slicing at rfind('.') cut off the
+        # last character in that case (rfind returns -1).
+        xlsx_file = os.path.splitext(stdf_file)[0] + ".xlsx"
+    print("Importing %s" % stdf_file)
+    tables = STDF2DataFrame(stdf_file)
+    print("Exporting to %s" % xlsx_file)
+
+    writer = pd.ExcelWriter(xlsx_file)
+    for k, v in tables.items():
+        # Write the columns in the order given by the record's fieldMap
+        record = [r for r in V4.records if r.__class__.__name__.upper() == k]
+        if len(record) == 0:
+            print("Ignore exporting table %s: No such record type exists." % k)
         else:
             columns = [field[0] for field in record[0].fieldMap]
-            v.to_excel(writer,sheet_name=k,columns=columns,index=False,na_rep="N/A")
+            v.to_excel(writer, sheet_name=k, columns=columns, index=False, na_rep="N/A")
-    writer.save()
+    # close() writes the workbook; ExcelWriter.save() no longer exists in
+    # pandas 2.0 and later.
+    writer.close()
 
-if __name__=="__main__":
-    if len(sys.argv)==1:
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
         print("Usage: %s <stdf file>" % (sys.argv[0]))
     else:
-        fin = sys.argv[1]
-        if len(sys.argv)>2:
-            fout = sys.argv[2]
-        else:
-            fout = fin[:fin.rfind('.')]+".xlsx"
-        print("Importing %s" %fin)
-        dfs= STDF2DataFrame(fin)
-        print("Exporting to %s" %fout)
-        toExcel(fout,dfs)
+        # The output path is optional: forward it only when it was given, so
+        # a single argument does not raise IndexError on sys.argv[2].
+        to_excel(*sys.argv[1:3])
diff --git a/scripts/stdf2text → pystdf/script/totext.py b/scripts/stdf2text → pystdf/script/totext.py
@@ -19,58 +19,63 @@
 #
 
 from __future__ import print_function
-import sys, re
+import sys
+import re
 
 try:
     import gzip
-    have_gzip = True
 except ImportError:
-    have_gzip = False
+    gzip = None
 try:
     import bz2
-    have_bz2 = True
 except ImportError:
-    have_bz2 = False
+    bz2 = None
 
 from pystdf.IO import Parser
 from pystdf.Writers import TextWriter
-import pystdf.V4
 
-gzPattern = re.compile('\.g?z', re.I)
-bz2Pattern = re.compile('\.bz2', re.I)
 
-def process_file(fnames):
-    filename = fnames[0]
+# Anchored at the end so only a trailing .gz/.z or .bz2 suffix selects
+# decompression; raw strings keep ``\.`` a regex escape.
+GZ_PATTERN = re.compile(r'\.g?z$', re.I)
+BZ2_PATTERN = re.compile(r'\.bz2$', re.I)
+
+
+def process_file(file_names):
+    """
+    Convert an STDF file to text with TextWriter.
+
+    ``file_names`` is a sequence: the first item is the input file (``None``
+    reads ``sys.stdin``; names matching ``GZ_PATTERN`` / ``BZ2_PATTERN`` are
+    opened with gzip / bz2), and an optional second item is the path the
+    text is written to.  With a single item the TextWriter is created
+    without an explicit stream.  Exits with status 1 when the needed
+    compression module could not be imported.
+    """
+    filename = file_names[0]
 
     reopen_fn = None
     if filename is None:
         f = sys.stdin
-    elif gzPattern.search(filename):
-        if not have_gzip:
+    elif GZ_PATTERN.search(filename):
+        if not gzip:
             print("gzip is not supported on this system", file=sys.stderr)
             sys.exit(1)
         reopen_fn = lambda: gzip.open(filename, 'rb')
         f = reopen_fn()
-    elif bz2Pattern.search(filename):
-        if not have_bz2:
+    elif BZ2_PATTERN.search(filename):
+        if not bz2:
             print("bz2 is not supported on this system", file=sys.stderr)
             sys.exit(1)
         reopen_fn = lambda: bz2.BZ2File(filename, 'rb')
         f = reopen_fn()
     else:
         f = open(filename, 'rb')
-    p=Parser(inp=f, reopen_fn=reopen_fn)
-    if len(fnames)<2:
+    p = Parser(inp=f, reopen_fn=reopen_fn)
+    # A second name, when present, is the output text file.
+    if len(file_names) < 2:
         p.addSink(TextWriter())
         p.parse()
     else:
-        with open(fnames[1],'w') as fout:
+        with open(file_names[1], 'w') as fout:
             p.addSink(TextWriter(stream=fout))
             p.parse()
     f.close()
 
-if __name__ == "__main__":
+
+def main():
+    """Run process_file on the command-line arguments, or print usage when none are given."""
     if len(sys.argv) < 2:
         print("Usage: %s <stdf file>" % (sys.argv[0]))
     else:
         process_file(sys.argv[1:])
+
+
+# Run main() when this module is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/scripts/stdf2xml → pystdf/script/toxml.py b/scripts/stdf2xml → pystdf/script/toxml.py
@@ -19,51 +19,50 @@
 #
 
 from __future__ import print_function
-import sys, re
+import sys
+import re
 
 try:
     import gzip
-    have_gzip = True
 except ImportError:
-    have_gzip = False
+    gzip = None
 try:
     import bz2
-    have_bz2 = True
 except ImportError:
-    have_bz2 = False
+    bz2 = None
 
 from pystdf.IO import Parser
 from pystdf.Writers import XmlWriter
-import pystdf.V4
 
-gzPattern = re.compile('\.g?z', re.I)
-bz2Pattern = re.compile('\.bz2', re.I)
 
-def process_file(fn):
-    filename, = sys.argv[1:]
+# Anchored at the end so only a trailing .gz/.z or .bz2 suffix selects
+# decompression; raw strings keep ``\.`` a regex escape.
+GZ_PATTERN = re.compile(r'\.g?z$', re.I)
+BZ2_PATTERN = re.compile(r'\.bz2$', re.I)
 
+
+def process_file(filename):
+    """
+    Parse an STDF file with an XmlWriter sink attached.
+
+    ``filename`` selects the input: ``None`` reads ``sys.stdin``, a name
+    matching ``GZ_PATTERN`` is opened with gzip, one matching ``BZ2_PATTERN``
+    with bz2, and anything else as a plain binary file.  Exits with status 1
+    when the needed compression module could not be imported.
+    """
     reopen_fn = None
     if filename is None:
         f = sys.stdin
-    elif gzPattern.search(filename):
-        if not have_gzip:
+    elif GZ_PATTERN.search(filename):
+        if not gzip:
             print("gzip is not supported on this system", file=sys.stderr)
             sys.exit(1)
         reopen_fn = lambda: gzip.open(filename, 'rb')
         f = reopen_fn()
-    elif bz2Pattern.search(filename):
-        if not have_bz2:
+    elif BZ2_PATTERN.search(filename):
+        if not bz2:
             print("bz2 is not supported on this system", file=sys.stderr)
             sys.exit(1)
         reopen_fn = lambda: bz2.BZ2File(filename, 'rb')
         f = reopen_fn()
     else:
         f = open(filename, 'rb')
-    p=Parser(inp=f, reopen_fn=reopen_fn)
+    p = Parser(inp=f, reopen_fn=reopen_fn)
     p.addSink(XmlWriter())
     p.parse()
     f.close()
 
+
 if __name__ == "__main__":
     if len(sys.argv) < 2:
         print("Usage: %s <stdf file>" % (sys.argv[0]))