Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make module executable (python -m pystdf) to start the scripts #10

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
*.swp
*.swo
build/
dist/
docs/build
pystdf.egg-info/
MANIFEST
55 changes: 55 additions & 0 deletions pystdf/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

"""
Execute the pystdf module: apply a conversion to an STDF file.

Usage:
python -m pystdf [conversion] [stdf-file]

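Conversion is one of txt, xml, xlsx, slice or count. The slice conversion takes two further arguments after the file: the start record index and the record count.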
"""

import sys


def print_help():
    """Print the ``python -m pystdf`` usage summary to standard output.

    The text lists the accepted conversions and, for ``slice``, the two
    extra positional arguments that conversion needs after the file name.
    """
    help_lines = (
        "pystdf",
        "",
        "Usage:",
        "    python -m pystdf [conversion] [stdf-file]",
        "    python -m pystdf slice [stdf-file] [start] [count]",
        "",
        "Conversion is either txt, xml, xlsx, slice or count.",
        "For slice, [start] is the index of the first record to output and",
        "[count] is the number of records to output.",
        # Trailing empty entry keeps the blank line that ended the original
        # help text once print() appends its own newline.
        "",
    )
    print("\n".join(help_lines))


def main():
    """Run the conversion named on the command line against an STDF file.

    Reads ``sys.argv`` as ``[prog, conversion, stdf-file, *extra]``. The help
    text is printed, and nothing else is done, when:

    * fewer than two arguments follow the program name,
    * the conversion is not one of txt, xml, xlsx, slice or count, or
    * ``slice`` is not followed by exactly two integers (the start record
      index and the record count).
    """
    if len(sys.argv) < 3:
        print_help()
        return

    conversion, stdf_path = sys.argv[1:3]
    extra_args = sys.argv[3:]

    # NOTE(review): the project owner commented on this dispatch that some of
    # these seem like format conversions (txt, xml, xlsx), slicing seems like
    # an output option, and count seems like a summary function, so it might
    # be worth splitting them into different subcommands and option arguments.

    # Each script module is imported only in the branch that uses it, so one
    # conversion does not load another conversion's imports.
    if conversion == 'txt':
        from pystdf.script import totext
        # totext.process_file takes a list of names; only the input is given.
        totext.process_file([stdf_path])
    elif conversion == 'xml':
        from pystdf.script import toxml
        toxml.process_file(stdf_path)
    elif conversion == 'xlsx':
        from pystdf.script import toexcel
        toexcel.to_excel(stdf_path)
    elif conversion == 'slice':
        try:
            # Unpacking fails with ValueError on a wrong argument count, and
            # int() fails with ValueError on a non-integer, so one handler
            # covers every malformed slice invocation.
            start, record_count = (int(arg) for arg in extra_args)
        except ValueError:
            print_help()
            return
        # Aliased so the module does not shadow the builtin ``slice``.
        from pystdf.script import slice as slice_script
        slice_script.text_slice(stdf_path, start, record_count)
    elif conversion == 'count':
        from pystdf.script import count as count_script
        count_script.process_file(stdf_path)
    else:
        print_help()


if __name__ == '__main__':
    main()
Empty file added pystdf/script/__init__.py
Empty file.
48 changes: 20 additions & 28 deletions scripts/rec_index → pystdf/script/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,59 +19,51 @@
#

from __future__ import print_function
import sys, os, re
import sys
import re

try:
import gzip
have_gzip = True
except ImportError:
have_gzip = False
gzip = None
try:
import bz2
have_bz2 = True
except ImportError:
have_bz2 = False
bz2 = None

from pystdf.IO import Parser
from pystdf.Indexing import RecordIndexer
import pystdf.V4

#def info(type, value, tb):
# import traceback, pdb
# # You are not in interactive mode; print the exception
# traceback.print_exception(type, value, tb)
# print
# # ... then start the debugger in post-mortem mode
# pdb.pm()
#sys.excepthook = info

gzPattern = re.compile('\.g?z', re.I)
bz2Pattern = re.compile('\.bz2', re.I)
GZ_PATTERN = re.compile('\.g?z', re.I)
BZ2_PATTERN = re.compile('\.bz2', re.I)
Comment on lines +38 to +39
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
GZ_PATTERN = re.compile('\.g?z', re.I)
BZ2_PATTERN = re.compile('\.bz2', re.I)
GZ_PATTERN = re.compile('\.g?z$', re.I)
BZ2_PATTERN = re.compile('\.bz2$', re.I)

These are better names. I just noticed that I also forgot to anchor the file-extension patterns; total drive-by.

It might also be reasonable to support stdin in the CLI, so one could pipe uncompressed output directly, without relying on this tool to do everything.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this auto-uncompressing logic is repeated several places. We might refactor that...


def process_file(fn):
filename, = sys.argv[1:]

def process_file(file_name):
reopen_fn = None
if filename is None:
if file_name is None:
f = sys.stdin
elif gzPattern.search(filename):
if not have_gzip:
elif GZ_PATTERN.search(file_name):
if not gzip:
print("gzip is not supported on this system", file=sys.stderr)
sys.exit(1)
reopen_fn = lambda: gzip.open(filename, 'rb')
reopen_fn = lambda: gzip.open(file_name, 'rb')
f = reopen_fn()
elif bz2Pattern.search(filename):
if not have_bz2:
elif BZ2_PATTERN.search(file_name):
if not bz2:
print("bz2 is not supported on this system", file=sys.stderr)
sys.exit(1)
reopen_fn = lambda: bz2.BZ2File(filename, 'rb')
reopen_fn = lambda: bz2.BZ2File(file_name, 'rb')
f = reopen_fn()
else:
f = open(filename, 'rb')
p=Parser(inp=f, reopen_fn=reopen_fn)
p.addSink(RecordIndexer())
f = open(file_name, 'rb')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be using `with open(...) as f` here and everywhere else we open files in this project.

indexer = RecordIndexer()
p = Parser(inp=f, reopen_fn=reopen_fn)
p.addSink(indexer)
p.parse()
f.close()
print("Record count: ", indexer.recid)


if __name__ == "__main__":
if len(sys.argv) < 2:
Expand Down
27 changes: 17 additions & 10 deletions scripts/stdf_slice → pystdf/script/slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,27 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

import sys

from pystdf.IO import Parser
from pystdf.Mapping import *
from pystdf.Writers import *
from pystdf.Mapping import StreamMapper
from pystdf.Writers import TextWriter


def text_slice(file_name, start_index, record_count):
    """Emit ``record_count`` records of an STDF file as text, from ``start_index``.

    The file is parsed twice through the same ``Parser``: a first pass with a
    ``StreamMapper`` sink to collect record positions, then a second pass,
    after seeking to the start record, with a ``TextWriter`` sink added.

    :param file_name: path of the STDF file, opened in binary mode.
    :param start_index: index into ``StreamMapper.indexes`` of the first
        record to emit.
    :param record_count: number of records parsed in the second pass.
    """
    # The context manager closes the file even if parsing raises; the
    # previous version never closed it.
    with open(file_name, 'rb') as f:
        p = Parser(inp=f)
        record_mapper = StreamMapper()
        p.addSink(record_mapper)
        # First pass: parse through the end of the requested range so the
        # mapper has an entry for start_index.
        p.parse(count=start_index + record_count)
        # TextWriter is built without a stream argument here; presumably it
        # writes to standard output by default (confirm in pystdf.Writers).
        p.addSink(TextWriter())
        # indexes[start_index] is used as a seek target, so it is presumably
        # the byte offset of that record (confirm in pystdf.Mapping).
        f.seek(record_mapper.indexes[start_index])
        p.parse(count=record_count)


if __name__ == '__main__':
filename, start, count = sys.argv[1:4]
start = int(start)
count = int(count)

f = open(filename, 'rb')
p=Parser(inp=f)
record_mapper = StreamMapper()
p.addSink(record_mapper)
p.parse(count=start+count)
p.addSink(AtdfWriter())
f.seek(record_mapper.indexes[start])
p.parse(count=count)
text_slice(filename, start, count)
45 changes: 23 additions & 22 deletions scripts/stdf2excel → pystdf/script/toexcel.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,37 @@
#
# Modified: 2017 Minh-Hai Nguyen
#
import sys, os

import sys
from pystdf.Importer import STDF2DataFrame
import pystdf.V4
from pystdf import V4
import pandas as pd


def toExcel(fname,tables):
""" Export the tables from toTables to Excel
def to_excel(stdf_file, xlsx_file=None):
"""
Export the tables from toTables to Excel.
"""
writer = pd.ExcelWriter(fname)
for k,v in tables.items():
# Make sure the order of columns complies the specs
record = [r for r in V4.records if r.__class__.__name__.upper()==k]
if len(record)==0:
print("Ignore exporting table %s: No such record type exists." %k)
if xlsx_file is None:
xlsx_file = stdf_file[:stdf_file.rfind('.')] + ".xlsx"
print("Importing %s" % stdf_file)
tables = STDF2DataFrame(stdf_file)
print("Exporting to %s" % xlsx_file)

writer = pd.ExcelWriter(xlsx_file)
for k, v in tables.items():
# Make sure the order of columns complies with the specs
record = [r for r in V4.records if r.__class__.__name__.upper() == k]
if len(record) == 0:
print("Ignore exporting table %s: No such record type exists." % k)
else:
columns = [field[0] for field in record[0].fieldMap]
v.to_excel(writer,sheet_name=k,columns=columns,index=False,na_rep="N/A")
v.to_excel(writer, sheet_name=k, columns=columns, index=False, na_rep="N/A")
writer.save()

if __name__=="__main__":
if len(sys.argv)==1:

if __name__ == "__main__":
if len(sys.argv) == 1:
print("Usage: %s <stdf file>" % (sys.argv[0]))
else:
fin = sys.argv[1]
if len(sys.argv)>2:
fout = sys.argv[2]
else:
fout = fin[:fin.rfind('.')]+".xlsx"
print("Importing %s" %fin)
dfs= STDF2DataFrame(fin)
print("Exporting to %s" %fout)
toExcel(fout,dfs)
to_excel(sys.argv[1], sys.argv[2])
41 changes: 23 additions & 18 deletions scripts/stdf2text → pystdf/script/totext.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,58 +19,63 @@
#

from __future__ import print_function
import sys, re
import sys
import re

try:
import gzip
have_gzip = True
except ImportError:
have_gzip = False
gzip = None
try:
import bz2
have_bz2 = True
except ImportError:
have_bz2 = False
bz2 = None

from pystdf.IO import Parser
from pystdf.Writers import TextWriter
import pystdf.V4

gzPattern = re.compile('\.g?z', re.I)
bz2Pattern = re.compile('\.bz2', re.I)

def process_file(fnames):
filename = fnames[0]
GZ_PATTERN = re.compile('\.g?z', re.I)
BZ2_PATTERN = re.compile('\.bz2', re.I)


def process_file(file_names):
filename = file_names[0]

reopen_fn = None
if filename is None:
f = sys.stdin
elif gzPattern.search(filename):
if not have_gzip:
elif GZ_PATTERN.search(filename):
if not gzip:
print("gzip is not supported on this system", file=sys.stderr)
sys.exit(1)
reopen_fn = lambda: gzip.open(filename, 'rb')
f = reopen_fn()
elif bz2Pattern.search(filename):
if not have_bz2:
elif BZ2_PATTERN.search(filename):
if not bz2:
print("bz2 is not supported on this system", file=sys.stderr)
sys.exit(1)
reopen_fn = lambda: bz2.BZ2File(filename, 'rb')
f = reopen_fn()
else:
f = open(filename, 'rb')
p=Parser(inp=f, reopen_fn=reopen_fn)
if len(fnames)<2:
p = Parser(inp=f, reopen_fn=reopen_fn)
if len(file_names) < 2:
p.addSink(TextWriter())
p.parse()
else:
with open(fnames[1],'w') as fout:
with open(file_names[1], 'w') as fout:
p.addSink(TextWriter(stream=fout))
p.parse()
f.close()

if __name__ == "__main__":

def main():
if len(sys.argv) < 2:
print("Usage: %s <stdf file>" % (sys.argv[0]))
else:
process_file(sys.argv[1:])


if __name__ == "__main__":
main()
29 changes: 14 additions & 15 deletions scripts/stdf2xml → pystdf/script/toxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,51 +19,50 @@
#

from __future__ import print_function
import sys, re
import sys
import re

try:
import gzip
have_gzip = True
except ImportError:
have_gzip = False
gzip = None
try:
import bz2
have_bz2 = True
except ImportError:
have_bz2 = False
bz2 = None

from pystdf.IO import Parser
from pystdf.Writers import XmlWriter
import pystdf.V4

gzPattern = re.compile('\.g?z', re.I)
bz2Pattern = re.compile('\.bz2', re.I)

def process_file(fn):
filename, = sys.argv[1:]
GZ_PATTERN = re.compile('\.g?z', re.I)
BZ2_PATTERN = re.compile('\.bz2', re.I)


def process_file(filename):
reopen_fn = None
if filename is None:
f = sys.stdin
elif gzPattern.search(filename):
if not have_gzip:
elif GZ_PATTERN.search(filename):
if not gzip:
print("gzip is not supported on this system", file=sys.stderr)
sys.exit(1)
reopen_fn = lambda: gzip.open(filename, 'rb')
f = reopen_fn()
elif bz2Pattern.search(filename):
if not have_bz2:
elif BZ2_PATTERN.search(filename):
if not bz2:
print("bz2 is not supported on this system", file=sys.stderr)
sys.exit(1)
reopen_fn = lambda: bz2.BZ2File(filename, 'rb')
f = reopen_fn()
else:
f = open(filename, 'rb')
p=Parser(inp=f, reopen_fn=reopen_fn)
p = Parser(inp=f, reopen_fn=reopen_fn)
p.addSink(XmlWriter())
p.parse()
f.close()


if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: %s <stdf file>" % (sys.argv[0]))
Expand Down
Loading