-
-
Notifications
You must be signed in to change notification settings - Fork 85
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make module executable (python -m pystdf) to start the scripts #10
base: master
Are you sure you want to change the base?
Changes from all commits
cb0818f
7edbe54
801e59e
3a64ce7
320f479
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,7 @@ | |
*.swp | ||
*.swo | ||
build/ | ||
dist/ | ||
docs/build | ||
pystdf.egg-info/ | ||
MANIFEST |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
|
||
""" | ||
Execute the pystdf module: apply a conversion to an STDF file. | ||
|
||
Usage: | ||
python -m pystdf [conversion] [stdf-file] | ||
|
||
Conversion is either txt, xml, xlsx, slice or count. | ||
""" | ||
|
||
import sys | ||
|
||
|
||
def print_help():
    """Print the command-line usage summary for ``python -m pystdf`` to stdout.

    The text lists the supported conversions and spells out the two extra
    integer arguments that the ``slice`` conversion needs.
    """
    print("""pystdf

Usage:
  python -m pystdf [conversion] [stdf-file]
  python -m pystdf slice [stdf-file] [start] [count]

Conversion is either txt, xml, xlsx, slice or count.
The slice conversion additionally requires the integer arguments
start and count.
""")
|
||
|
||
def main():
    """Run the conversion selected on the command line.

    Reads ``sys.argv`` as ``[prog, conversion, stdf-file, *extra]`` and
    dispatches to the matching ``pystdf.script`` module. The usage text is
    printed (and nothing else happens) when fewer than two arguments are
    given, when the conversion name is unknown, or when ``slice`` is not
    followed by exactly two integers.

    Converter modules are imported lazily so that only the selected
    converter's module is loaded.
    """
    if len(sys.argv) < 3:
        print_help()
        return

    conversion, file_name = sys.argv[1:3]
    extra_args = sys.argv[3:]

    if conversion == 'txt':
        from pystdf.script import totext
        # Unlike the other converters, totext.process_file is handed a
        # one-element list rather than the bare file name.
        totext.process_file([file_name])
    elif conversion == 'xml':
        from pystdf.script import toxml
        toxml.process_file(file_name)
    elif conversion == 'xlsx':
        from pystdf.script import toexcel
        toexcel.to_excel(file_name)
    elif conversion == 'slice':
        # Validate before importing/doing any work: a wrong number of
        # arguments (unpacking) and a non-numeric argument (int) both
        # surface as ValueError and are reported as a usage error.
        try:
            start, record_count = (int(arg) for arg in extra_args)
        except ValueError:
            print_help()
            return
        from pystdf.script import slice as slice_script
        slice_script.text_slice(file_name, start, record_count)
    elif conversion == 'count':
        from pystdf.script import count as count_script
        count_script.process_file(file_name)
    else:
        # Unknown conversion name.
        print_help()
|
||
|
||
# Entry point when the module is executed rather than imported.
if __name__ == "__main__":
    main()
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -19,59 +19,51 @@ | |||||||||
# | ||||||||||
|
||||||||||
from __future__ import print_function | ||||||||||
import sys, os, re | ||||||||||
import sys | ||||||||||
import re | ||||||||||
|
||||||||||
try: | ||||||||||
import gzip | ||||||||||
have_gzip = True | ||||||||||
except ImportError: | ||||||||||
have_gzip = False | ||||||||||
gzip = None | ||||||||||
try: | ||||||||||
import bz2 | ||||||||||
have_bz2 = True | ||||||||||
except ImportError: | ||||||||||
have_bz2 = False | ||||||||||
bz2 = None | ||||||||||
|
||||||||||
from pystdf.IO import Parser | ||||||||||
from pystdf.Indexing import RecordIndexer | ||||||||||
import pystdf.V4 | ||||||||||
|
||||||||||
#def info(type, value, tb): | ||||||||||
# import traceback, pdb | ||||||||||
# # You are not in interactive mode; print the exception | ||||||||||
# traceback.print_exception(type, value, tb) | ||||||||||
# # ... then start the debugger in post-mortem mode | ||||||||||
# pdb.pm() | ||||||||||
#sys.excepthook = info | ||||||||||
|
||||||||||
gzPattern = re.compile('\.g?z', re.I) | ||||||||||
bz2Pattern = re.compile('\.bz2', re.I) | ||||||||||
# Compressed-file detection by file-name suffix (case-insensitive).
# The patterns are anchored at the end of the name so that only a real
# extension matches: ".gz" / ".z" for gzip, ".bz2" for bzip2. Without the
# anchor, names such as "lot.zip" or "lot.gz.stdf" were misdetected.
GZ_PATTERN = re.compile(r'\.g?z$', re.I)
BZ2_PATTERN = re.compile(r'\.bz2$', re.I)
Comment on lines
+38
to
+39
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
These are better names; I just noticed that I also forgot to anchor the file extension. Total drive-by. It might also be reasonable to support stdin in the CLI, so one could pipe uncompressed output directly, without relying on this tool to do everything. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks like this auto-uncompressing logic is repeated in several places. We might refactor that... |
||||||||||
|
||||||||||
def process_file(fn): | ||||||||||
filename, = sys.argv[1:] | ||||||||||
|
||||||||||
def process_file(file_name):
    """Parse an STDF stream and print how many records it contains.

    ``file_name`` selects the input:

    * ``None`` -- read from standard input;
    * a name matching ``GZ_PATTERN`` -- opened through ``gzip``;
    * a name matching ``BZ2_PATTERN`` -- opened through ``bz2``;
    * anything else -- opened as a plain binary file.

    Exits with status 1 (after a message on stderr) when the compression
    module needed for the file is not available. The input is closed even
    if parsing raises.
    """
    reopen_fn = None
    if file_name is None:
        # Every other branch hands the parser a binary stream, so prefer the
        # underlying byte stream of stdin where it exists (Python 3) and
        # fall back to sys.stdin itself otherwise (Python 2).
        f = getattr(sys.stdin, 'buffer', sys.stdin)
    elif GZ_PATTERN.search(file_name):
        if gzip is None:
            print("gzip is not supported on this system", file=sys.stderr)
            sys.exit(1)
        # The parser is given a way to reopen the compressed stream.
        reopen_fn = lambda: gzip.open(file_name, 'rb')
        f = reopen_fn()
    elif BZ2_PATTERN.search(file_name):
        if bz2 is None:
            print("bz2 is not supported on this system", file=sys.stderr)
            sys.exit(1)
        reopen_fn = lambda: bz2.BZ2File(file_name, 'rb')
        f = reopen_fn()
    else:
        f = open(file_name, 'rb')

    indexer = RecordIndexer()
    try:
        p = Parser(inp=f, reopen_fn=reopen_fn)
        p.addSink(indexer)
        p.parse()
    finally:
        # Release the handle on both the success and the failure path.
        f.close()
    print("Record count: ", indexer.recid)
|
||||||||||
|
||||||||||
if __name__ == "__main__": | ||||||||||
if len(sys.argv) < 2: | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some of these seem like format conversions (txt, xml, xlsx), slicing seems like an output option, and count seems like a summary function. Might be worth splitting these into different subcommands and option arguments.