#!/usr/bin/env python
# schema_parser_batch.py - Python 2.7.5 compatible
import json
import re
import sys
import os
def parse_isql_output(filename, table_name):
"""Parse the messy isql output and convert to JSON schema"""
schema = {
"table_name": table_name,
"columns": [],
"export_info": {
"delimiter": ",",
"has_headers": False,
"text_qualifier": "\"",
"date_format": "Mon DD YYYY HH:MI:SS:mmmAM",
"null_string": "NULL"
}
}
with open(filename, 'r') as f:
content = f.read()
# Extract column information from the messy output
lines = content.split('\n')
i = 0
col_id = 1
while i < len(lines):
line = lines[i].strip()
# Skip command lines and empty lines
if line and not line.startswith('+') and line != table_name:
# This might be a column name
col_name = line.strip()
# Look for data type in next non-empty lines
data_type = None
for j in range(i+1, min(i+5, len(lines))):
next_line = lines[j].strip()
if next_line and next_line not in [table_name, 'int', 'varchar', 'datetime', 'char', 'text', 'image', 'decimal', 'numeric', 'money', 'bit']:
# Common Sybase data types pattern matching
type_patterns = {
'int': r'\bint\b',
'varchar': r'\bvarchar\b',
'char': r'\bchar\b',
'datetime': r'\bdatetime\b',
'text': r'\btext\b',
'image': r'\bimage\b',
'decimal': r'\bdecimal\b',
'numeric': r'\bnumeric\b',
'money': r'\bmoney\b',
'bit': r'\bbit\b'
}
for type_key, pattern in type_patterns.items():
if re.search(pattern, next_line.lower()):
data_type = type_key
break
if data_type:
break
if col_name and data_type:
# Map Sybase types to Oracle types
type_mapping = {
'int': 'NUMBER',
'varchar': 'VARCHAR2',
'char': 'VARCHAR2',
'datetime': 'DATE',
'text': 'CLOB',
'image': 'BLOB',
'decimal': 'NUMBER',
'numeric': 'NUMBER',
'money': 'NUMBER',
'bit': 'NUMBER'
}
oracle_type = type_mapping.get(data_type, 'VARCHAR2')
column_info = {
"name": col_name,
"sybase_type": data_type,
"oracle_type": oracle_type,
"column_order": col_id
}
# Try to extract length from the messy data
for j in range(i, min(i+3, len(lines))):
if 'NULL' in lines[j]:
# Look for numbers near NULL markers
numbers = re.findall(r'\b(\d+)\b', lines[j])
if numbers:
column_info["length"] = int(numbers[0])
break
schema["columns"].append(column_info)
col_id += 1
i += 1
else:
i += 1
return schema
def save_schema_json(schema, output_file):
"""Save schema as JSON file"""
with open(output_file, 'w') as f:
json.dump(schema, f, indent=2, sort_keys=True)
print "Schema saved to: %s" % output_file
def main():
    if len(sys.argv) != 4:
        print "Usage: python schema_parser_batch.py <input_file> <table_name> <output_json>"
        print "Example: python schema_parser_batch.py isql_output.txt CacheSession CacheSession_schema.json"
        print "Example: python schema_parser_batch.py users_output.txt Users Users_schema.json"
        sys.exit(1)
    input_file = sys.argv[1]
    table_name = sys.argv[2]
    output_file = sys.argv[3]
    try:
        schema = parse_isql_output(input_file, table_name)
        save_schema_json(schema, output_file)
        # Print summary
        print "\nParsed %d columns for table '%s':" % (len(schema["columns"]), table_name)
        for col in schema["columns"]:
            print "  %s: %s -> %s" % (col["name"], col["sybase_type"], col["oracle_type"])
    except Exception as e:
        print "Error processing table %s: %s" % (table_name, str(e))
        sys.exit(1)

if name == "main":
main()
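
# Example run (a sketch: the file/table names come from the usage examples above,
# and the column names shown are hypothetical, not from a real table). The console
# output follows the print statements in save_schema_json() and main():
#
#   $ python schema_parser_batch.py isql_output.txt CacheSession CacheSession_schema.json
#   Schema saved to: CacheSession_schema.json
#
#   Parsed 2 columns for table 'CacheSession':
#     session_id: int -> NUMBER
#     created_at: datetime -> DATE
#
# The JSON written to CacheSession_schema.json carries "table_name", the fixed
# "export_info" block, and one entry per column such as
#   {"name": "session_id", "sybase_type": "int", "oracle_type": "NUMBER", "column_order": 1}
# with an optional "length" when a number could be pulled from the NULL/NOT NULL line.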