DEV Community

Query Filter
Query Filter

Posted on

py schema

!/usr/bin/env python

schema_parser_batch.py - Python 2.7.5 compatible

import json
import re
import sys
import os

def parse_isql_output(filename, table_name):
"""Parse the messy isql output and convert to JSON schema"""

schema = {
    "table_name": table_name,
    "columns": [],
    "export_info": {
        "delimiter": ",",
        "has_headers": False,
        "text_qualifier": "\"",
        "date_format": "Mon DD YYYY HH:MI:SS:mmmAM", 
        "null_string": "NULL"
    }
}

with open(filename, 'r') as f:
    content = f.read()

# Extract column information from the messy output
lines = content.split('\n')
i = 0
col_id = 1

while i < len(lines):
    line = lines[i].strip()

    # Skip command lines and empty lines
    if line and not line.startswith('+') and line != table_name:
        # This might be a column name
        col_name = line.strip()

        # Look for data type in next non-empty lines
        data_type = None
        for j in range(i+1, min(i+5, len(lines))):
            next_line = lines[j].strip()
            if next_line and next_line not in [table_name, 'int', 'varchar', 'datetime', 'char', 'text', 'image', 'decimal', 'numeric', 'money', 'bit']:
                # Common Sybase data types pattern matching
                type_patterns = {
                    'int': r'\bint\b',
                    'varchar': r'\bvarchar\b',
                    'char': r'\bchar\b',
                    'datetime': r'\bdatetime\b',
                    'text': r'\btext\b',
                    'image': r'\bimage\b',
                    'decimal': r'\bdecimal\b',
                    'numeric': r'\bnumeric\b',
                    'money': r'\bmoney\b',
                    'bit': r'\bbit\b'
                }

                for type_key, pattern in type_patterns.items():
                    if re.search(pattern, next_line.lower()):
                        data_type = type_key
                        break
                if data_type:
                    break

        if col_name and data_type:
            # Map Sybase types to Oracle types
            type_mapping = {
                'int': 'NUMBER',
                'varchar': 'VARCHAR2', 
                'char': 'VARCHAR2',
                'datetime': 'DATE',
                'text': 'CLOB',
                'image': 'BLOB',
                'decimal': 'NUMBER',
                'numeric': 'NUMBER', 
                'money': 'NUMBER',
                'bit': 'NUMBER'
            }

            oracle_type = type_mapping.get(data_type, 'VARCHAR2')

            column_info = {
                "name": col_name,
                "sybase_type": data_type,
                "oracle_type": oracle_type,
                "column_order": col_id
            }

            # Try to extract length from the messy data
            for j in range(i, min(i+3, len(lines))):
                if 'NULL' in lines[j]:
                    # Look for numbers near NULL markers
                    numbers = re.findall(r'\b(\d+)\b', lines[j])
                    if numbers:
                        column_info["length"] = int(numbers[0])
                        break

            schema["columns"].append(column_info)
            col_id += 1

        i += 1
    else:
        i += 1

return schema
Enter fullscreen mode Exit fullscreen mode

def save_schema_json(schema, output_file):
"""Save schema as JSON file"""
with open(output_file, 'w') as f:
json.dump(schema, f, indent=2, sort_keys=True)
print "Schema saved to: %s" % output_file

def main():
if len(sys.argv) != 4:
print "Usage: python schema_parser_batch.py "
print "Example: python schema_parser_batch.py isql_output.txt CacheSession CacheSession_schema.json"
print "Example: python schema_parser_batch.py users_output.txt Users Users_schema.json"
sys.exit(1)

input_file = sys.argv[1]
table_name = sys.argv[2]
output_file = sys.argv[3]

try:
    schema = parse_isql_output(input_file, table_name)
    save_schema_json(schema, output_file)

    # Print summary
    print "\nParsed %d columns for table '%s':" % (len(schema["columns"]), table_name)
    for col in schema["columns"]:
        print "  %s: %s -> %s" % (col["name"], col["sybase_type"], col["oracle_type"])

except Exception as e:
    print "Error processing table %s: %s" % (table_name, str(e))
    sys.exit(1)
Enter fullscreen mode Exit fullscreen mode

if name == "main":
main()

Top comments (0)