DEV Community

Sachin Varghese
Sachin Varghese

Posted on

Python compare script 7

import os
import difflib
import re

# Java comments and string/char literals. They are blanked out before scanning
# so that code-like text inside them is not mistaken for a declaration.
_JAVA_NON_CODE = re.compile(
    r'//[^\n]*'                 # line comment
    r'|/\*.*?\*/'               # block comment (may span lines)
    r'|"(?:\\.|[^"\\\n])*"'     # string literal
    r"|'(?:\\.|[^'\\\n])*'",    # char literal
    re.DOTALL,
)

# Declaration shape: optional modifiers, a type-like token, the name, a
# parameter list, then anything but ';'. The token in front of the name is
# captured too so that keyword-led statements can be rejected afterwards.
_JAVA_METHOD_DECL = re.compile(
    r'(?:public|protected|private|static|\s)+([\w<>\[\]]+)\s+(\w+)'
    r'\s*\([\w\s,\[\]<>.]*\)\s*(?:\{|[^;])'
)

# Keywords that start a statement or expression; a match whose leading token
# or "name" is one of these is not a method declaration
# (e.g. ``else if (x) {`` or ``return helper(a) + b;``).
_JAVA_STATEMENT_KEYWORDS = frozenset({
    'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'catch', 'try',
    'return', 'throw', 'new', 'assert', 'synchronized',
})


def extract_method_names(file_path):
    """Extract method names from a Java file.

    The file is read as UTF-8 (undecodable bytes are ignored), comments and
    string/char literals are replaced by a space, and the remaining text is
    scanned with a regular expression for ``type name(params)`` shapes that
    are not immediately terminated by ``;``.

    This is a heuristic, not a Java parser. Matches whose leading token or
    captured name is a statement keyword are dropped.

    Args:
        file_path: Path of the Java source file to scan.

    Returns:
        A list of the matched names in order of appearance; duplicates
        (e.g. overloads) are kept.
    """
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        content = file.read()

    code_only = _JAVA_NON_CODE.sub(' ', content)
    return [
        name
        for leading, name in _JAVA_METHOD_DECL.findall(code_only)
        if leading not in _JAVA_STATEMENT_KEYWORDS
        and name not in _JAVA_STATEMENT_KEYWORDS
    ]

def get_package_methods(repo_path):
    """Collect method names from every ``.java`` file under *repo_path*.

    The tree is walked with ``os.walk``. Each directory that contains at
    least one ``.java`` file contributes one entry whose key is the
    directory's path relative to *repo_path* with the OS path separator
    replaced by ``.``.

    Args:
        repo_path: Root directory of the repository to scan.

    Returns:
        A dict mapping each such key to the list of names returned by
        ``extract_method_names`` for the ``.java`` files in that directory.
    """
    methods_by_package = {}
    for current_dir, _subdirs, filenames in os.walk(repo_path):
        java_files = [name for name in filenames if name.endswith('.java')]
        if not java_files:
            # Directories without Java sources get no entry at all.
            continue
        package_key = os.path.relpath(current_dir, repo_path).replace(os.sep, '.')
        bucket = methods_by_package.setdefault(package_key, [])
        for name in java_files:
            bucket.extend(extract_method_names(os.path.join(current_dir, name)))
    return methods_by_package

def compare_methods(repo1_methods, repo2_methods):
    """Compare method names from two repositories and find methods unique to each.

    Args:
        repo1_methods: Mapping of package name to a list of method names
            for the first repository.
        repo2_methods: Same mapping for the second repository.

    Returns:
        A dict keyed by package name, containing only packages whose sets of
        method names differ. Each value is a dict with the sorted lists
        ``'only_in_repo1'`` and ``'only_in_repo2'``. Packages are inserted in
        sorted order so the result's iteration order is the same on every run.
    """
    comparison_results = {}
    all_packages = set(repo1_methods.keys()) | set(repo2_methods.keys())

    # Iterating the raw set would make the key order depend on string
    # hashing, which differs between interpreter runs.
    for package in sorted(all_packages):
        first = set(repo1_methods.get(package, ()))
        second = set(repo2_methods.get(package, ()))
        if first == second:
            continue
        comparison_results[package] = {
            'only_in_repo1': sorted(first - second),
            'only_in_repo2': sorted(second - first),
        }

    return comparison_results

def main(repo1_path, repo2_path, output_file='method_comparison_results.txt'):
    """Compare the Java methods of two repositories and write a text report.

    Method names are gathered with ``get_package_methods`` and diffed with
    ``compare_methods``. The report lists, per package and in sorted package
    order, the method names found in only one of the two repositories.

    Args:
        repo1_path: Root directory of the first repository.
        repo2_path: Root directory of the second repository.
        output_file: Path of the report to write (overwritten if present).

    Returns:
        None. If a path is empty or is not an existing directory, an error
        is printed and no report is written.
    """
    if not repo1_path or not repo2_path:
        print("Error: Repository paths must be specified")
        return

    # os.walk yields nothing for a missing directory, which would otherwise
    # make a mistyped path look like an empty repository in the report.
    for path in (repo1_path, repo2_path):
        if not os.path.isdir(path):
            print(f"Error: Repository path is not a directory: {path}")
            return

    comparison_results = compare_methods(
        get_package_methods(repo1_path),
        get_package_methods(repo2_path),
    )

    lines = [
        "Method Comparison Results\n",
        f"Repo 1: {repo1_path}\n",
        f"Repo 2: {repo2_path}\n",
        "=" * 80 + "\n\n",
    ]
    sections = (
        ('only_in_repo1', "Methods only in Repo1:\n"),
        ('only_in_repo2', "Methods only in Repo2:\n"),
    )
    # Sorted so the report is identical from run to run.
    for package in sorted(comparison_results):
        result = comparison_results[package]
        lines.append(f"Comparing methods in package: {package}\n")
        lines.append("-" * 50 + "\n")
        for key, heading in sections:
            if result[key]:
                lines.append(heading)
                lines.extend(f"  {method}\n" for method in result[key])
                lines.append("\n")

    # Write results to a text file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"Comparison results saved to {output_file}")

if __name__ == "__main__":
    # Local import: only the command-line entry point needs argparse.
    import argparse

    parser = argparse.ArgumentParser(
        description="Compare Java method names between two repositories."
    )
    # Both paths are optional; when omitted, the placeholders below are used,
    # so replace them with the actual paths to your Java repositories or pass
    # the paths on the command line.
    parser.add_argument(
        'repo1_path',
        nargs='?',
        default='/path/to/your/first/java/repo',
        help="root directory of the first Java repository",
    )
    parser.add_argument(
        'repo2_path',
        nargs='?',
        default='/path/to/your/second/java/repo',
        help="root directory of the second Java repository",
    )
    # Optional: Specify the output file path
    parser.add_argument(
        '-o', '--output',
        dest='output_file',
        default='java_method_comparison.txt',
        help="path of the report file to write",
    )
    args = parser.parse_args()

    main(args.repo1_path, args.repo2_path, args.output_file)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)