DEV Community

Sachin Varghese
Sachin Varghese

Posted on

Python compare script 6

import os
import difflib
import re

# Java comments and string/char literals. They are blanked out before the
# method scan so that declarations quoted inside them are not reported.
_JAVA_NOISE_RE = re.compile(
    r'"""[\s\S]*?"""'            # text block (Java 15+)
    r'|"(?:\\.|[^"\\\n])*"'      # string literal
    r"|'(?:\\.|[^'\\\n])*'"      # char literal
    r'|//[^\n]*'                 # line comment
    r'|/\*[\s\S]*?\*/'           # block comment (including Javadoc)
)

# A declaration must start with a whole-word modifier, optionally followed by
# more modifiers, then a return type, the method name (captured) and "(".
_JAVA_METHOD_RE = re.compile(
    r'\b(?:public|protected|private|static|synchronized|final|abstract'
    r'|native|default)\b'
    r'[\w\s]*\s+[\w<>\[\]]+\s+([\w]+)\s*\('
)


def extract_method_names(file_path):
    """Extract method names from a Java file.

    The file is read as UTF-8 (undecodable bytes are ignored), comments and
    string/char literals are replaced by a space, and the remaining text is
    scanned with a regular expression for method declarations.

    This is a heuristic, not a Java parser: only declarations that begin with
    a modifier keyword are found, constructors are not matched (they have no
    return type), and return types containing characters other than word
    characters, ``<``, ``>``, ``[`` or ``]`` (for example ``Map<K, V>``) are
    not matched.

    Args:
        file_path: Path to the ``.java`` file to scan.

    Returns:
        A list of method names in order of appearance; duplicates (such as
        overloads) are kept.
    """
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        content = file.read()
    # Replace with a space (not an empty string) so the tokens on either side
    # of a removed comment or literal do not get glued together.
    code_only = _JAVA_NOISE_RE.sub(' ', content)
    return _JAVA_METHOD_RE.findall(code_only)

def get_package_methods(repo_path):
    """Collect the method names of every ``.java`` file under ``repo_path``.

    Files are grouped by the directory that contains them. The group key is
    that directory's path relative to ``repo_path`` with path separators
    replaced by dots, so files directly in ``repo_path`` are grouped under
    ``'.'``.

    Args:
        repo_path: Root directory of the repository to walk.

    Returns:
        A dict mapping each dotted directory key to the list of method names
        found in its Java files (duplicates kept).
    """
    methods_by_package = {}
    for current_dir, _, filenames in os.walk(repo_path):
        # The key depends only on the directory, so compute it once per dir.
        package_key = os.path.relpath(current_dir, repo_path).replace(os.sep, '.')
        for filename in filenames:
            if not filename.endswith('.java'):
                continue
            found = extract_method_names(os.path.join(current_dir, filename))
            methods_by_package.setdefault(package_key, []).extend(found)
    return methods_by_package

def compare_methods(repo1_methods, repo2_methods):
    """Report, per package, the method names present in only one repository.

    Args:
        repo1_methods: Dict mapping package name to a list of method names.
        repo2_methods: Dict of the same shape for the second repository.

    Returns:
        A dict keyed by package name. Each value is a dict with the keys
        ``'only_in_repo1'`` and ``'only_in_repo2'``, each holding a sorted
        list of names. Packages whose name sets are equal are omitted.
    """
    differences = {}
    for pkg in set(repo1_methods.keys()) | set(repo2_methods.keys()):
        left = set(repo1_methods.get(pkg, []))
        right = set(repo2_methods.get(pkg, []))
        # Equal sets mean neither side has a unique name: nothing to report.
        if left == right:
            continue
        differences[pkg] = {
            'only_in_repo1': sorted(left - right),
            'only_in_repo2': sorted(right - left),
        }
    return differences

def main(repo1_path, repo2_path):
    """Print the per-package method differences between two repositories.

    If either path is empty, an error message is printed and nothing else
    happens. Otherwise both repositories are scanned and, for each package
    with differences, the names found only in the first repository are listed
    with a ``+`` prefix and those found only in the second with a ``-``
    prefix.

    Args:
        repo1_path: Root directory of the first repository.
        repo2_path: Root directory of the second repository.
    """
    if not (repo1_path and repo2_path):
        print("Error: Repository paths must be specified")
        return

    differences = compare_methods(
        get_package_methods(repo1_path),
        get_package_methods(repo2_path),
    )

    # (result key, heading, line prefix) for each side of the comparison.
    sections = (
        ('only_in_repo1', "Methods only in Repo1:", '+'),
        ('only_in_repo2', "Methods only in Repo2:", '-'),
    )
    for package, result in differences.items():
        print(f"\nComparing methods in package: {package}")
        for key, heading, marker in sections:
            names = result[key]
            if not names:
                continue
            print(heading)
            for name in names:
                print(f"{marker} {name}")

if __name__ == "__main__":
    # Replace these paths with the actual paths to your repositories.
    # While either one is left empty, main() only prints an error and returns.
    repo1_path = ''
    repo2_path = ''
    main(repo1_path, repo2_path)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)