DEV Community

drake
drake

Posted on

通用时间解析

import time
import dateparser
from datetime import datetime
from dateparser.search import search_dates

"""通用时间解析算法"""

# Layouts handled directly with the standard library once a string has been
# normalized; anything else is delegated to dateparser.
_PLAIN_FORMATS = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M', '%Y-%m-%d')

# Settings passed to dateparser's search_dates.
_SEARCH_SETTINGS = {
    'DATE_ORDER': 'YMD',
    'STRICT_PARSING': True,
    'PREFER_DATES_FROM': 'past',
}


def _parse_iso8601(time_str):
    """Parse an ISO 8601 string that has a 'T' separator and a zone marker.

    Returns the Unix timestamp in whole seconds, or None when the string
    does not look like such a value or cannot be parsed.
    """
    if 'T' not in time_str:
        return None
    # A zone is signalled by 'Z', '+', or a third '-' (negative UTC offset).
    if not ('Z' in time_str or '+' in time_str or time_str.count('-') > 2):
        return None
    # datetime.fromisoformat only accepts a trailing 'Z' from Python 3.11 on.
    if time_str.endswith('Z'):
        time_str = time_str[:-1] + '+00:00'
    try:
        return int(datetime.fromisoformat(time_str).timestamp())
    except (ValueError, OverflowError, OSError):
        return None


def _normalize_chinese_date(time_str):
    """Rewrite 'YYYY年MM月DD日' and dotted dates into dash-separated form."""
    if '年' in time_str and '月' in time_str and '日' in time_str:
        time_str = (
            time_str.replace('年', '-').replace('月', '-').replace('日', ' ')
        )
    # Dotted dates such as '2025.09.03'; strings containing 'T' are left
    # untouched so ISO fractional seconds are not damaged.
    if '.' in time_str and 'T' not in time_str:
        time_str = time_str.replace('.', '-')
    return time_str


def _is_clock_time(time_str):
    """Return True when the string is a bare 'HH:MM' clock time."""
    try:
        time.strptime(time_str.strip(), '%H:%M')
    except ValueError:
        return False
    return True


def _parse_plain_datetime(time_str):
    """Parse 'Y-m-d', 'Y-m-d H:M' or 'Y-m-d H:M:S' as local time.

    Returns the Unix timestamp in whole seconds, or None when no layout
    matches.
    """
    candidate = time_str.strip()
    for layout in _PLAIN_FORMATS:
        try:
            parsed = datetime.strptime(candidate, layout)
        except ValueError:
            continue
        try:
            return int(parsed.timestamp())
        except (ValueError, OverflowError, OSError):
            # The date is outside the range the platform can convert.
            return None
    return None


def _parse_with_dateparser(time_str):
    """Best-effort parse through dateparser; returns a timestamp or None.

    search_dates is tried first, then dateparser.parse. Any failure inside
    the third-party parser is treated as "not parseable" rather than
    propagated, because this function is the last resort.
    """
    try:
        found = search_dates(
            time_str,
            languages=['zh', 'en'],
            settings=_SEARCH_SETTINGS,
        )
        if found:
            # Each hit is a (matched_text, datetime) pair; use the first.
            return int(found[0][1].timestamp())
    except Exception:
        pass

    try:
        parsed = dateparser.parse(time_str)
        if parsed is not None:
            return int(parsed.timestamp())
    except Exception:
        pass

    return None


def parse_time(time_str):
    """Convert a human-readable time string into a Unix timestamp.

    Supported inputs include ISO 8601 with a zone marker, Chinese
    'YYYY年MM月DD日' dates, dotted dates, bare 'HH:MM' clock times (taken to
    mean today), the words '刚刚' / '刚才' ("just now"), and whatever else
    dateparser can interpret, such as relative expressions.

    Args:
        time_str: The text to parse.

    Returns:
        The timestamp in whole seconds as an int, or None when the input is
        empty, is not a string, or cannot be parsed. Strings without zone
        information are interpreted in the local time zone.
    """
    if not time_str or not isinstance(time_str, str):
        return None

    time_str = time_str.strip()

    # 1. ISO 8601 with an explicit zone is unambiguous, so try it first.
    iso_result = _parse_iso8601(time_str)
    if iso_result is not None:
        return iso_result

    # 2. "Just now" maps to the current time.
    if '刚刚' in time_str or '刚才' in time_str:
        return int(time.time())

    # 3. A bare clock time such as "09:23" refers to today.
    if _is_clock_time(time_str):
        today = datetime.now().strftime('%Y-%m-%d')
        time_str = f"{today} {time_str}"

    # 4. Normalize Chinese and dotted dates to dash-separated form.
    formatted_time = _normalize_chinese_date(time_str)

    # 5. Fully specified timestamps need no heuristics.
    plain_result = _parse_plain_datetime(formatted_time)
    if plain_result is not None:
        return plain_result

    # 6. Everything else (partial dates, relative times, other languages).
    return _parse_with_dateparser(formatted_time)



if __name__ == '__main__':
    # Demo: parse an ISO 8601 UTC timestamp and show it as local time.
    sample = '2025-09-03T09:23:53.000Z'
    res = parse_time(sample)
    print(f"解析结果: {res}")
    # Compare against None so a valid timestamp of 0 (the epoch) still prints.
    if res is not None:
        readable_time = datetime.fromtimestamp(res).strftime('%Y-%m-%d %H:%M:%S')
        print(f"可读时间: {readable_time}")
Enter fullscreen mode Exit fullscreen mode

Top comments (0)