#!/usr/bin/env python3
# Timestamp: "2026-01-05 14:30:00 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex-code/src/scitex/datetime/_normalize_timestamp.py
"""
Timestamp Standardization Utilities
Functionality:
- Standardizes timestamps to consistent format defined in CONFIG.FORMATS.TIMESTAMP
- Handles various input formats (datetime objects, strings, timestamps)
- Provides UTC normalization
- Ensures consistent timestamp formatting across the codebase
Input formats supported:
- datetime objects (with or without timezone)
- Unix timestamps (int/float)
- Various string formats
Output:
- Standardized timestamp strings in the CONFIG.FORMATS.TIMESTAMP format (default: "%Y-%m-%d %H:%M:%S")
- UTC normalized timestamps
- Validation utilities
Prerequisites:
- Optional: CONFIG.FORMATS.TIMESTAMP for the standard format (DEFAULT_FORMAT is used when the config cannot be loaded)
"""
from __future__ import annotations
import re
from datetime import datetime, timezone
from typing import Union
# Fallback strftime pattern, used whenever no configured format is available.
DEFAULT_FORMAT = "%Y-%m-%d %H:%M:%S"

# Resolve the project-wide format through the standalone scitex_io peer
# (PA304 §3 — no umbrella imports here). `scitex_io` is not declared in
# pyproject extras; going through the helper still yields the canonical
# None-on-ImportError shape, which keeps the import audit (PA-302) happy.
from scitex_dev import try_import_optional

_load_configs = try_import_optional("scitex_io", "load_configs")

if _load_configs is None:
    # Optional loader is not available: use the built-in default.
    STANDARD_FORMAT = DEFAULT_FORMAT
else:
    try:
        CONFIG = _load_configs()
        # A missing FORMATS section, a missing TIMESTAMP entry, or a falsy
        # TIMESTAMP value all collapse to the default.
        STANDARD_FORMAT = (
            getattr(getattr(CONFIG, "FORMATS", None), "TIMESTAMP", None)
            or DEFAULT_FORMAT
        )
    except Exception:
        # load_configs may exist but raise (missing config files, etc.).
        STANDARD_FORMAT = DEFAULT_FORMAT
# Common alternative formats to try when parsing.
# to_datetime() walks this list from top to bottom and returns the first
# pattern that datetime.strptime() accepts, so the order is significant.
# Each date layout appears with and without fractional seconds (%f).
ALTERNATIVE_FORMATS = [
    # Year-first, dash-separated date
    "%Y-%m-%dT%H:%M:%S.%f",  # ISO 8601 with T (with microseconds)
    "%Y-%m-%dT%H:%M:%S",  # ISO 8601 with T (no microseconds)
    "%Y-%m-%d %H:%M:%S.%f",
    "%Y-%m-%d %H:%M:%S",
    # Year-first, slash-separated date
    "%Y/%m/%d %H:%M:%S.%f",
    "%Y/%m/%d %H:%M:%S",
    # Day-first, dash-separated date
    "%d-%m-%Y %H:%M:%S.%f",
    "%d-%m-%Y %H:%M:%S",
    # Day-first, slash-separated date
    "%d/%m/%Y %H:%M:%S.%f",
    "%d/%m/%Y %H:%M:%S",
    "%d/%m/%Y, %H:%M:%S",  # Format used in REC_START
    # Compact date without separators
    "%Y%m%d %H:%M:%S.%f",
    "%Y%m%d %H:%M:%S",
    # Underscore between date and time
    "%Y-%m-%d_%H:%M:%S.%f",
    "%Y-%m-%d_%H:%M:%S",
]
def normalize_timestamp(
    timestamp: Union[datetime, str, int, float],
    return_as: str = "str",
    normalize_utc: bool = True,
) -> Union[str, datetime, float]:
    """
    Convert a timestamp of any supported kind into one uniform representation.

    The input is first turned into a ``datetime`` via ``to_datetime`` and is
    then optionally moved to UTC before being rendered.

    Parameters
    ----------
    timestamp : datetime, str, int, or float
        Value to convert; anything ``to_datetime`` accepts.
    return_as : str
        Shape of the result: "str" (default), "datetime", or "timestamp".
    normalize_utc : bool
        When True, the result is expressed in UTC. A naive datetime is
        labelled as UTC without shifting its clock time; an aware datetime
        is converted to UTC.

    Returns
    -------
    str, datetime, or float
        - "str": text rendered with ``STANDARD_FORMAT``
          (CONFIG.FORMATS.TIMESTAMP when configured)
        - "datetime": the datetime object itself
        - "timestamp": Unix timestamp (float)

    Raises
    ------
    ValueError
        If ``return_as`` is not one of the three accepted values, or if a
        string input cannot be parsed.
    TypeError
        If ``timestamp`` is of an unsupported type.

    Examples
    --------
    >>> from datetime import datetime
    >>> dt = datetime(2010, 6, 18, 10, 15, 0)
    >>> normalize_timestamp(dt, return_as="str", normalize_utc=False)
    '2010-06-18 10:15:00'
    """
    moment = to_datetime(timestamp)

    if normalize_utc:
        # Naive values are tagged as UTC as-is; aware values are converted.
        moment = (
            moment.replace(tzinfo=timezone.utc)
            if moment.tzinfo is None
            else moment.astimezone(timezone.utc)
        )

    # The output selector is validated only after conversion, so parsing
    # errors surface before an invalid `return_as` does.
    if return_as == "str":
        return moment.strftime(STANDARD_FORMAT)
    if return_as == "datetime":
        return moment
    if return_as == "timestamp":
        return moment.timestamp()

    raise ValueError(
        f"return_as must be 'str', 'datetime', or 'timestamp', got: {return_as}"
    )
def to_datetime(timestamp: Union[datetime, str, int, float]) -> datetime:
    """
    Convert various timestamp formats to a datetime object.

    Parameters
    ----------
    timestamp : datetime, str, int, or float
        - datetime: returned unchanged (naive stays naive, aware stays aware).
        - int / float: interpreted as a Unix timestamp; the result is a
          timezone-aware datetime in UTC.
        - str: surrounding whitespace is ignored, then each pattern in
          ``ALTERNATIVE_FORMATS`` is tried in order; the result is naive.

    Returns
    -------
    datetime
        Datetime object

    Raises
    ------
    ValueError
        If the string matches none of ``ALTERNATIVE_FORMATS``. The message
        quotes the string exactly as the caller passed it.
    TypeError
        If timestamp type is not supported

    Notes
    -----
    ``%f`` accepts at most six fractional digits. When the text after the
    last "." is longer than six characters (e.g. nanosecond precision), it is
    reduced to at most six leading digits and anything following those digits
    is discarded before parsing.
    """
    # Already datetime
    if isinstance(timestamp, datetime):
        return timestamp

    # Unix timestamp (int/float)
    if isinstance(timestamp, (int, float)):
        return datetime.fromtimestamp(timestamp, tz=timezone.utc)

    if not isinstance(timestamp, str):
        raise TypeError(
            f"timestamp must be datetime, str, int, or float, got: {type(timestamp)}"
        )

    # Work on a cleaned copy so the caller's original text stays available
    # for the error message. strptime() rejects stray leading/trailing
    # whitespace, so drop it up front.
    candidate = timestamp.strip()

    # Handle nanosecond precision by truncating to microseconds.
    head, dot, tail = candidate.rpartition(".")
    if dot and len(tail) > 6:
        # Keep only the first 6 characters of the fractional part.
        fraction = tail[:6]
        if not fraction.isdigit():
            # Extra text follows a short run of digits: keep just the digits.
            leading_digits = re.match(r"(\d+)", tail)
            if leading_digits:
                fraction = leading_digits.group(1)[:6]
        candidate = f"{head}.{fraction}"

    # Try parsing with various formats; the first match wins.
    for fmt in ALTERNATIVE_FORMATS:
        try:
            return datetime.strptime(candidate, fmt)
        except ValueError:
            continue

    # Report the caller's input, not the internally truncated candidate.
    raise ValueError(
        f"Could not parse timestamp string: {timestamp}. "
        f"Tried formats: {ALTERNATIVE_FORMATS}"
    )
def get_time_delta_seconds(
    start: Union[datetime, str], end: Union[datetime, str]
) -> float:
    """
    Return the number of seconds elapsed from ``start`` to ``end``.

    Both arguments are converted with ``to_datetime`` before subtraction, so
    the result is negative when ``end`` precedes ``start``.

    Parameters
    ----------
    start : datetime or str
        Start timestamp
    end : datetime or str
        End timestamp

    Returns
    -------
    float
        ``end - start`` expressed in seconds
    """
    # Convert start first, then end, so a bad `start` is reported first.
    begin = to_datetime(start)
    finish = to_datetime(end)
    return (finish - begin).total_seconds()
# EOF