# player/comprehensive_srt_check.py

import re
from datetime import datetime, timedelta

def comprehensive_srt_check(file_path):
    """Run a full format check on an SRT subtitle file.

    The file is walked cue by cue (number line, timestamp line, text lines,
    blank separator). Problems that break the format are collected as errors;
    suspicious but legal content is collected as warnings. Individual
    timestamps are checked with the module-level ``validate_timestamp`` and
    converted with ``parse_timestamp_to_ms``.

    Args:
        file_path: Path of the SRT file to inspect. It is decoded as UTF-8;
            a leading byte-order mark is tolerated and ignored.

    Returns:
        A tuple ``(errors, warnings, subtitle_count)`` where ``errors`` and
        ``warnings`` are lists of message strings and ``subtitle_count`` is
        the number of cue starts that were examined.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """

    errors = []
    warnings = []

    # 'utf-8-sig' drops a leading BOM if present (and behaves like plain UTF-8
    # otherwise). With plain 'utf-8' the BOM stays glued to the first cue
    # number, which then fails the numeric check.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    i = 0
    subtitle_count = 0
    prev_end_time = None

    while i < len(lines):
        # Skip blank separator lines
        while i < len(lines) and lines[i].strip() == '':
            i += 1

        if i >= len(lines):
            break

        subtitle_count += 1
        start_line = i + 1

        # ===== 1. SUBTITLE NUMBER CHECK =====
        # The skip loop above guarantees this line is not blank.
        line_num = i + 1
        line_content = lines[i].strip()

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits that int() refuses to convert.
        if not line_content.isdecimal():
            errors.append(f"Satir {line_num}: Altyazi numarasi olmali, bulunan: '{line_content[:50]}'")
            # Try to resynchronise on the next line that looks like a number
            while i < len(lines) and not lines[i].strip().isdecimal():
                i += 1
            if i >= len(lines):
                break
        else:
            actual_number = int(line_content)
            if actual_number != subtitle_count:
                warnings.append(f"Satir {line_num}: Numara sırası bozuk - beklenen: {subtitle_count}, bulunan: {actual_number}")

        i += 1

        # ===== 2. TIMESTAMP CHECK =====
        if i >= len(lines):
            errors.append(f"Altyazi {subtitle_count}: Zaman damgasi eksik (dosya sonu)")
            break

        line_num = i + 1
        timestamp_line = lines[i].strip()

        if not timestamp_line:
            errors.append(f"Satir {line_num}: Bos satir, zaman damgasi bekleniyor")
            i += 1
            continue

        # Inspect the timestamp line in detail
        if '-->' not in timestamp_line:
            errors.append(f"Satir {line_num}: '-->' ayirici bulunamadi: '{timestamp_line[:50]}'")
            i += 1
            continue

        parts = timestamp_line.split('-->')
        if len(parts) != 2:
            errors.append(f"Satir {line_num}: Gecersiz zaman damgasi formati: '{timestamp_line[:50]}'")
            i += 1
            continue

        start_time_str = parts[0].strip()
        end_time_str = parts[1].strip()

        # Start time format
        start_errors = validate_timestamp(start_time_str, "baslangic")
        if start_errors:
            for err in start_errors:
                errors.append(f"Satir {line_num} {err}: '{start_time_str}'")

        # End time format
        end_errors = validate_timestamp(end_time_str, "bitis")
        if end_errors:
            for err in end_errors:
                errors.append(f"Satir {line_num} {err}: '{end_time_str}'")

        # Only run the logical checks when both sides are well-formed
        if not start_errors and not end_errors:
            start_ms = parse_timestamp_to_ms(start_time_str)
            end_ms = parse_timestamp_to_ms(end_time_str)

            if start_ms is None or end_ms is None:
                errors.append(f"Satir {line_num}: Zaman parse edilemedi: '{timestamp_line}'")
            else:
                # Start must be strictly before end
                if start_ms >= end_ms:
                    errors.append(f"Satir {line_num}: Baslangic zamani bitis zamanindan buyuk/esit: {start_time_str} >= {end_time_str}")

                # Negative time values
                if start_ms < 0 or end_ms < 0:
                    errors.append(f"Satir {line_num}: Negatif zaman degeri: '{timestamp_line}'")

                # Cue displayed for more than 10 seconds
                duration_ms = end_ms - start_ms
                if duration_ms > 10000:
                    warnings.append(f"Satir {line_num}: Cok uzun altyazi suresi ({duration_ms/1000:.1f} saniye): {start_time_str} --> {end_time_str}")

                # Cue displayed for less than 0.1 seconds
                if duration_ms < 100:
                    warnings.append(f"Satir {line_num}: Cok kisa altyazi suresi ({duration_ms}ms): {start_time_str} --> {end_time_str}")

                # Overlap with the previous cue
                if prev_end_time is not None and start_ms < prev_end_time:
                    time_overlap = prev_end_time - start_ms
                    warnings.append(f"Satir {line_num}: Onceki altyazi ile cakisma ({time_overlap}ms): {start_time_str}")

                prev_end_time = end_ms

        i += 1

        # ===== 3. TEXT CONTENT CHECK =====
        text_lines = []

        while i < len(lines) and lines[i].strip() != '':
            # A numeric line directly followed by a timestamp line means the
            # blank separator is missing and a new cue starts here.
            if (i + 1 < len(lines) and
                lines[i].strip().isdecimal() and
                '-->' in lines[i + 1]):
                break

            text_lines.append(lines[i].rstrip())
            i += 1

        if not text_lines:
            warnings.append(f"Altyazi {subtitle_count} (satir {start_line}): Metin icerigi bos")
        else:
            # Checks run on the cue text joined into a single string
            full_text = ' '.join([t.strip() for t in text_lines])

            # Overly long text
            if len(full_text) > 200:
                warnings.append(f"Altyazi {subtitle_count}: Cok uzun metin ({len(full_text)} karakter)")

            # HTML/XML-like tags
            if re.search(r'<[^>]+>', full_text):
                warnings.append(f"Altyazi {subtitle_count}: HTML/XML tag iceriyor (bazi oynaticilar desteklemeyebilir)")

            # Control characters (tab, LF and CR are excluded from the class)
            if re.search(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', full_text):
                warnings.append(f"Altyazi {subtitle_count}: Kontrol karakterleri iceriyor")

    return errors, warnings, subtitle_count


def validate_timestamp(time_str, time_type):
    """Validate a single SRT timestamp of the form ``HH:MM:SS,mmm``.

    Args:
        time_str: The timestamp text to check (surrounding whitespace is
            expected to be stripped already).
        time_type: Label inserted into every message, e.g. ``"baslangic"``
            or ``"bitis"``.

    Returns:
        A list of error message strings. The list is empty only when
        ``time_str`` matches the format exactly and minutes and seconds are
        at most 59.
    """
    errors = []

    # fullmatch: unlike match() with a '$' anchor, a trailing newline is not
    # silently accepted.
    match = re.fullmatch(r'(\d{2}):(\d{2}):(\d{2}),(\d{3})', time_str)

    if match is None:
        # Work out which part is wrong so the message can be specific
        if ',' not in time_str and '.' not in time_str:
            errors.append(f"({time_type}): Milisaniye ayirici eksik (virgul)")
        elif '.' in time_str:
            errors.append(f"({time_type}): Milisaniye ayirici nokta olmamali, virgul olmali")
        elif time_str.count(':') != 2:
            errors.append(f"({time_type}): ':' ayirici sayisi yanlis (2 olmali)")
        else:
            # No '.' can be present in this branch, so only ',' is unified
            # with ':' before splitting into fields.
            parts = time_str.replace(',', ':').split(':')
            if len(parts) == 4:
                hours, mins, secs, ms = parts
                if len(hours) != 2:
                    errors.append(f"({time_type}): Saat 2 haneli olmali")
                if len(mins) != 2:
                    errors.append(f"({time_type}): Dakika 2 haneli olmali")
                if len(secs) != 2:
                    errors.append(f"({time_type}): Saniye 2 haneli olmali")
                if len(ms) != 3:
                    errors.append(f"({time_type}): Milisaniye 3 haneli olmali")
            # The pattern did not match, so at least one error must be
            # reported. This covers a wrong field count as well as fields of
            # the right width that contain non-digit characters.
            if not errors:
                errors.append(f"({time_type}): Format hatasi (HH:MM:SS,mmm olmali)")
        return errors

    # Value range checks. Hours are deliberately unbounded, and the
    # three-digit millisecond field cannot exceed 999.
    _hours, mins, secs, _ms = (int(group) for group in match.groups())

    if mins > 59:
        errors.append(f"({time_type}): Dakika 59'dan buyuk olamaz ({mins})")
    if secs > 59:
        errors.append(f"({time_type}): Saniye 59'dan buyuk olamaz ({secs})")

    return errors


def parse_timestamp_to_ms(time_str):
    """Convert an ``HH:MM:SS,mmm`` timestamp to a total number of milliseconds.

    Args:
        time_str: The timestamp text to convert.

    Returns:
        The total number of milliseconds as an ``int``, or ``None`` when
        ``time_str`` is not a string or does not match the format exactly.
    """
    try:
        # fullmatch: a trailing newline is not part of the format and must
        # not be accepted the way match() with a '$' anchor accepts it.
        match = re.fullmatch(r'(\d{2}):(\d{2}):(\d{2}),(\d{3})', time_str)
    except TypeError:
        # Non-string input. Only this case is caught, so unrelated
        # exceptions such as KeyboardInterrupt are no longer swallowed.
        return None

    if match is None:
        return None

    hours, mins, secs, ms = (int(group) for group in match.groups())
    return (hours * 3600 + mins * 60 + secs) * 1000 + ms


def print_results(errors, warnings, subtitle_count, file_path):
    """Print a report of the check results to standard output.

    At most the first 100 errors and the first 50 warnings are listed; the
    number of omitted entries is printed after a truncated list. A one-line
    verdict closes the report.

    Args:
        errors: Sequence of error messages.
        warnings: Sequence of warning messages.
        subtitle_count: Number of subtitles that were examined.
        file_path: Path of the checked file, shown in the header.

    Returns:
        The tuple ``(errors, warnings)``, passed through unchanged.
    """
    separator = "=" * 70
    error_total = len(errors)
    warning_total = len(warnings)

    print(f"Dosya: {file_path}")
    print(f"Toplam altyazi sayisi: {subtitle_count}")
    print("\n" + separator)

    # Error section
    if not errors:
        print("\nKritik hata bulunamadi!")
    else:
        print(f"\nHATALAR ({error_total} adet):")
        print(separator)
        for position, message in enumerate(errors[:100], start=1):
            print(f"  {position}. {message}")
        hidden_errors = error_total - 100
        if hidden_errors > 0:
            print(f"\n  ... ve {hidden_errors} hata daha")

    # Warning section
    if not warnings:
        print("\nUyari bulunamadi!")
    else:
        print(f"\nUYARILAR ({warning_total} adet):")
        print(separator)
        for position, message in enumerate(warnings[:50], start=1):
            print(f"  {position}. {message}")
        hidden_warnings = warning_total - 50
        if hidden_warnings > 0:
            print(f"\n  ... ve {hidden_warnings} uyari daha")

    print("\n" + separator)

    # Overall verdict
    if errors:
        verdict = f"\nSonuc: {error_total} HATA, {warning_total} uyari"
    elif warnings:
        verdict = f"\nSonuc: Format dogru ama {warning_total} uyari var"
    else:
        verdict = "\nSonuc: SRT dosyasi MUKEMMEL durumda!"
    print(verdict)

    return errors, warnings


if __name__ == "__main__":
    import sys

    # The first command-line argument selects the file; otherwise the
    # default subtitle path is used.
    cli_args = sys.argv[1:]
    file_path = cli_args[0] if cli_args else "public/ses.srt"

    banner_rule = "=" * 70

    # Banner listing what the checker covers
    print("\nKAPSAMLI SRT FORMAT KONTROLU")
    print(banner_rule)
    print("\nKontrol edilen hususlar:")
    for topic_line in (
        "  - Altyazi numara sirasi",
        "  - Zaman damgasi formati (HH:MM:SS,mmm)",
        "  - Baslangic/bitis zaman mantigi",
        "  - Altyazi cakismalari",
        "  - Altyazi sureleri",
        "  - Metin icerigi",
    ):
        print(topic_line)
    print(banner_rule)

    # Run the check and print the report
    errors, warnings, subtitle_count = comprehensive_srt_check(file_path)
    print_results(errors, warnings, subtitle_count, file_path)