<?php
// Stop immediately when the ABSPATH constant is missing, i.e. when this file
// is executed directly rather than being included by the host application.
defined('ABSPATH') || exit;


/**
 * Static helpers for finding <a> links that point at a given URL inside a
 * fragment of post HTML.
 *
 * URLs are compared in a canonical form produced by normalize_url():
 * lower-case scheme and host, explicit port preserved, path always ending in
 * a slash, query string kept verbatim, fragment dropped. Relative references
 * are resolved against home_url() because the URL of the containing document
 * is not known to these helpers.
 */
class MKLA_LinkChecker
{
    /**
     * Convert a URL (absolute, protocol-relative or site-relative) into its
     * canonical comparison form.
     *
     * @param mixed $url Value is cast to string and trimmed.
     * @return string Canonical URL, or '' when the input is empty. When the
     *                input cannot be parsed it is returned with a trailing
     *                slash appended and no further normalization.
     */
    public static function normalize_url($url)
    {
        $url = trim((string) $url);
        if ($url === '') {
            return '';
        }

        $parsed = wp_parse_url($url);
        if (!$parsed) {
            return trailingslashit($url);
        }

        if (empty($parsed['host'])) {
            // No host: treat the value as relative to the site root.
            $url = home_url(ltrim($url, '/'));
            $parsed = wp_parse_url($url);
            if (!$parsed || empty($parsed['host'])) {
                // Still nothing usable to rebuild from; avoid reading missing keys.
                return trailingslashit($url);
            }
        }

        if (!empty($parsed['scheme'])) {
            $scheme = strtolower($parsed['scheme']);
        } else {
            // Protocol-relative input inherits the scheme of the current request.
            $scheme = is_ssl() ? 'https' : 'http';
        }

        return self::assemble_url($scheme, $parsed);
    }


    /**
     * Turn an href/src attribute value into an absolute URL.
     *
     * - Values that already carry a scheme (http:, https:, mailto:, tel:,
     *   data:, javascript:, ...) are returned unchanged.
     * - Protocol-relative values (//host/path) get the current request scheme.
     * - Everything else is resolved against home_url().
     *
     * @param mixed $href Raw attribute value.
     * @return string Absolute URL, or '' for an empty/falsy value.
     */
    public static function absolutize_url($href)
    {
        $href = trim((string) $href);
        if (!$href) {
            return '';
        }

        if (strpos($href, '//') === 0) {
            return (is_ssl() ? 'https' : 'http') . ':' . $href;
        }

        // RFC 3986 scheme syntax: a letter followed by letters, digits, "+", "." or "-".
        if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $href)) {
            return $href;
        }

        return home_url($href);
    }


    /**
     * Build the list of canonical URLs that count as "the same page" as $url:
     * the normalized URL under both the http and https schemes.
     *
     * Trailing-slash differences need no separate variants because
     * normalize_url() always emits a trailing slash on the path.
     *
     * @param mixed $url Target URL (absolute or site-relative).
     * @return string[] Zero-indexed list of unique variants; empty when $url
     *                  is empty. When the normalized URL cannot be parsed
     *                  into a host, it is returned as the only variant.
     */
    public static function build_target_variants($url)
    {
        $url_norm = self::normalize_url($url);
        if ($url_norm === '') {
            return [];
        }

        $parsed = wp_parse_url($url_norm);
        if (!$parsed || empty($parsed['host'])) {
            return [$url_norm];
        }

        $variants = [];
        foreach (['http', 'https'] as $scheme) {
            $variants[] = self::assemble_url($scheme, $parsed);
        }

        return array_values(array_unique($variants));
    }


    /**
     * Look up the post behind a URL and return it together with its raw,
     * unfiltered post_content (suitable for DOM operations).
     *
     * @param string $url URL passed to url_to_postid().
     * @return array Two-element list: [post object, content string], or
     *               [null, null] when no post can be resolved.
     */
    public static function get_post_and_content_by_url($url)
    {
        $post_id = url_to_postid($url);
        if (!$post_id) {
            return [null, null];
        }

        $post = get_post($post_id);
        if (!$post) {
            return [null, null];
        }

        return [$post, (string) $post->post_content];
    }


    /**
     * Find ALL <a> links targeting $target_url. Returns array of occurrences with type/text/images.
     *
     * Hrefs that are empty, fragment-only ("#section") or use a non-http(s)
     * scheme are ignored. Relative hrefs are resolved against home_url().
     *
     * @param string $html       HTML fragment to scan.
     * @param string $target_url URL the links must point to.
     * @return array<int, array{index:int, type:string, anchor_text:?string, image_srcs:array}>
     */
    public static function find_links($html, $target_url)
    {
        $occ = [];
        if (!$html) {
            return $occ;
        }

        $normalized_targets = self::build_target_variants($target_url);
        if (!$normalized_targets) {
            return $occ;
        }

        $doc = new DOMDocument();
        // Silence parser warnings for real-world markup, then put the
        // process-wide libxml setting back the way the caller had it.
        $previous_setting = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTML('<?xml encoding="utf-8" ?>' . $html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous_setting);
        if (!$loaded) {
            return $occ;
        }

        foreach ($doc->getElementsByTagName('a') as $a) {
            $href = trim($a->getAttribute('href'));
            if (!self::is_page_href($href)) {
                continue;
            }

            $href_norm = self::normalize_url(self::absolutize_url($href));
            if (!in_array($href_norm, $normalized_targets, true)) {
                continue;
            }

            $img_srcs = [];
            foreach ($a->getElementsByTagName('img') as $img) {
                $src = $img->getAttribute('src');
                if ($src) {
                    $img_srcs[] = self::absolutize_url($src);
                }
            }
            $img_srcs = array_values(array_unique($img_srcs));

            $text = trim($a->textContent);
            $occ[] = [
                // Position among matching links only, not among all anchors.
                'index' => count($occ),
                'type' => count($img_srcs) ? 'image' : 'text',
                'anchor_text' => ($text !== '' ? $text : null),
                'image_srcs' => $img_srcs,
            ];
        }

        return $occ;
    }


    /**
     * Rebuild a canonical URL string from parsed components under $scheme.
     *
     * @param string $scheme Lower-case scheme to emit.
     * @param array  $parsed Components as returned by wp_parse_url(); 'host' must be set.
     * @return string
     */
    private static function assemble_url($scheme, $parsed)
    {
        $url = $scheme . '://' . strtolower($parsed['host']);
        if (!empty($parsed['port'])) {
            $url .= ':' . $parsed['port'];
        }

        $path = (isset($parsed['path']) && $parsed['path'] !== '') ? $parsed['path'] : '/';
        // The slash belongs to the path; appending it after the query would alter the query value.
        $url .= trailingslashit($path);

        if (isset($parsed['query']) && $parsed['query'] !== '') {
            $url .= '?' . $parsed['query'];
        }

        return $url;
    }


    /**
     * Whether an href can refer to a web page at all: not empty, not a
     * same-document fragment, and either scheme-less or http/https.
     *
     * @param string $href Trimmed attribute value.
     * @return bool
     */
    private static function is_page_href($href)
    {
        if ($href === '' || $href[0] === '#') {
            return false;
        }

        if (preg_match('#^([a-z][a-z0-9+.\-]*):#i', $href, $m)) {
            $scheme = strtolower($m[1]);
            return $scheme === 'http' || $scheme === 'https';
        }

        return true;
    }
}