/**
 * WebFetchTool - Fetch and extract information from web pages
 *
 * This tool is ideal for NON-INTERACTIVE data extraction.
 * When you just need to read information from a page without clicking, filling forms,
 * or interacting with elements, use this tool for fast HTTP-based extraction.
 *
 * Can be used by:
 * - Main agent directly
 * - Subagents (subagent can use web_fetch for quick data extraction)
 * - In parallel tool calls
 */

import { BrowserTool } from "../../../ai_tools_interface.js";
import { z } from "zod";

/** Maximum number of characters of extracted text returned to the caller. */
const MAX_CONTENT_CHARS = 15000;

/** Upper bound, in milliseconds, on the whole request (headers plus body download). */
const FETCH_TIMEOUT_MS = 15000;

/**
 * Matches one character reference: hexadecimal (`&#x20AC;`), decimal (`&#8364;`)
 * or purely alphabetic named (`&euro;`). Names containing digits (e.g. `&frac12;`)
 * are deliberately not matched and stay in the text verbatim.
 */
const ENTITY_PATTERN = /&(#x[0-9a-f]+|#\d+|[a-z]+);/gi;

/** Named entities that are decoded to a real character; any other name becomes a space. */
const NAMED_ENTITIES = new Map([
  ['nbsp', ' '],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['mdash', '\u2014'],
  ['ndash', '\u2013'],
  ['hellip', '\u2026'],
  ['lsquo', '\u2018'],
  ['rsquo', '\u2019'],
  ['ldquo', '\u201C'],
  ['rdquo', '\u201D'],
  ['bull', '\u2022'],
  ['middot', '\u00B7'],
  ['times', '\u00D7'],
  ['deg', '\u00B0'],
  ['copy', '\u00A9'],
  ['reg', '\u00AE'],
  ['trade', '\u2122'],
  ['euro', '\u20AC'],
  ['pound', '\u00A3'],
  ['yen', '\u00A5'],
  ['cent', '\u00A2']
]);

/**
 * Replacement callback for ENTITY_PATTERN: decodes a single character reference.
 *
 * @param {string} match - The full reference, e.g. `&amp;` (unused, required by `replace`).
 * @param {string} body - The text between `&` and `;`.
 * @returns {string} The decoded character, a space for unknown names, or U+FFFD
 *   for numeric references that are not valid Unicode scalar values.
 */
function decodeEntity(match, body) {
  if (body[0] !== '#') {
    return NAMED_ENTITIES.has(body) ? NAMED_ENTITIES.get(body) : ' ';
  }

  const isHex = body[1] === 'x' || body[1] === 'X';
  const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);

  // Keep numeric non-breaking spaces consistent with `&nbsp;`.
  if (codePoint === 0xa0) {
    return ' ';
  }

  // String.fromCodePoint throws above U+10FFFF; NUL and lone surrogates are not text.
  const isValid =
    codePoint > 0 &&
    codePoint <= 0x10ffff &&
    (codePoint < 0xd800 || codePoint > 0xdfff);
  return isValid ? String.fromCodePoint(codePoint) : '\uFFFD';
}

/**
 * Cuts `text` to at most `maxChars` UTF-16 units without splitting a surrogate pair.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxChars - Maximum length of the result.
 * @returns {string} `text` itself when short enough, otherwise a prefix of it.
 */
function truncateText(text, maxChars) {
  if (text.length <= maxChars) {
    return text;
  }

  let end = maxChars;
  const lastUnit = text.charCodeAt(end - 1);
  // A high surrogate at the cut point would be orphaned from its low surrogate.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }
  return text.slice(0, end);
}

/**
 * Checks that `url` is an absolute http(s) URL.
 *
 * @param {*} url - Candidate URL supplied by the caller.
 * @returns {string|null} A human-readable problem description, or null when the URL is usable.
 */
function describeUrlProblem(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (parseError) {
    return `Invalid URL: ${url}`;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return `Unsupported URL scheme "${parsed.protocol}" (only http and https are supported)`;
  }
  return null;
}

/**
 * Tool that downloads a page over HTTP and returns its readable text.
 *
 * The tool never throws from `call`: every failure (bad URL, network error,
 * timeout, non-2xx status) is reported as `{ success: false, error, url, duration }`.
 */
export class WebFetchTool extends BrowserTool {
  /**
   * Registers the tool name, the description shown to the model and the
   * argument schema (`url` required, `extract` optional) with the base class.
   */
  constructor() {
    super(
      "web_fetch",
      `**FAST** - Fetch a web page via HTTP and extract text content in ~1-2 seconds.

**PREFER THIS OVER navigate_to_url** when you just need to READ information from a page.
This is 10-20x faster than browser navigation because it skips rendering.

**IDEAL FOR:**
- Wikipedia pages (e.g., population, facts, dates)
- News articles
- Documentation pages
- Product pages with static pricing
- Any page where you just need to READ text

**EXAMPLE - Wikipedia lookup:**
{
  "url": "https://en.wikipedia.org/wiki/Iceland",
  "extract": "population"
}

**EXAMPLE - Price check:**
{
  "url": "https://store.example.com/product/123",
  "extract": "price and availability"
}

**DO NOT USE when:**
- Page requires login/authentication
- Content loads via JavaScript (SPAs, React apps)
- You need to click, fill forms, or interact
- Page has anti-bot protection (Cloudflare, CAPTCHA)

**RETURNS:** Page text content (up to 15000 chars, ~1-2 sec)`,
      z.object({
        url: z.string().describe("The URL to fetch"),
        extract: z.string().optional().nullable().describe("What to look for in the page (helps focus the extraction)")
      })
    );
  }

  /**
   * Fetches `args.url` and returns the page's text content.
   *
   * @param {{url: string, extract?: (string|null)}} args - Tool arguments.
   * @returns {Promise<Object>} On success:
   *   `{ success: true, url, content, truncated, originalLength, duration, hint }`
   *   where `hint` is `undefined` unless `extract` was given. On failure:
   *   `{ success: false, error, url, duration }`. `duration` is in milliseconds.
   */
  async call(args) {
    // Tolerate a missing argument object; the URL check below reports it cleanly.
    const { url, extract } = args || {};
    const startTime = Date.now();
    const failure = (message) => ({
      success: false,
      error: message,
      url,
      duration: Date.now() - startTime
    });

    console.log(`[WebFetch] Fetching: ${url}`);

    // Reject malformed URLs and non-web schemes (file:, data:, ...) before any I/O.
    const urlProblem = describeUrlProblem(url);
    if (urlProblem) {
      console.error('[WebFetch] Error:', urlProblem);
      return failure(urlProblem);
    }

    // The timer stays armed until the body has been read, so a server that
    // sends headers and then stalls is also cut off.
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

    try {
      const response = await fetch(url, {
        method: 'GET',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
          'Accept-Language': 'en-US,en;q=0.5'
        },
        signal: controller.signal
      });

      if (!response.ok) {
        return failure(`HTTP ${response.status}: ${response.statusText}`);
      }

      const html = await response.text();
      const textContent = this.extractTextFromHtml(html);
      const content = truncateText(textContent, MAX_CONTENT_CHARS);

      const duration = Date.now() - startTime;
      console.log(`[WebFetch] Completed in ${duration}ms, extracted ${content.length} chars`);

      return {
        success: true,
        url,
        content,
        truncated: textContent.length > MAX_CONTENT_CHARS,
        originalLength: textContent.length,
        duration,
        hint: extract ? `Look for: ${extract}` : undefined
      };
    } catch (error) {
      let errorMessage;
      if (controller.signal.aborted) {
        // Only the timeout timer aborts this controller, so an aborted signal means a timeout.
        errorMessage = `Request timed out after ${FETCH_TIMEOUT_MS}ms`;
      } else {
        errorMessage = error instanceof Error ? error.message : String(error);
      }
      console.error('[WebFetch] Error:', errorMessage);
      return failure(errorMessage);
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Extract readable text from HTML, removing scripts, styles, comments and tags.
   *
   * This is a regex-based best-effort conversion, not an HTML parser.
   *
   * @param {string} html - Raw HTML markup.
   * @returns {string} Plain text with entities decoded, runs of spaces/tabs
   *   collapsed, and at most one blank line between paragraphs. Returns an
   *   empty string for non-string or empty input.
   */
  extractTextFromHtml(html) {
    if (typeof html !== 'string' || html.length === 0) {
      return '';
    }

    // Drop non-content elements together with their bodies. The back-reference
    // pairs each opener with its own closer, and `\s*` accepts `</script >`.
    let text = html
      .replace(/<(script|style|noscript)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, '')
      .replace(/<!--[\s\S]*?-->/g, '');

    // Block-level openers become line breaks. `\b` keeps <link>, <path>,
    // <param>, <track> etc. from being mistaken for <li>, <p> or <tr>.
    text = text.replace(/<(?:br|p|pre|div|h[1-6]|li|tr)\b[^>]*>/gi, '\n');

    // Every other tag becomes a space so adjacent words do not fuse.
    text = text.replace(/<[^>]+>/g, ' ');

    // Decode in a single pass so that `&amp;lt;` yields the literal text
    // `&lt;` rather than being decoded twice into `<`.
    text = text.replace(ENTITY_PATTERN, decodeEntity);

    // Normalise line endings first so the newline rules below also apply to CRLF pages.
    return text
      .replace(/\r\n?/g, '\n')
      .replace(/[ \t]+/g, ' ')
      .replace(/\n[ \t]+/g, '\n')
      .replace(/[ \t]+\n/g, '\n')
      .replace(/\n{3,}/g, '\n\n')
      .trim();
  }
}

// Expose the class as the module's default export as well; the class
// declaration itself is already a named export, so both import styles work.
export default WebFetchTool;
