screenpipe icon indicating copy to clipboard operation
screenpipe copied to clipboard

[feature] collect website URLs

Open louis030195 opened this issue 1 year ago • 2 comments

It would be useful to capture the website URL alongside the app name and window name.

louis030195 avatar Nov 06 '24 18:11 louis030195

use anyhow::Result;
use regex::Regex;

#[cfg(target_os = "macos")]
use core_foundation::{
    array::CFArrayRef,
    base::{CFRelease, CFTypeRef},
    string::CFStringRef,
};

#[cfg(target_os = "macos")]
use cocoa::{
    base::nil,
    foundation::NSString,
};

#[cfg(target_os = "windows")]
use windows::{
    Win32::UI::Accessibility::{IUIAutomation, UIA_ValuePatternId},
    Win32::UI::WindowsAndMessaging::{GetForegroundWindow, GetWindowTextW},
};

pub fn extract_urls_from_text(text: &str) -> Vec<String> {
    let url_regex = Regex::new(r#"https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&//=]*)"#).unwrap();
    url_regex
        .find_iter(text)
        .map(|m| m.as_str().to_string())
        .collect()
}

/// Returns the URL of the active tab of the frontmost browser window on macOS.
///
/// The frontmost application is looked up through `NSWorkspace`; if its bundle
/// identifier is one of the supported browsers, the matching AppleScript
/// one-liner is run through `osascript` and its trimmed standard output is
/// returned.
///
/// # Returns
///
/// * `Ok(Some(url))` when the script succeeds and prints a non-empty value.
/// * `Ok(None)` when there is no frontmost application, it has no bundle
///   identifier, it is not a supported browser, the script exits with a
///   failure status, or the script prints nothing.
///
/// # Errors
///
/// Returns an error if `osascript` cannot be spawned or if its output is not
/// valid UTF-8.
///
/// NOTE(review): `msg_send!` and `class!` are expected to be in scope from the
/// `objc` crate; no import for them is visible in this file — confirm.
#[cfg(target_os = "macos")]
pub fn get_active_browser_url() -> Result<Option<String>> {
    // Single source of truth: bundle identifier -> AppleScript that prints the
    // current URL. Keeping both in one table prevents the "is it a browser"
    // check and the script selection from drifting apart.
    //
    // NOTE(review): Firefox is not known to expose tab URLs through
    // AppleScript, so its entry probably always ends in `Ok(None)` — confirm.
    const BROWSER_SCRIPTS: [(&str, &str); 3] = [
        (
            "com.google.Chrome",
            "tell application \"Google Chrome\" to get URL of active tab of front window",
        ),
        (
            "com.apple.Safari",
            "tell application \"Safari\" to get URL of current tab of front window",
        ),
        (
            "org.mozilla.firefox",
            "tell application \"Firefox\" to get URL of active tab of front window",
        ),
    ];

    // SAFETY: messages are only sent to the `NSWorkspace` class and to objects
    // it hands back. Each returned object is compared against `nil` before it
    // is used further, and the C string obtained from `UTF8String` is
    // null-checked and copied into an owned `String` before the block ends, so
    // no borrowed Objective-C memory escapes.
    let bundle_id: Option<String> = unsafe {
        let workspace: cocoa::base::id = msg_send![class!(NSWorkspace), sharedWorkspace];
        let active_app: cocoa::base::id = msg_send![workspace, frontmostApplication];
        if active_app == nil {
            None
        } else {
            // `bundleIdentifier` is nullable: not every process has a bundle.
            let raw_bundle_id: cocoa::base::id = msg_send![active_app, bundleIdentifier];
            if raw_bundle_id == nil {
                None
            } else {
                let utf8 = NSString::UTF8String(raw_bundle_id);
                if utf8.is_null() {
                    None
                } else {
                    Some(
                        std::ffi::CStr::from_ptr(utf8)
                            .to_string_lossy()
                            .into_owned(),
                    )
                }
            }
        }
    };

    // Anything that is not a known browser has no URL to report.
    let script = match bundle_id
        .as_deref()
        .and_then(|id| BROWSER_SCRIPTS.iter().find(|(known, _)| *known == id))
    {
        Some((_, script)) => *script,
        None => return Ok(None),
    };

    let output = std::process::Command::new("osascript")
        .arg("-e")
        .arg(script)
        .output()?;

    // A failing script (no window open, automation permission denied, ...) is
    // treated as "no URL available" rather than as an error.
    if !output.status.success() {
        return Ok(None);
    }

    let url = String::from_utf8(output.stdout)?;
    let url = url.trim();
    // An empty result carries no information; report it as `None`.
    Ok((!url.is_empty()).then(|| url.to_owned()))
}

/// Attempts to return the URL shown in the foreground browser window on Windows.
///
/// The foreground window's title is read and compared against a fixed list of
/// browser title suffixes. When one matches, the function tries to read the
/// address bar's value through UI Automation.
///
/// Returns `Ok(None)` when the title is empty or cannot be read, when the
/// title does not end with a known browser suffix, or when no address bar
/// element is found. Errors from the UI Automation calls are propagated.
///
/// NOTE(review): this block is an unfinished sketch and does not compile as
/// written — the `IUIAutomation` initializer is a placeholder comment, and
/// `find_address_bar` is not defined anywhere in the visible source.
#[cfg(target_os = "windows")]
pub fn get_active_browser_url() -> Result<Option<String>> {
    unsafe {
        let hwnd = GetForegroundWindow();
        // Fixed 512-code-unit buffer for the title (UTF-16).
        let mut title = [0u16; 512];
        let len = GetWindowTextW(hwnd, &mut title);
        // A length of 0 is treated as "no usable title".
        if len == 0 {
            return Ok(None);
        }

        // Decode only the code units that were written; invalid UTF-16 is
        // replaced rather than rejected.
        let window_title = String::from_utf16_lossy(&title[..len as usize]);
        
        // Extract URL from common browser title patterns
        // Chrome: "Page Title - Google Chrome"
        // Firefox: "Page Title - Mozilla Firefox"
        // Edge: "Page Title - Microsoft Edge"
        
        let browsers = [
            "- Google Chrome",
            "- Mozilla Firefox",
            "- Microsoft Edge"
        ];

        // Only windows whose title ends with one of the suffixes above are
        // treated as browsers.
        if browsers.iter().any(|b| window_title.ends_with(b)) {
            // Use UI Automation to get more details
            // NOTE(review): the initializer below is a placeholder; a real
            // `IUIAutomation` instance still has to be created here.
            let automation: IUIAutomation = /* initialize UI Automation */;
            let element = automation.ElementFromHandle(hwnd)?;
            
            // Get address bar using known patterns
            // NOTE(review): `find_address_bar` is not defined in the visible
            // source; presumably it searches `element` for the URL field —
            // confirm.
            if let Some(address_bar) = find_address_bar(element) {
                // NOTE(review): the result is used directly as a value
                // pattern; it may need casting to a value-pattern interface
                // before `CurrentValue` is available — confirm.
                let pattern = address_bar.GetCurrentPattern(UIA_ValuePatternId)?;
                let url = pattern.CurrentValue()?;
                return Ok(Some(url.to_string()));
            }
        }

        // Not a recognized browser, or no address bar was found.
        Ok(None)
    }
}

// Helper function to extract URLs from window titles
pub fn extract_url_from_title(title: &str) -> Option<String> {
    let url_regex = Regex::new(r#"(?:https?://)?(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&//=]*)"#).unwrap();
    url_regex.find(title).map(|m| m.as_str().to_string())
}

louis030195 avatar Nov 09 '24 21:11 louis030195

d

louis030195 avatar Feb 20 '25 01:02 louis030195