﻿import os
import time
import  collections
import traceback
import zipfile
import logging
import platform

from distutils.version import LooseVersion
from typing import Tuple

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, UnexpectedAlertPresentException
from selenium.webdriver.common.alert import Alert 

from core.models import Proxy
from core.utils import ResourceController, timeit

# Firefox executable per platform, keyed by the value of platform.system().
FIREFOX_BINARY = {
    'Windows': "C:/Program Files (x86)/Mozilla Firefox/firefox.exe",
    'Linux': '/usr/bin/firefox'
}
# geckodriver executable per platform.
# NOTE(review): not referenced in the visible part of this file, and the '~' in
# the Linux entry is not expanded here - confirm whether this is still needed.
FIREFOX_DRIVER = {
    'Windows': "D:/www/google-map/drivers/geckodriver.exe",
    'Linux': '~/.wdm/drivers/geckodriver/linux64/v0.33.0/geckodriver'
}
# Path (relative to the working directory) of the generated proxy add-on archive.
FIREFOX_PLUGIN_FILE = 'tmp/proxy-firefox.xpi'
# Seconds; default wait in FirefoxClient.get and value of the Firefox
# network/script timeout preferences.
RESPONSE_TIMEOUT = 10
# Matches "[http(s)://]user:password@host:port" proxy strings.
# Raw string so that `\d` is a regex escape rather than an invalid string escape.
PROXY_REGEXP = r'^(?P<proto>https?://)?(?P<usr>[^:]+):(?P<pwd>[^@]+)@(?P<host>[^:]+):(?P<port>\d+)$'

# Page load strategies (strategy / document.readyState waited for / meaning):
#   normal  complete     Used by default, waits for all resources to download
#   eager   interactive  DOM access is ready, but other resources like images may still be loading
#   none    any          Does not block WebDriver at all
STRATEGY = "normal"  # "normal", "eager", "none"


def firefox_manifest():
    """Return the ``manifest.json`` text of the proxy add-on.

    The text describes a manifest-version-2 extension named
    "My Firefox Proxy" that requests proxy/webRequest related permissions
    for all URLs, runs ``background.js`` as its background script and
    declares the gecko id ``myproxy@example.org``.
    """
    manifest_text = """
    {
      "name": "My Firefox Proxy",
      "version": "1.0.0",
      "manifest_version": 2,
      "permissions": [
        "browsingData",
        "proxy",
        "storage",
        "tabs",
        "webRequest",
        "webRequestBlocking",
        "downloads",
        "notifications",
        "privacy",
        "<all_urls>"
      ],
      "background": {
        "scripts": ["background.js"]
      },
      "browser_specific_settings": {
        "gecko": {
          "id": "myproxy@example.org"
        }
      }
    }
    """
    return manifest_text


def firefox_background_js(**kwargs):
    """Return the ``background.js`` source of the proxy add-on.

    The script routes every request through an HTTP proxy and answers proxy
    authentication challenges with fixed credentials.

    Required keyword arguments: ``host``, ``port``, ``user``, ``password``.
    Extra keyword arguments are ignored.

    Every value is converted with ``str`` and emitted as a JSON-escaped
    string literal, so quotes, backslashes or newlines in a credential can
    neither break the generated script nor inject code into it. ``port`` is
    still emitted as a string, as before.

    NOTE(review): the ``mode: "fixed_servers"`` object passed to
    ``browser.proxy.settings.set`` looks like the Chrome proxy config shape;
    confirm Firefox accepts it (the ``proxy.onRequest`` listener below does
    not depend on it).

    Raises:
        KeyError: if one of the required keyword arguments is missing.
    """
    import json  # local import: only this function needs it

    # json.dumps produces a double-quoted literal that is also valid JavaScript.
    literals = {
        key: json.dumps(str(kwargs[key]))
        for key in ("host", "port", "user", "password")
    }
    return """
    const host = %(host)s;
    const port = %(port)s;
    let config = {
        mode: "fixed_servers",
        rules: {
          singleProxy: {
            scheme: "http",
            host: host,
            port: port
          },
          bypassList: []
        }
     };


    function proxyRequest(request_data) {
        return {
            type: "http",
            host: host,
            port: port
        };
    }

    browser.proxy.settings.set({value: config, scope: "regular"}, function() {;});

    function callbackFn(details) {
        const username = %(user)s;
        const password = %(password)s;
        return {
            authCredentials: {username: username, password: password},
        };
    }

    browser.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
    );
    browser.proxy.onRequest.addListener(proxyRequest, {urls: ["<all_urls>"]});
    """ % literals


def get_latest_geckodriver_path():
    """Return the path of the newest cached geckodriver, downloading one if needed.

    Looks in ``~/.wdm/drivers/geckodriver/<win64|linux64>/<version>/`` and
    returns the driver from the highest-numbered version folder that actually
    contains the driver executable. Version folders are ordered by the integer
    groups in their names (so ``v0.33.0`` sorts after ``v0.9.0``), which avoids
    the deprecated ``distutils`` comparison and cannot fail on oddly named
    folders.

    When the cache directory is missing, empty, or holds no usable driver,
    falls back to ``GeckoDriverManager().install()``.
    """
    import re  # local import: only this function needs it

    if platform.system() == 'Windows':
        arch_dir, driver_name = 'win64', 'geckodriver.exe'
    else:  # Assuming Unix-based system
        arch_dir, driver_name = 'linux64', 'geckodriver'
    cache_dir = os.path.join(os.path.expanduser("~"), '.wdm', 'drivers', 'geckodriver', arch_dir)

    def version_key(folder_name):
        # "v0.33.0" -> (0, 33, 0); names without digits sort lowest.
        return tuple(int(number) for number in re.findall(r'\d+', folder_name))

    try:
        version_folders = [entry.name for entry in os.scandir(cache_dir) if entry.is_dir()]
    except (FileNotFoundError, NotADirectoryError):
        version_folders = []

    for version in sorted(version_folders, key=version_key, reverse=True):
        candidate = os.path.join(cache_dir, version, driver_name)
        # An incomplete download leaves a version folder without the binary; skip it.
        if os.path.isfile(candidate):
            return candidate

    return GeckoDriverManager().install()


# Value passed to implicitly_wait() when a FirefoxClient is created.
IMPLICITLY_WAIT = 5
# Default window size, unpacked into set_window_size() by FirefoxClient.
WINDOW_SIZE = (1920, 1080)


class FirefoxClient(webdriver.Firefox):
    """Firefox WebDriver tuned for scraping, with an optional authenticated proxy.

    The browser runs headless unless ``debug`` is true. When a ``proxy`` is
    given, a temporary add-on that routes traffic through it is generated and
    installed. The client is a context manager; leaving the ``with`` block
    calls :meth:`close`, which shuts the whole browser down.
    """

    # Default JavaScript run on every scroll step: collect the inner HTML of the
    # matched elements and remove them from the page.
    SCROLL = """
      for (i=0; i<elements.length; i++){
        elements_html.push(elements[i].innerHTML);
        elements[i].remove();
    }
    
    """

    def __init__(self, proxy: Proxy = None, user_agent: str = None, strategy: str = STRATEGY, debug: bool = False,
                 languages: str = "en-US, en", window_size: Tuple[int, int] = WINDOW_SIZE):
        """Start Firefox and prepare the scraping helpers.

        Args:
            proxy: optional proxy; its ``ip``, ``port``, ``user`` and
                ``password`` attributes are read when building the add-on.
            user_agent: optional value for ``general.useragent.override``.
            strategy: page load strategy ("normal", "eager" or "none").
            debug: enable devtools preferences instead of running headless.
            languages: value for ``intl.accept_languages``.
            window_size: (width, height); a falsy value maximizes the window.
        """
        self.proxy = proxy
        self.user_agent = user_agent
        self.strategy = strategy
        self.debug = debug
        self.languages = languages
        self.window_size = window_size
        # Defined up front so close() also works when no proxy add-on is installed.
        self.proxy_addon_id = None

        service = Service(executable_path=get_latest_geckodriver_path(), log_path="tmp/geckodriver.log")
        options = self.__create_options()

        super().__init__(service=service, options=options)

        try:
            self.__install_proxy_addon()

            if self.window_size:
                self.set_window_size(*self.window_size)
            else:
                self.maximize_window()
            self.implicitly_wait(IMPLICITLY_WAIT)
            self.actions = ActionChains(self)

            self.logger = self.__get_logger()

            self.resource_controller = ResourceController()
        except Exception:
            # The browser is already running; do not leak it if setup fails.
            super().quit()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def get(self, url, timeout=RESPONSE_TIMEOUT):
        """Load ``url`` and wait up to ``timeout`` seconds for ``/html/body``.

        An unexpected alert raised while waiting is ignored; a wait timeout
        propagates to the caller.
        """
        super().get(url)
        try:
            WebDriverWait(self, timeout=timeout, poll_frequency=1.5).until(EC.presence_of_element_located((By.XPATH, "/html/body")))
        except UnexpectedAlertPresentException:
            pass

    def close(self):
        """Shut the browser down completely.

        Uninstalls the proxy add-on (if one was installed), closes the current
        window, quits the driver and removes leftover add-on temp files. The
        driver is quit even if one of the earlier steps raises.
        """
        try:
            if self.proxy_addon_id is not None:
                self.uninstall_addon(self.proxy_addon_id)
                self.proxy_addon_id = None
            super().close()
        finally:
            super().quit()
            self._remove_addon_temp_files()

    @staticmethod
    def _remove_addon_temp_files():
        """Delete files whose name contains "addon" from the system temp directory.

        Best effort: directories are skipped and files that cannot be removed
        are left in place. NOTE(review): this matches any such file, not only
        ones created by this process - same as the previous behaviour.
        """
        import tempfile  # local import: only this helper needs it

        tmp_dir = tempfile.gettempdir()
        try:
            names = os.listdir(tmp_dir)
        except OSError:
            return
        for name in names:
            if "addon" not in name:
                continue
            path = os.path.join(tmp_dir, name)
            if not os.path.isfile(path):
                continue
            try:
                os.remove(path)
            except OSError:
                # Cleanup must never prevent shutdown (file in use, no permission, ...).
                pass

    @property
    def tabs(self):
        """Handles of all open windows/tabs."""
        return self.window_handles

    def switch_to_tab(self, tab):
        """Make ``tab`` (a window handle) the current window."""
        self.switch_to.window(tab)

    def open_tab(self, url):
        """Open ``url`` in a new tab and return the new tab's handle.

        The URL is passed as a script argument, so quotes in it cannot break
        the script. The handle is found by comparing the handle list before
        and after, because the order of window handles is not guaranteed; if
        no new handle appears the last handle is returned, as before.
        """
        known = set(self.tabs)
        self.execute_script("window.open(arguments[0], '_blank');", url)
        handles = self.tabs
        fresh = [handle for handle in handles if handle not in known]
        return fresh[-1] if fresh else handles[-1]

    def close_tab(self, tab):
        """Close only ``tab`` and switch to a remaining tab, if there is one.

        Uses the base-class ``close`` because :meth:`close` on this class
        shuts the whole browser down.
        """
        remaining = [handle for handle in self.tabs if handle != tab]
        self.switch_to_tab(tab)
        super().close()
        if remaining:
            self.switch_to_tab(remaining[-1])

    def page_source(self, tab=None):
        """Return the page source, optionally switching to ``tab`` first.

        Note: this is a method here, whereas the base class exposes
        ``page_source`` as a property.
        """
        if tab:
            self.switch_to_tab(tab)
        return super().page_source

    def bs4(self, source=None, tab=None, features='html.parser'):
        """Parse ``source`` (or the current page source) with BeautifulSoup."""
        return BeautifulSoup(source or self.page_source(tab), features)

    def screenshot(self, image_path):
        """Save a screenshot to ``image_path``; returns the driver's result."""
        return self.get_screenshot_as_file(image_path)

    def focus(self, element):
        """Give keyboard focus to ``element`` via JavaScript."""
        self.execute_script("arguments[0].focus();", element)

    def scroll(self, break_function, element_selector, height_selector, remove_script="", remove=True, timeout=60):
        """Page down repeatedly and collect the inner HTML of matching elements.

        Args:
            break_function: called with the list collected so far before each
                step; a truthy result stops scrolling. With ``remove=False``
                the list stays empty until scrolling has finished.
            element_selector: CSS selector of the elements to collect.
            height_selector: CSS selector of the element whose ``scrollHeight``
                is watched for growth.
            remove_script: JavaScript run per step instead of ``SCROLL`` when
                ``remove`` is true; when ``remove`` is false it is run once per
                element before that element is collected.
            remove: collect on every step (default script also removes the
                elements from the page) rather than once at the end.
            timeout: seconds allowed without a change of the scroll height;
                the timer restarts whenever the height changes.

        Returns:
            list of inner-HTML strings.
        """
        scroll_height = 0
        start_time = time.time()
        element = self.find_element(By.CSS_SELECTOR, height_selector)

        # The selector is passed as a script argument rather than spliced into
        # the source, so selectors containing quotes (a[href='x']) work.
        harvest_script = """
                    let elements_html = [];
                    let elements = document.querySelectorAll(arguments[0]);

                    %s

                    return elements_html;
                """ % (remove_script or self.SCROLL)

        elements = []
        # Recent intervals between scroll-height changes; their mean paces the loop.
        waits = collections.deque(maxlen=10)
        while time.time() - start_time < timeout:

            if break_function(elements):
                break

            if waits:
                wait_time = sum(waits) / len(waits)
                time.sleep(wait_time)

                if wait_time > 10:
                    waits.clear()

            self.resource_controller.control()

            new_scroll_height = self.execute_script("return arguments[0].scrollHeight", element)

            self.actions.send_keys(Keys.PAGE_DOWN).perform()

            if remove:
                elements.extend(self.execute_script(harvest_script, element_selector) or [])

            if scroll_height != new_scroll_height:
                waits.append(time.time() - start_time)
                start_time = time.time()
                scroll_height = new_scroll_height

        if not remove:
            elements = self.execute_script("""
                    elements_html = [];
                    elements = document.querySelectorAll(arguments[0]);

                    for (i=0; i<elements.length; i++){
                        %s
                        elements_html.push(elements[i].innerHTML);
                    }
                    return elements_html;
                """ % remove_script, element_selector)
        return elements

    def element_exists(self, element_selector):
        """Return True if an element matching the CSS selector can be found."""
        try:
            self.find_element(By.CSS_SELECTOR, element_selector)
            return True
        except NoSuchElementException:
            return False

    def find_element_with_function(self, find_function, caption=''):
        """Return ``find_function()``; on any error log it and return None."""
        try:
            return find_function()
        except Exception as e:
            trb = traceback.format_exc()
            self.logger.error(f'Error {caption}: {e}\n\n{trb}')

    def find_element_by_script(self, script, caption=''):
        """Return the result of running ``script``; on any error log it and return None."""
        try:
            return self.execute_script(script)
        except Exception as e:
            trb = traceback.format_exc()
            self.logger.error(f'Error {caption}: {e}\n\n{trb}')

    @staticmethod
    def __get_logger():
        """Return the shared 'scraper' logger writing to ``tmp/scraper.log``.

        The file handler is attached only once, so creating several clients
        does not duplicate every log line.
        """
        logger = logging.getLogger('scraper')
        logger.setLevel(logging.DEBUG)
        log_file = 'tmp/scraper.log'
        # FileHandler stores the absolute path in baseFilename.
        target = os.path.abspath(log_file)
        already_attached = any(
            isinstance(handler, logging.FileHandler) and handler.baseFilename == target
            for handler in logger.handlers
        )
        if not already_attached:
            fh = logging.FileHandler(log_file)
            fh.setLevel(logging.DEBUG)
            fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
            logger.addHandler(fh)
        return logger

    def __install_proxy_addon(self):
        """Build the proxy add-on archive and install it as a temporary add-on.

        Does nothing when no proxy was configured; ``proxy_addon_id`` then
        stays None.
        """
        if not self.proxy:
            return

        with zipfile.ZipFile(FIREFOX_PLUGIN_FILE, 'w') as zp:
            zp.writestr("manifest.json", firefox_manifest())
            zp.writestr("background.js", firefox_background_js(
                host=self.proxy.ip, port=int(self.proxy.port),
                user=self.proxy.user, password=self.proxy.password))

        self.proxy_addon_id = self.install_addon(FIREFOX_PLUGIN_FILE, temporary=True)

    def __create_options(self) -> webdriver.FirefoxOptions:
        """Build the Firefox options (preferences, arguments, load strategy)."""
        firefox_options = webdriver.FirefoxOptions()

        # Use the configured binary only when it exists; otherwise (unknown
        # platform or different install location) let the driver locate Firefox.
        binary = FIREFOX_BINARY.get(platform.system())
        if binary and os.path.isfile(binary):
            firefox_options.binary_location = binary

        firefox_options.add_argument("-private")

        # Each preference is listed once with the value that previously took
        # effect (earlier duplicate assignments were overwritten by later ones).
        preferences = {
            "browser.privatebrowsing.autostart": True,
            "browser.sessionstore.interval": 120000,
            "browser.sessionhistory.max_entries": 5,
            "browser.sessionhistory.max_total_viewers": 1,

            "signon.autologin.proxy": False,
            "xpinstall.signatures.required": False,
            "network.negotiate-auth.allow-proxies": False,
            "network.http.connection-timeout": RESPONSE_TIMEOUT,
            "network.http.response.timeout": RESPONSE_TIMEOUT,
            "dom.max_script_run_time": RESPONSE_TIMEOUT,
            "intl.accept_languages": self.languages,
            "permissions.default.image": 2,  # Image load disabled
            "permissions.default.stylesheet": 2,
            "dom.ipc.plugins.enabled.libflashplayer.so": 'false',

            "privacy.resistFingerprinting": True,

            "browser.cache.memory.enable": False,
            "browser.cache.offline.enable": False,
            "network.http.use-cache": False,

            "browser.preferences.defaultPerformanceSettings.enabled": False,
            "dom.ipc.processCount": 1,

            "dom.ipc.processCount.webIsolated": 1,
            "gfx.webrender.software": True,

            "network.prefetch-next": False,
            "network.http.pipelining": True,
            "network.http.proxy.pipelining": True,
            "network.http.max-connections": 300,
            "network.http.pipelining.maxrequests": 8,
            "content.notify.interval": 500000,
            "content.notify.ontimer": True,
            "content.switch.threshold": 250000,
            "browser.cache.memory.capacity": 256000,  # Cache capacity.
            "browser.startup.homepage": "about:blank",
            "reader.parse-on-load.enabled": False,  # Disable reader, we won't need that.
            "browser.pocket.enabled": False,  # Disable Pocket too.
            "loop.enabled": False,
            "browser.chrome.toolbar_style": 1,  # Text on Toolbar instead of icons
            "browser.display.show_image_placeholders": False,  # Don't show thumbnails on not loaded images.
            "browser.display.use_document_colors": False,  # Don't show document colors.
            "browser.display.use_document_fonts": 0,  # Don't load document fonts.
            "browser.display.use_system_colors": True,  # Use system colors.
            "browser.formfill.enable": False,  # Autofill on forms disabled.
            "browser.helperApps.deleteTempFileOnExit": True,  # Delete temporary files.
            "browser.shell.checkDefaultBrowser": False,
            "browser.startup.page": 0,  # blank
            "browser.tabs.forceHide": True,  # Disable tabs, we won't need that.
            "browser.urlbar.autoFill": False,  # Disable autofill on URL bar.
            "browser.urlbar.autocomplete.enabled": False,  # Disable autocomplete on URL bar.
            "browser.urlbar.showPopup": False,  # Disable list of URLs when typing on URL bar.
            "browser.urlbar.showSearch": False,  # Disable search bar.
            "extensions.checkCompatibility": False,  # Addon update disabled
            "extensions.checkUpdateSecurity": False,
            "extensions.update.autoUpdateEnabled": False,
            "extensions.update.enabled": False,
            "general.startup.browser": False,
            "plugin.default_plugin_disabled": False,

            "dom.webnotifications.enabled": False,
            "dom.push.enabled": False,
        }
        for pref_name, pref_value in preferences.items():
            firefox_options.set_preference(pref_name, pref_value)

        if self.debug:
            firefox_options.set_preference("devtools.chrome.enabled", True)
            firefox_options.set_preference("devtools.debugger.prompt-connection", False)
            firefox_options.set_preference("devtools.debugger.remote-enabled", True)
            firefox_options.set_preference("devtools.debugger.force-local", False)
            firefox_options.set_preference("browser.tabs.remote.autostart.2", False)
        else:
            firefox_options.add_argument('-headless')
        firefox_options.page_load_strategy = self.strategy
        if self.user_agent:
            firefox_options.set_preference("general.useragent.override", self.user_agent)
        return firefox_options

