
AI-Powered DuckDuckGo SearXNG Search & Web Scraping

Behind this mask there is more than just flesh. Beneath this mask there is an idea… and ideas are bulletproof. Alan Moore, V for Vendetta


In modern development workflows, augmenting traditional web scraping with generative AI can dramatically improve query relevance and content understanding. In this guide, we will:

  1. Generate better DuckDuckGo queries from user prompts using an LLM (via Ollama).
  2. Route our searches through Tor for privacy.
  3. Fetch results with the duckduckgo_search library.
  4. Scrape each result’s HTML using robust extractors (Trafilatura, Readability, Newspaper3k, Playwright).
  5. Summarize the extracted content back through our AI pipeline (a short preview sketch follows this list).
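Putting it together, the end-to-end flow looks roughly like this (a preview only; every function used here is defined later in this guide, and the example query is illustrative):

refined_query = ai_web_search("best lightweight linux distros")  # Step 1: the LLM refines the prompt into a query
duckduckgo_search(refined_query, max_results=2, max_retries=4)   # Steps 2-5: Tor-routed search, scraping, summarization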

Imports and Dependencies

# vim queryweb.py
import socket # For checking if Tor is running
import subprocess # For executing shell commands
import requests # For making HTTP requests
from bs4 import BeautifulSoup # Import BeautifulSoup for parsing HTML and XML documents
from trafilatura.settings import use_config # Importing trafilatura settings for better HTML parsing
import trafilatura # Import trafilatura for web crawling and content extraction
import asyncio # Import asyncio for asynchronous programming
import mymessages # Import custom messages module for predefined message templates
from duckduckgo_search import DDGS # Import DDGS for performing DuckDuckGo searches
import re  # Importing regex for pattern matching
import os  # For environment variable management
import time  # For sleep functionality
from colorama import Fore, Style # Import Fore and Style for colored terminal text output
from util import display_text_color, call_ollama # Importing utility function for colored text display
from myscrape import scrape_web_content
from dotenv import load_dotenv # For loading environment variables from .env files
from utilollama import summarize
from stem import Signal
from stem.control import Controller
import stem
import random

script_dir = os.path.dirname(os.path.abspath(__file__))
dotenv_path = os.path.join(script_dir, '.env')
# Load environment variables from .env file
load_dotenv(dotenv_path, override=True)
# Get the model name for summarization from environment variables
model_query = os.environ.get("MYMODEL_QUERY", "")  # Ollama model to make the query
max_results = int(os.environ.get("MAX_RESULTS", "2")) # Maximum results from environment
max_retries = int(os.environ.get("MAX_RETRIES", "4")) # Maximum retries from environment
SEARX_URL = os.environ.get("SEARX_URL", "http://127.0.0.1:8080/search") # SearX URL from environment
# TOR_CONTROL_PASS: This variable holds the password required to authenticate connections to the Tor control port.
TOR_CONTROL_PASS = os.environ.get("TOR_CONTROL_PASS", "YOUR_PASSWORD") # Tor control password
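For reference, a minimal .env file for this script might look like the following (all values are placeholders; adjust them to your setup):

# .env (placeholders; adjust to your setup)
MYMODEL_QUERY=qwen3:8b
MAX_RESULTS=2
MAX_RETRIES=4
SEARX_URL=http://127.0.0.1:8080/search
TOR_CONTROL_PASS=YOUR_PASSWORD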

query_generator

def query_generator(model_name: str, messages: list[dict]) -> str:
    """
    Generate a DuckDuckGo search query with the given model, based on the last user message.

    Args:
        model_name: Ollama model name (e.g. 'deepseek-r1:8b')
        messages: History of chat messages (each a dict with 'role' & 'content')

    Returns:
        The text of a DuckDuckGo query.
    """
    print(f"Generating query using model: {model_name}")  # Debug print statement
    # Check if messages list is not empty and ensures that the last message contains a 'content' field.
    # If not, it raises a ValueError.
    if not messages or 'content' not in messages[-1]:
        raise ValueError("`messages` must be a non-empty list of dicts with a 'content' field")

    # It retrieves the content of the last user message, which will be used to generate a search query.
    user_prompt = messages[-1]['content']

    # An instruction string is constructed that tells the model to create a DuckDuckGo query based on the user's prompt.
    # This is crafted to specify the expected format of the output. This clarity improves the chances of receiving a useful response.
    instruction = (
        "/no_think\n"
        "You are a DuckDuckGo query generator.  \n"
        "Input: free-form user text.  \n"
        "Output: only the best DuckDuckGo search string.\n"
        f"User prompt: {user_prompt}\n"
        "Search query:"
    )

    try:
        # Call the Ollama API to get the response based on the instruction
        raw = call_ollama(instruction, mymessages.query_msg, model_name)
        # Debug print statement to confirm response receipt
        print("query_generator. Response received from model.")

    except Exception as e:
        # Handle any errors gracefully
        raise RuntimeError(f"Ollama chat failed: {e}") # Raise an error with a message if the chat fails

    # Check if the response is a string; if not, fallback to the original user prompt
    if not isinstance(raw, str):
        return user_prompt

    # Strip out any <think>…</think> blocks from the response
    cleaned = re.sub(r'<think>.*?</think>', '', raw, flags=re.DOTALL).strip()
    # Split the cleaned response into lines and filter out empty lines
    lines = [ln.strip() for ln in cleaned.splitlines() if ln.strip()]

    # If there are no valid lines, fallback to user prompt
    if not lines:
        return user_prompt

    # Get the last non-empty line as the candidate query
    candidate = lines[-1]

    # Remove optional leading "QUERY:" or surrounding quotes from the candidate query
    query = re.sub(
        r'^(?:QUERY:)?\s*["“]?(.+?)["”]?$',
        r'\1',
        candidate,
        flags=re.IGNORECASE
    ).strip()

    # Display the response from the model
    display_text_color(f"Response from model: {query}", Fore.RED)
    # Final fallback if query is empty
    return query or user_prompt
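A quick usage example (the refined query shown is illustrative; the actual output depends on the model):

messages = [{"role": "user", "content": "What are good lightweight Linux distributions for an old laptop?"}]
print(query_generator("qwen3:8b", messages))
# e.g. -> lightweight linux distributions for old laptops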

ai_web_search

def ai_web_search(query: str, model_name: str = "deepseek-r1:8b") -> str:
    """
    It generates a refined search query based on the user's input and the conversation history, leveraging the specified AI model to enhance the quality of the search terms.

    Args:
        query (str): The search query to use
        model_name (str): Name of the model to use for generating the query

    Returns:
        str: It returns the refined search query, ready for use in further web searches.
    """
    # Initialize conversation with system and user prompts
    messages = [
        mymessages.assistant_msg, # System prompt from mymessages module
        mymessages.myuser_msg, # User prompt from mymessages module
    ]

    # Append the user's query to the conversation
    messages.append({"role": "user", "content": query})

    # It calls query_generator to generate a refined search query based on the conversation history. This utilizes the specified AI model.
    query = query_generator(model_name, messages)

    # Debug print statement to show the refined query
    print(f"Refined search query: {query}")
    # Return the final refined search query
    return query

Tor-Backed DuckDuckGo Searches

We use Tor to anonymize our DuckDuckGo API calls via DDGS. For each result we call scrape_web_content, which scrapes the URL and falls back through several extraction engines (Trafilatura, Readability, Newspaper3k, Playwright), and then summarize the extracted text with Ollama (except for trafilatura's output).
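scrape_web_content itself lives in the external myscrape module. A minimal sketch of the kind of fallback chain it implements might look like this (an illustration only, assuming Trafilatura first and Readability second; the real module also tries Newspaper3k and Playwright):

import requests
import trafilatura
from readability import Document  # pip install readability-lxml
from bs4 import BeautifulSoup

def scrape_web_content_sketch(url: str):
    """Extract the main text of a page: Trafilatura first, Readability as fallback."""
    downloaded = trafilatura.fetch_url(url)  # Fetch the page with trafilatura's own downloader
    if downloaded:
        text = trafilatura.extract(downloaded)  # Pull out the main article text
        if text:
            return text
    # Fallback: fetch the raw HTML and let Readability isolate the main content
    html = requests.get(url, timeout=10).text
    main_html = Document(html).summary()  # Readability returns the main block as HTML
    return BeautifulSoup(main_html, "html.parser").get_text(separator="\n", strip=True)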

When you route DuckDuckGo searches through Tor, you often hit 403 rate-limits on their HTML endpoints. This happens because Tor exit nodes are shared and frequently blacklisted. In this article, we’ll:

  1. Rotate Tor circuits via the control port (stem library).
  2. Randomize User-Agent headers so each request looks like a different browser.
  3. Implement exponential backoff and retry logic.
  4. Fallback to manual HTML scraping if the duckduckgo_search library keeps failing.

Prerequisites: pip install duckduckgo_search stem requests beautifulsoup4. Also install a SOCKS-capable backend that requests can use: pip install requests[socks] (or, equivalently, pip install PySocks).

Your Tor Browser installation ships with its own torrc file under the “Data” directory (e.g., C:\Users\Owner\Desktop\Tor Browser\Browser\TorBrowser\Data\Tor\torrc). Open it in a text editor and add lines like these to enable the control port:

ControlPort 9051                       # Tell Tor to listen on localhost TCP port 9051 for control commands (e.g. NEWNYM).
HashedControlPassword 16:ABCD1234...   # Replace with your actual hashed password.
# Instead of storing your cleartext password, you supply its hash.
# Generate it from a shell with Tor in your PATH
# (or from C:\Users\Owner\Desktop\Tor Browser\Browser\TorBrowser\Tor\):
.\tor --hash-password "YOUR-PASSWORD"

After saving, restart Tor Browser so it picks up the new torrc.
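To confirm that traffic really leaves through Tor, you can hit the Tor Project's check API through the SOCKS proxy (a quick sanity check; it assumes Tor is already listening on 127.0.0.1:9050):

import requests

proxies = {"http": "socks5h://127.0.0.1:9050", "https": "socks5h://127.0.0.1:9050"}
resp = requests.get("https://check.torproject.org/api/ip", proxies=proxies, timeout=20)
print(resp.json())  # e.g. {'IsTor': True, 'IP': '<current exit node IP>'}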

Rotate Tor Circuit

Use stem to signal a new identity before each attempt:

from stem import Signal
from stem.control import Controller
import random
import requests
from bs4 import BeautifulSoup
# Path to the Tor executable
# The r prefix indicates a raw string, which is useful for Windows paths to avoid issues with backslashes.
TOR_EXE_PATH = r"C:\Users\Owner\Desktop\Tor Browser\Browser\TorBrowser\Tor\tor.exe"
# The localhost address where the SOCKS proxy listens
TOR_SOCKS_HOST = "127.0.0.1"
# The port number for the Tor SOCKS proxy, which is 9050 by default.
TOR_SOCKS_PORT = 9050
# TOR_SOCKS: This variable defines the URL for the SOCKS proxy provided by Tor:
# 1. socks5h indicates that the connection will use the SOCKS5 protocol with hostname resolution through the proxy.
# 2. 127.0.0.1 refers to the localhost, meaning the proxy is running on the same machine as the application.
# 3. 9050 is the default port for the SOCKS proxy in Tor.
# This is where applications can connect to send their traffic through the Tor network, ensuring anonymity.
TOR_SOCKS = "socks5h://127.0.0.1:9050"
# This variable specifies the port used to communicate with the Tor control interface.
# Default Port: 9051 is the standard control port for Tor.
# This port allows clients to send commands to the Tor process, such as requesting a new circuit (via the NEWNYM command) or checking the status of the Tor connection.
TOR_CONTROL_PORT = 9051
# Create a small list of common browser UAs and pick one at random each try:
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15"
]

def renew_tor_identity() -> bool:
    """Signal Tor to build a new circuit (NEWNYM, i.e. a new exit IP). Returns True on success."""
    print("🔄 Renewing Tor identity...")  # Debug print statement
    # Attempt to connect to the Tor control port and request a new identity
    # This is done to change the exit node and thus the IP address used for requests.
    # The function will retry once if the initial connection fails, allowing for Tor to be started if it wasn't running
    attempt = 0
    max_attempts = 2
    # Loop to handle connection attempts
    # It will try to connect to the Tor control port and signal for a new identity.
    while attempt < max_attempts:
        print(f"Attempt {attempt + 1} to renew Tor identity...")
        # Use the stem library to connect to the Tor control port
        try:
            with Controller.from_port(port=TOR_CONTROL_PORT) as controller:
                # Attempt to connect to the Tor control port
                # If successful, authenticate with the control password and signal for a new identity
                print("Connected to Tor control port.")
                # Authenticate with the Tor control port using the predefined password
                # This is necessary to send commands like NEWNYM
                # The password is set in the torrc file
                controller.authenticate(password=TOR_CONTROL_PASS)
                controller.signal(Signal.NEWNYM)
                print("Requested new Tor identity.")
                # Wait for a short period to allow Tor to establish a new circuit
                time.sleep(controller.get_newnym_wait())
                return True
        except stem.SocketError as e:
            print(f"Could not connect to Tor on port {TOR_CONTROL_PORT}: {e}")
            if attempt == 0:
                print("Trying to launch Tor...")
                # If the first attempt fails, try to start Tor
                # This is useful if Tor wasn't running when the script started
                start_tor()
                attempt += 1
                continue
            else:
                # If the second attempt also fails, print an error message and return False
                print("Failed to launch Tor after retry.")
                return False
        except stem.connection.AuthenticationFailure as e:
            # Handle authentication failure if the control password is incorrect
            # This can happen if the password in the script does not match the one set in the torrc file
            print(f"Tor authentication failed: {e}")
            return False
        except Exception as e:
            # Catch any other unexpected exceptions and print an error message
            print(f"Unexpected error: {e}")
            return False
    # All attempts exhausted without success
    return False

def random_ua() -> str:
    """Return a random User-Agent header from the predefined list."""
    return random.choice(USER_AGENTS)

def tor_is_listening(host: str = TOR_SOCKS_HOST, port: int = TOR_SOCKS_PORT) -> bool:
    """Return True if something is already listening on the Tor SOCKS port.

    Args:
        host (str): SOCKS host to check (defaults to TOR_SOCKS_HOST).
        port (int): SOCKS port to check (defaults to TOR_SOCKS_PORT).

    Returns:
        True if a TCP connection succeeds (Tor is likely running);
        False on timeout or connection refusal.
    """
    try:
        # Attempt to create a connection to the specified Tor socks host and port
        with socket.create_connection((host, port), timeout=2):
            return True # If successful, Tor is likely running
    except Exception:
        return False # If an exception occurs, Tor is not running

def start_tor():
    """Launch Tor in the background if it is not already running."""
    if tor_is_listening(): # Check if Tor is already active.
        return # If listening, exit the function, we are done.
    # Check if the Tor executable exists
    if not os.path.isfile(TOR_EXE_PATH):
        raise RuntimeError(
            f"Tor executable not found at {TOR_EXE_PATH}. "
            "Install Tor Browser or Expert Bundle."
        )
    # Start Tor in a detached process (background) using subprocess.Popen,
    # redirecting its output to DEVNULL to prevent console clutter.
    subprocess.Popen([TOR_EXE_PATH], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    # Wait up to 30 seconds for Tor to open its SOCKS port.
    for _ in range(30):
        if tor_is_listening(): # Each second, check whether Tor has opened the SOCKS port
            return # Port is open; we are done
        print("Waiting for Tor to open SOCKS port...") # Debug print statement
        time.sleep(1) # Poll once per second
    # Raise an error if the port never opened
    raise TimeoutError("Tor did not open the SOCKS port within 30s")
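A quick smoke test for the helpers above (it assumes the Windows TOR_EXE_PATH defined earlier; api.ipify.org is just one convenient IP-echo service, any equivalent works):

start_tor()  # No-op if Tor is already listening on the SOCKS port
print("Tor SOCKS proxy ready:", tor_is_listening())
if renew_tor_identity():
    ip = requests.get("https://api.ipify.org", proxies={"https": TOR_SOCKS}, timeout=20).text
    print(f"New Tor exit IP: {ip}")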

duckduckgo_search

def duckduckgo_search(query: str, max_results = 2, max_retries=4):
    """
    Perform a DuckDuckGo search and process up to `max_results` hits.

    Args:
        query (str): The search query to use
        max_results (int): Maximum number of results to process
        max_retries (int): Maximum number of search attempts before falling back

    Returns:
        None on success; an error message string if the inputs are invalid.
    """
    print(f"DuckDuckGo: {query}") # Debug print statement for the query

    # Ensure Tor is ready before making the search
    start_tor()

    # (A fresh DDGS instance, routed through Tor, is built on every retry attempt below.)

    ## Confirm that the query is valid
    # It checks if the query is empty, if max_results is a positive integer, and if query is a string.
    # If any of these checks fail, it prints an error message and returns an appropriate error message
    if not query:
        print("Error: No search query provided.")
        return "No search query provided."
    if not isinstance(max_results, int) or max_results <= 0:
        print("Error: max_results must be a positive integer.")
        return "max_results must be a positive integer."
    if not isinstance(query, str):
        print("Error: query must be a string.")
        return "query must be a string."

    delay = 2  # Initial backoff at 2 seconds

    # Attempt to perform the search up to `max_retries` times
    for attempt in range(1, max_retries + 1):
        # Renew Tor exit IP
        renew_tor_identity()

        # Pick a fresh User-Agent
        ua = random_ua()

        # Set the User-Agent for the DDGS instance
        # This is important to avoid 403 errors from DuckDuckGo, which may block requests
        # due to suspicious activity or automated scraping attempts.
        ddgs = DDGS(
            proxy=TOR_SOCKS,
            timeout=20,
            headers={"User-Agent": ua}
        )

        # Attempt the search
        try:
            display_text_color(f"Attempt {attempt}: '{query}' via Tor exit (UA: {ua})", Fore.YELLOW)
            # Perform the DuckDuckGo search using the DDGS instance
            # The `text` method performs a text search and returns a list of result dicts.
            # The `max_results` parameter limits the number of results returned.
            results = ddgs.text(query, max_results=max_results)

            # Process every result, not just the first one
            processed = 0
            for idx, result in enumerate(results, start=1):
                # Skip results that are missing a title or href
                if not result.get('title') or not result.get('href'):
                    display_text_color(f"Result {idx} is missing title or href.", Fore.RED)
                    continue

                # Display the result title and URL in cyan color
                display_text_color(f"{idx}. {result['title']}\n   {result['href']}", Fore.CYAN)
                # Scrape and summarize the content at the URL with Ollama
                scrape_web_summarize(result['href'])
                processed += 1

            # Success if at least one result was processed
            if processed:
                return

        except Exception as e:
            display_text_color(f"[{attempt}] Unexpected error: {e}", Fore.RED)

        # If an exception occurs, wait and retry with exponential backoff
        time.sleep(delay)
        delay *= 2  # Exponential backoff

    # If we get here, all retries failed; log the failure
    print("DuckDuckGo search failed after multiple retries.")
    # Manual HTML fallback: if DDGS still 403s, fetch and parse DuckDuckGo’s HTML frontend directly
    duckduckgo_html_fallback(query)
    # If we have a SearXNG instance (e.g., a container in Proxmox running the SearXNG Docker image),
    # we can also try that as a fallback.
    searx_search_fallback(query)

def my_duckduckgo_search(query: str, model_name="qwen3:8b"):
    """
    Perform a DuckDuckGo search and return the first results.

    Args:
        query (str): The search query to use
        model_name (str): Name of the model to use for generating the query

    Returns:
        None: It does not return anything, but prints the search results and scrapes the content of each result.
    """
    print(f"my_duckduckgo_search: {query}") # Debug print statement

    # Generate an improved search query using AI (the model given by model_name)
    improved_query = ai_web_search(query, model_name) # Leverage AI to refine the query
    # Perform the DuckDuckGo search with the improved query
    duckduckgo_search(improved_query, max_results, max_retries)

Fallbacks & Scraping Helpers

def scrape_web_title(url: str = "") -> str:
    """
    Scrape the title of a web page.

    Args:
        url (str): The URL of the web page to scrape

    Returns:
        str: The title of the web page or an error message
    """

    # Validate the URL format to ensure it starts with http:// or https://
    if not url.startswith(("http://", "https://")):
        return "Invalid URL format; make sure it starts with http:// or https://"

    try:
        # Send a GET request to the specified URL with a timeout of 10 seconds
        response = requests.get(url, timeout=10)
        response.raise_for_status() # Raise an error for bad responses (4xx or 5xx)
    except requests.exceptions.ConnectionError:
        return (
            f"Unable to connect to {url}. "
            "If this is your local Hugo server, ensure you ran:\n"
            "  hugo server --bind 0.0.0.0\n"
            "so that the site is reachable from this script."
        )
    except requests.exceptions.RequestException as e:
        return f"Error fetching content from {url}: {e}"

    # Parse the response content to extract the <title> element
    soup = BeautifulSoup(response.content, 'html.parser')
    # Extract the title from the <title> tag, if it exists
    # If the title tag is not found, it returns None
    title = soup.title.string if soup.title and soup.title.string else None
    return title.strip() if title else "No title element found on page"
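# Example usage of scrape_web_title (the exact title naturally depends on the page):
# print(scrape_web_title("https://www.python.org"))  # e.g. -> "Welcome to Python.org"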

def scrape_web_summarize(url: str, model_name: str = "deepseek-r1:8b") -> str:
    """
    Scrape and summarize web content using Ollama.

    Args:
        url (str): The URL of the web page to scrape
        model_name (str): Ollama model name

    Returns:
        str: A summary of the page content or an error message
    """

    print(f"Scraping content from: {url}") # Debug print statement to track URL being scraped
    # Validate the URL format to ensure it starts with http:// or https://
    if not url.startswith(("http://", "https://")):
        return "Invalid URL format"

    try:
        # Call a separate, auxiliary function to scrape the web content
        content = scrape_web_content(url)
        # If the content is None or empty, return an error message
        if content is None or not content.strip():
            return f"No content found at {url}. It may be blocked or unavailable."

        # Summarize the scraped content using the Ollama model
        return summarize(content)
    except requests.RequestException as e:
        # If there's an error fetching the content, return an error message
        return f"Error fetching content from {url}: {e}"


def duckduckgo_html_fallback(query: str):
    """
    Query DuckDuckGo’s HTML frontend and parse the links manually.
    """
    display_text_color("duckduckgo_html_fallback", Fore.YELLOW)
    # Ensure Tor is running before making the request
    start_tor()
    # Set the URL for DuckDuckGo's HTML frontend
    # This is the endpoint that serves HTML search results
    url = "https://html.duckduckgo.com/html"
    # Prepare the payload with the search query
    # The payload is a dictionary containing the search query
    payload = {"q": query}
    # Set the headers to include a random User-Agent
    # This is important to avoid 403 errors from DuckDuckGo, which may block requests
    headers = {"User-Agent": random_ua()}
    # Set the proxies to use the Tor SOCKS proxy
    # This ensures that the request is routed through Tor for anonymity
    proxies = {"http": TOR_SOCKS, "https": TOR_SOCKS}
    try:
        # Send a POST request to DuckDuckGo's HTML frontend with the query
        # This uses the Tor SOCKS proxy for anonymity
        resp = requests.post(url, data=payload, headers=headers, proxies=proxies, timeout=20)
        resp.raise_for_status()  # if 403 persists, we’ll see an exception

        # Parse the response content using BeautifulSoup
        # This will allow us to extract the search results from the HTML
        soup = BeautifulSoup(resp.text, "html.parser")
        # Find the search results in the parsed HTML
        # The results are contained in <a> tags with the class "result__a"

        # Select the first 5 search results for brevity
        for a in soup.select("a.result__a")[:5]:
            # Extract the title and href from each search result
            # The title is the text of the link; the href is the URL it points to
            title = a.get_text(strip=True)
            href = a["href"]
            # Scrape and summarize the web content using Ollama
            summary = scrape_web_summarize(href)
            if summary:
                # Display the title, href, and summary
                display_text_color(f"{title}\n   {href}\n   {summary}", Fore.GREEN)

    except requests.exceptions.HTTPError as e:
        # Handle HTTP errors, such as 401 Unauthorized or 403 Forbidden
        if resp.status_code == 401:
            print("❌ Unauthorized (401): Access is denied for the DuckDuckGo HTML endpoint.")
        elif resp.status_code == 403:
            print("❌ Forbidden (403): DuckDuckGo is blocking your automated request. Try another IP, time, or proxy.")
        else:
            print(f"❌ HTTP error: {e}")
    except requests.exceptions.ProxyError as e:
        # Handle proxy errors
        print(f"❌ Proxy connection error: {e}")
    except requests.exceptions.RequestException as e:
        # Handle other request exceptions
        print(f"❌ Network error: {e}")
    except Exception as e:
        # Handle unexpected exceptions
        print(f"❌ Unexpected error: {e}")

Searx Fallback

DuckDuckGo blocks automated access. The HTML interface at https://html.duckduckgo.com/html is not intended for programmatic scraping. DuckDuckGo actively blocks bot traffic, especially frequent requests or those via proxies/anonymizers like Tor.

If you have set up a SearXNG container in your homelab, you have another alternative that does not depend on a third party's search policies.
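One caveat: a stock SearXNG instance only answers HTML requests. To use the JSON API below, enable the json format in its settings.yml (a minimal snippet, assuming a default SearXNG install):

# settings.yml (SearXNG)
search:
  formats:
    - html
    - json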

def searx_search_fallback(query: str, max_results: int = 5, searx_url: str = None):
    """
    Query our SearX instance and print parsed links.

    Args:
        query (str): The search query to use. Must be non-empty.
        max_results (int): The maximum number of search results to return (default is 5).
        searx_url (str): Full URL of your SearX search endpoint. If None, read from the SEARX_URL env var or default to http://127.0.0.1:8080/search.

    Raises:
        ValueError: If query is empty or max_results is not positive.
        (HTTP and network failures are logged and retried rather than raised.)
    """
    display_text_color("searx_search_fallback", Fore.YELLOW)
    # Validate the inputs
    if not isinstance(query, str) or not query.strip():
        raise ValueError("query must be a non-empty string")
    if not isinstance(max_results, int) or max_results <= 0:
        raise ValueError("max_results must be a positive integer")

    # Get the SearX URL
    # If searx_url is not provided, read it from the environment variable
    searx_url = searx_url or SEARX_URL
    if not searx_url.startswith(("http://", "https://")):
        raise ValueError(f"Invalid SEARX_URL: {searx_url}")

    # Ensure Tor is running before making the request
    start_tor()
    # Use the validated SearX endpoint (from the argument or the SEARX_URL environment variable)
    url = searx_url
    # Prepare the parameters for the SearX search
    # The parameters include the search query, format (JSON), and language
    params = {
        "q": query,  # The query parameter for the search
        "format": "json",  # Specify that the response should be in JSON format
        "language": "en",  # Set the language for the search results
        "count": max_results  # Set the maximum number of search results
    }
    # Set the headers to include a User-Agent
    # This is important to avoid 403 errors from SearX, which may block requests
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; MyBot/1.0; +http://yourdomain.example)",
    }
    # Set the SOCKS proxy for Tor
    proxies = {"http": "socks5h://127.0.0.1:9050",
               "https": "socks5h://127.0.0.1:9050"}

    for attempt in range(1, 4):
        try:
            # Rotate Tor identity
            # This is done to change the exit node and thus the IP address used for requests
            with Controller.from_port(port=TOR_CONTROL_PORT) as ctl:
                # Authenticate with the Tor control port (password from the environment, never hardcoded)
                ctl.authenticate(password=TOR_CONTROL_PASS)
                # Signal for a new identity
                ctl.signal(Signal.NEWNYM)
                # Wait for a short period to allow Tor to establish a new circuit
                time.sleep(ctl.get_newnym_wait())

                # Make the SearX request through the Tor SOCKS proxy
                resp = requests.get(
                    url, params=params, headers=headers, proxies=proxies, timeout=20)
                # Raise an exception for HTTP errors (4xx or 5xx)
                # This will raise an HTTPError if the response status code indicates an error
                resp.raise_for_status()
                # Parse the JSON response from SearX
                # This will convert the response content into a Python dictionary
                data = resp.json()

                # Check if the response contains results
                # If the "results" key is not present, print an error message and return
                if "results" not in data:
                    print("No results found in SearX response.")
                    return
                # Extract the search results from the parsed JSON
                # The results are contained in the "results" key of the JSON response
                results = data.get("results", [])
                # If the results list is empty, report and stop
                if not results:
                    print("ℹ️  No results returned by SearX.")
                    return

                # Iterate over the search results
                for idx, result in enumerate(results[:max_results], start=1):
                    # Extract the title and href from each search result
                    title = result.get("title", "")
                    # The href is the URL of the search result
                    # If the "url" key is not present, it defaults to an empty string
                    href = result.get("url", "")
                    # We scrape and summarize as before
                    summary = scrape_web_summarize(href)
                    if summary:
                        # Print the result number, title, href, and summary
                        display_text_color(f"{idx}. {title}\n   {href}\n   {summary}", Fore.GREEN)

                return

        except Exception as e:
            # Handle unexpected exceptions, then back off before the next attempt
            print(f"Searx search failed (attempt {attempt}): {e}")
            time.sleep(2 * attempt)

    print("SearX search failed after multiple retries.")

if __name__ == "__main__":
    my_duckduckgo_search("linux distributions","qwen3:8b") # Example usage of the search function