Merge pull request #4 from gabrielkheisa/dev

URL validation
2026-07-04 17:52:58 +00:00 · 2023-12-19 16:36:23 +07:00 · 2023-12-19 16:34:37 +07:00 · 2023-12-19 15:47:15 +07:00 · 2023-12-19 15:38:08 +07:00
2 changed files with 45 additions and 11 deletions
@@ -17,8 +17,9 @@ https://ig.gabrielkheisa.xyz/reel/Cz3dNmDMVC9/?igshid=MzRlODBiNWFlZA==
 ```
 ### Returns redirect:
 ```
-https://scontent.cdninstagram.com/v/t66.30100-16/316926421_1723935788092224_3596729375098306652_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_cat=100&_nc_ohc=6lyBPVcjJkYAX8kLe3I&edm=APs17CUBAAAA&ccb=7-5&oh=00_AfBNGf7HzFPnd-mhfvhZZZRk_-PlN3qx3hqbsINaUGA4aA&oe=6576D61D&_nc_sid=10d13b
+https://scontent.cdninstagram.com/v/t66.30100-16/316926421_1723935788092224_3596729375098306652_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_cat=100&_nc_ohc=6lyBPVcj...............
 ```
+Note: This redirect URL (https://scontent.cdninstagram.com/...) expires after a while; when it does, re-run the query to get a new signed URL.

 ## DISCLAIMER:

@@ -1,7 +1,9 @@
 from selenium import webdriver
 from flask import Flask, request, redirect
 import concurrent.futures
+import re
 from collections import OrderedDict
+import time

 app = Flask(__name__)

@@ -18,7 +20,13 @@ options.add_argument('--no-sandbox')
 options.add_argument(f'user-agent={user_agent}')
 browser = webdriver.Chrome(executable_path="/usr/bin/chromedriver", options=options) 

-cache = OrderedDict(maxlen=50)
+# Define the maximum cache size and duration in seconds (4 hours)
+MAX_CACHE_SIZE = 50
+CACHE_DURATION = 4 * 60 * 60  # 4 hours in seconds
+cache = OrderedDict(maxlen=MAX_CACHE_SIZE)
+
+# Validate query, modify this regex as needed
+VALID_QUERY_REGEX = re.compile(r'^[\w\-\.\/]+$')

 # Function to handle web scraping using Selenium
 def get_video_source(query_string):
@@ -44,27 +52,52 @@ def get_video_source(query_string):
    except Exception as e:
        # Handle exceptions and return a default URL or re-raise the exception
        return base_url
+    
+@app.route("/", methods=["GET"])  # Route for empty query string
+def handle_empty_query():
+    return redirect("https://github.com/gabrielkheisa/instagram-downloader")

@app.route("/<path:query_string>", methods=["GET"])
 def get_video_source_server(query_string):
+    global cache  # Ensure we reference the global cache variable
+    print(query_string)
    if len(query_string) > 30:
-        # Reject the request by returning a 414 error code
-        return abort(414, description="Query string too long")
+        return '', 204
+
+    if not VALID_QUERY_REGEX.match(query_string):
+        return "Invalid link", 400
+
+
+    # Clean up entries older than 4 hours
+    current_time = time.time()
+    keys_to_remove = []
+    for key in list(cache.keys()):
+        value = cache[key]
+        if isinstance(value, dict) and "timestamp" in value:
+            timestamp = value["timestamp"]
+            if current_time - timestamp >= CACHE_DURATION:
+                keys_to_remove.append(key)
+
+    for key in keys_to_remove:
+        cache.pop(key, None)
+
    if query_string in cache:
-        # If cached, move to the front of the OrderedDict to update its age
+        # Re-insert the existing entry at the end of the cache (most recently used) and refresh its timestamp
        video_source = cache.pop(query_string)
+        video_source["timestamp"] = time.time()
        cache[query_string] = video_source
-        return redirect(video_source)
-    # Create a ThreadPoolExecutor for parallel execution with a timeout of 3 seconds
+        return redirect(video_source["url"])
+
+    # Create a ThreadPoolExecutor for parallel execution with a timeout of 8 seconds
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(get_video_source, query_string)
        try:
-            video_source = future.result(timeout=10)  # Timeout set to 3 seconds
-            cache[query_string] = video_source
+            video_source = future.result(timeout=8)  # Timeout set to 8 seconds
+            # Add the new entry to the cache with a timestamp
+            cache[query_string] = {"url": video_source, "timestamp": time.time()}
            return redirect(video_source)
        except concurrent.futures.TimeoutError:
-            # Handle timeout - return a default URL or handle as needed
-            return redirect(base_url)
+            return redirect(base_url)  # Handle timeout - return a default URL or handle as needed

 if __name__ == "__main__":
    app.run(debug=False, port=8080, host="0.0.0.0")
Author	SHA1	Message	Date
Gabriel Kheisa	6fa7019fb7	Merge pull request #4 from gabrielkheisa/dev URL validation	2023-12-19 16:36:23 +07:00
gabrielkheisa	6a581c917f	URL validation	2023-12-19 16:34:37 +07:00
Gabriel Kheisa	9deb70acc1	Merge pull request #2 from gabrielkheisa/dev	2023-12-19 15:47:15 +07:00
gabrielkheisa	595c1e54de	update	2023-12-19 15:38:08 +07:00