Merge pull request #4 from gabrielkheisa/dev

URL validation
This commit is contained in:
Gabriel Kheisa 2023-12-19 16:36:23 +07:00 committed by GitHub
commit 6fa7019fb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 2 deletions

View File

@ -17,8 +17,9 @@ https://ig.gabrielkheisa.xyz/reel/Cz3dNmDMVC9/?igshid=MzRlODBiNWFlZA==
```
### Returns redirect:
```
https://scontent.cdninstagram.com/v/t66.30100-16/316926421_1723935788092224_3596729375098306652_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_cat=100&_nc_ohc=6lyBPVcjJkYAX8kLe3I&edm=APs17CUBAAAA&ccb=7-5&oh=00_AfBNGf7HzFPnd-mhfvhZZZRk_-PlN3qx3hqbsINaUGA4aA&oe=6576D61D&_nc_sid=10d13b
https://scontent.cdninstagram.com/v/t66.30100-16/316926421_1723935788092224_3596729375098306652_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_cat=100&_nc_ohc=6lyBPVcj...............
```
Note: This redirect URL (https://scontent.cdninstagram.com/...) expires after a while, so you will need to re-run the query to obtain a freshly signed URL
## DISCLAIMER:

14
run.py
View File

@ -1,6 +1,7 @@
from selenium import webdriver
from flask import Flask, request, redirect
import concurrent.futures
import re
from collections import OrderedDict
import time
@ -24,6 +25,9 @@ MAX_CACHE_SIZE = 50
CACHE_DURATION = 4 * 60 * 60  # Cache entry lifetime: 4 hours, in seconds
# Insertion-ordered cache of query -> resolved video URL.
# BUG FIX: OrderedDict() accepts no `maxlen` keyword -- passing one does not
# bound the dict, it just creates a spurious {'maxlen': N} entry. The size
# limit (MAX_CACHE_SIZE) must be enforced manually when inserting entries.
cache = OrderedDict()
# Whitelist of characters allowed in an incoming query path
# (word chars, dash, dot, slash); modify this regex as needed.
VALID_QUERY_REGEX = re.compile(r'^[\w\-\.\/]+$')
# Function to handle web scraping using Selenium
def get_video_source(query_string):
try:
@ -48,14 +52,22 @@ def get_video_source(query_string):
except Exception as e:
# Handle exceptions and return a default URL or re-raise the exception
return base_url
@app.route("/", methods=["GET"]) # Route for empty query string
def handle_empty_query():
    """Send bare-root requests to the project's GitHub page."""
    project_page = "https://github.com/gabrielkheisa/instagram-downloader"
    return redirect(project_page)
@app.route("/<path:query_string>", methods=["GET"])
def get_video_source_server(query_string):
global cache # Ensure we reference the global cache variable
print(query_string)
if len(query_string) > 30:
return '', 204
if not VALID_QUERY_REGEX.match(query_string):
return "Invalid link", 400
# Clean up entries older than 4 hours
current_time = time.time()
keys_to_remove = []