From 6a581c917f42abcf505449c6948ea60bd092d235 Mon Sep 17 00:00:00 2001 From: gabrielkheisa Date: Tue, 19 Dec 2023 16:34:37 +0700 Subject: [PATCH] URL validation --- README.md | 3 ++- run.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 99bc235..02e5b69 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,9 @@ https://ig.gabrielkheisa.xyz/reel/Cz3dNmDMVC9/?igshid=MzRlODBiNWFlZA== ``` ### Returns redirect: ``` -https://scontent.cdninstagram.com/v/t66.30100-16/316926421_1723935788092224_3596729375098306652_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_cat=100&_nc_ohc=6lyBPVcjJkYAX8kLe3I&edm=APs17CUBAAAA&ccb=7-5&oh=00_AfBNGf7HzFPnd-mhfvhZZZRk_-PlN3qx3hqbsINaUGA4aA&oe=6576D61D&_nc_sid=10d13b +https://scontent.cdninstagram.com/v/t66.30100-16/316926421_1723935788092224_3596729375098306652_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_cat=100&_nc_ohc=6lyBPVcj............... ``` +Notes: This redirect URL (https://scontent.cdninstagram.com/...) 
expires, after which you need to re-run the query to get a new URL signature ## DISCLAIMER: diff --git a/run.py b/run.py index a8f66d6..45e10b5 100644 --- a/run.py +++ b/run.py @@ -1,6 +1,7 @@ from selenium import webdriver from flask import Flask, request, redirect import concurrent.futures +import re from collections import OrderedDict import time @@ -24,6 +25,9 @@ MAX_CACHE_SIZE = 50 CACHE_DURATION = 4 * 60 * 60 # 4 hours in seconds cache = OrderedDict(maxlen=MAX_CACHE_SIZE) +# Validate query, modify this regex as needed +VALID_QUERY_REGEX = re.compile(r'^[\w\-\.\/]+$') + # Function to handle web scraping using Selenium def get_video_source(query_string): try: @@ -48,14 +52,22 @@ def get_video_source(query_string): except Exception as e: # Handle exceptions and return a default URL or re-raise the exception return base_url + +@app.route("/", methods=["GET"]) # Route for empty query string +def handle_empty_query(): + return redirect("https://github.com/gabrielkheisa/instagram-downloader") @app.route("/", methods=["GET"]) def get_video_source_server(query_string): global cache # Ensure we reference the global cache variable - + print(query_string) if len(query_string) > 30: return '', 204 + if not VALID_QUERY_REGEX.match(query_string): + return "Invalid link", 400 + + # Clean up entries older than 4 hours current_time = time.time() keys_to_remove = []