mirror of
https://github.com/gabrielkheisa/instagram-downloader.git
synced 2025-09-26 13:09:22 +00:00
Compare commits
2 Commits
78b6f3dc7e
...
dev
Author | SHA1 | Date | |
---|---|---|---|
|
43e540d7b0 | ||
|
1410f1c3d8 |
28
Dockerfile
Normal file
28
Dockerfile
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Use the official Selenium standalone Chrome image as base
FROM selenium/standalone-chrome:112.0.5615.165-chromedriver-112.0.5615.49

ENV DEBIAN_FRONTEND=noninteractive

# Install Python 3 and pip.
# The package-index refresh and the install share one RUN instruction so that a
# cached "apt-get update" layer can never be combined with a later install step
# that would then resolve against stale package lists.
RUN sudo apt-get update \
    && sudo apt-get install -y python3 python3-pip

# Install Selenium, Flask and the pinned urllib3 in a single resolver pass, so
# pip never installs a newer urllib3 first and has to replace it afterwards.
# NOTE(review): urllib3 is presumably pinned to 1.26.x because selenium 3.141.0
# does not work with urllib3 2.x - confirm before changing either pin.
RUN sudo pip install selenium==3.141.0 urllib3==1.26.16 Flask

# Set the working directory
WORKDIR /usr/src/app

# Copy the Python scripts into the container
COPY . .

# Port the Flask app started by run.py listens on
EXPOSE 8080

# Start the web server
CMD ["python3", "run.py"]
|
16
README.md
16
README.md
@@ -66,10 +66,24 @@ This micro web server does not directly download the Instagram Reels video. It s
|
|||||||
git clone https://github.com/gabrielkheisa/instagram-downloader.git
|
git clone https://github.com/gabrielkheisa/instagram-downloader.git
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### With Dockerfile
|
||||||
|
|
||||||
|
1. Build the Docker image
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo docker build -t instagram-downloader .
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Run the container
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo docker run -d -p 8080:8080 instagram-downloader
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
1. Start the Flask app:
|
1. Start the Flask app (skip this step if you use Docker):
|
||||||
|
|
||||||
```
|
```
|
||||||
python run.py
|
python run.py
|
||||||
|
60
run.py
60
run.py
@@ -1,12 +1,29 @@
|
|||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from flask import Flask, request, redirect
|
from flask import Flask, redirect
|
||||||
import concurrent.futures
|
|
||||||
import re
|
import re
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
# Cache limits: the maximum number of entries and how long (in seconds) an
# entry stays valid.
MAX_CACHE_SIZE = 50
CACHE_DURATION = 4 * 60 * 60  # 4 hours in seconds

# OrderedDict has no ``maxlen`` option: ``OrderedDict(maxlen=...)`` treats the
# keyword as initial content and yields a dict that already holds the bogus
# entry {"maxlen": 50}. Start with an empty mapping instead; MAX_CACHE_SIZE has
# to be enforced by the code that inserts entries.
cache = OrderedDict()

# Validate query, modify this regex as needed
VALID_QUERY_REGEX = re.compile(r'^[\w\-\.\/]+$')
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
def invalidate_old_entries():
    """Remove expired entries from the module-level ``cache``.

    Cache values are stored as ``{"url": ..., "timestamp": ...}`` dicts, so the
    age of an entry is read from its ``"timestamp"`` key. An entry is dropped
    when it is older than ``CACHE_DURATION`` seconds. Values that are not in
    that shape (no usable timestamp) can never be served and are dropped too.
    """
    cutoff = time.time() - CACHE_DURATION

    # Iterate over a snapshot so entries can be deleted while looping.
    for key, entry in list(cache.items()):
        timestamp = entry.get("timestamp") if isinstance(entry, dict) else None
        if timestamp is None or timestamp < cutoff:
            del cache[key]
|
||||||
|
|
||||||
# Define the base URL for scraping
|
# Define the base URL for scraping
|
||||||
base_url = "https://instagram.com" # Replace with your actual base URL
|
base_url = "https://instagram.com" # Replace with your actual base URL
|
||||||
|
|
||||||
@@ -18,32 +35,32 @@ options.add_argument('--disable-gpu')
|
|||||||
options.add_argument('--window-size=1920,1080')
|
options.add_argument('--window-size=1920,1080')
|
||||||
options.add_argument('--no-sandbox')
|
options.add_argument('--no-sandbox')
|
||||||
options.add_argument(f'user-agent={user_agent}')
|
options.add_argument(f'user-agent={user_agent}')
|
||||||
browser = webdriver.Chrome(executable_path="/usr/bin/chromedriver", options=options)
|
browser = webdriver.Chrome(options=options)
|
||||||
|
|
||||||
# Define the maximum cache size and duration in seconds (4 hours)
|
|
||||||
MAX_CACHE_SIZE = 50
|
|
||||||
CACHE_DURATION = 4 * 60 * 60 # 4 hours in seconds
|
|
||||||
cache = OrderedDict(maxlen=MAX_CACHE_SIZE)
|
|
||||||
|
|
||||||
# Validate query, modify this regex as needed
|
|
||||||
VALID_QUERY_REGEX = re.compile(r'^[\w\-\.\/]+$')
|
|
||||||
|
|
||||||
# Function to handle web scraping using Selenium
|
# Function to handle web scraping using Selenium
|
||||||
def get_video_source(query_string):
|
def get_video_source(query_string):
|
||||||
try:
|
try:
|
||||||
browser.delete_all_cookies()
|
browser.delete_all_cookies()
|
||||||
|
|
||||||
query_string = "/" + query_string
|
query_string = "/" + query_string
|
||||||
url = f"{base_url}{query_string}" # Combine base URL and video ID
|
url = f"{base_url}{query_string}" # Combine base URL and video ID
|
||||||
browser.get(url)
|
browser.get(url)
|
||||||
|
|
||||||
# Replace sleep with explicit wait if possible
|
# Replace sleep with explicit wait if possible
|
||||||
browser.implicitly_wait(4)
|
browser.implicitly_wait(10)
|
||||||
|
|
||||||
|
browser.save_screenshot('ss_ig_reel.png')
|
||||||
|
|
||||||
# Locate the video element using your specific xpath
|
# Locate the video element using your specific xpath
|
||||||
|
try:
|
||||||
|
# Reels
|
||||||
video_element = browser.find_element_by_xpath(
|
video_element = browser.find_element_by_xpath(
|
||||||
"/html/body/div[2]/div/div/div[2]/div/div/div[1]/section/main/div[1]/div[1]/article/div/div[1]/div/div/div/div/div/div/div/video"
|
"/html/body/div[2]/div/div/div[2]/div/div/div[1]/section/main/div[1]/div[1]/article/div/div[1]/div/div/div/div/div/div/div/video"
|
||||||
)
|
)
|
||||||
|
except:
|
||||||
|
# Post (image)
|
||||||
|
video_element = browser.find_element_by_xpath(
|
||||||
|
"/html/body/div[2]/div/div/div[2]/div/div/div[1]/div[1]/div[2]/section/main/div/div[1]/div/div[1]/div/div/div/div/div/div/div[1]/img"
|
||||||
|
)
|
||||||
|
|
||||||
# Get the video source and return it
|
# Get the video source and return it
|
||||||
video_source = video_element.get_attribute("src")
|
video_source = video_element.get_attribute("src")
|
||||||
@@ -51,8 +68,12 @@ def get_video_source(query_string):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Handle exceptions and return a default URL or re-raise the exception
|
# Handle exceptions and return a default URL or re-raise the exception
|
||||||
|
print("Error: ")
|
||||||
|
print(e)
|
||||||
|
browser.get("https://api.dev.gabrielkheisa.xyz/")
|
||||||
return base_url
|
return base_url
|
||||||
|
|
||||||
|
|
||||||
@app.route("/", methods=["GET"]) # Route for empty query string
|
@app.route("/", methods=["GET"]) # Route for empty query string
|
||||||
def handle_empty_query():
|
def handle_empty_query():
|
||||||
return redirect("https://github.com/gabrielkheisa/instagram-downloader")
|
return redirect("https://github.com/gabrielkheisa/instagram-downloader")
|
||||||
@@ -61,7 +82,7 @@ def handle_empty_query():
|
|||||||
def get_video_source_server(query_string):
|
def get_video_source_server(query_string):
|
||||||
global cache # Ensure we reference the global cache variable
|
global cache # Ensure we reference the global cache variable
|
||||||
print(query_string)
|
print(query_string)
|
||||||
if len(query_string) > 30:
|
if len(query_string) > 80:
|
||||||
return '', 204
|
return '', 204
|
||||||
|
|
||||||
if not VALID_QUERY_REGEX.match(query_string):
|
if not VALID_QUERY_REGEX.match(query_string):
|
||||||
@@ -88,16 +109,13 @@ def get_video_source_server(query_string):
|
|||||||
cache[query_string] = video_source
|
cache[query_string] = video_source
|
||||||
return redirect(video_source["url"])
|
return redirect(video_source["url"])
|
||||||
|
|
||||||
# Create a ThreadPoolExecutor for parallel execution with a timeout of 15 seconds
|
# Get the video source sequentially
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
video_source = get_video_source(query_string)
|
||||||
future = executor.submit(get_video_source, query_string)
|
|
||||||
try:
|
|
||||||
video_source = future.result(timeout=15) # Timeout set to 15 seconds
|
|
||||||
# Add the new entry to the cache with a timestamp
|
# Add the new entry to the cache with a timestamp
|
||||||
cache[query_string] = {"url": video_source, "timestamp": time.time()}
|
cache[query_string] = {"url": video_source, "timestamp": time.time()}
|
||||||
|
|
||||||
return redirect(video_source)
|
return redirect(video_source)
|
||||||
except concurrent.futures.TimeoutError:
|
|
||||||
return redirect(base_url) # Handle timeout - return a default URL or handle as needed
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(debug=False, port=8080, host="0.0.0.0")
|
app.run(debug=False, port=8080, host="0.0.0.0")
|
Reference in New Issue
Block a user