Mirror of https://github.com/gabrielkheisa/instagram-downloader.git, synced 2025-09-13 06:41:09 +00:00
Compare commits
9 Commits
b37c957637...main

| SHA1 |
|---|
| c83b842968 |
| c293cf5e67 |
| 2cb5041524 |
| e3d935f6da |
| 107c837fe8 |
| 78b6f3dc7e |
| 6a581c917f |
| 9deb70acc1 |
| 595c1e54de |
.github/workflows/docker-image.yml (vendored, normal file, 23 lines added)

@@ -0,0 +1,23 @@
name: Build and Publish Docker Image

on:
  push:
    branches:
      - main # Trigger the workflow on pushes to the main branch

jobs:
  publish:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Login to GitHub Packages Docker registry
        run: echo "${{ secrets.GH_TOKEN }}" | docker login docker.pkg.github.com -u ${{ github.repository_owner }} --password-stdin

      - name: Build Docker image
        run: docker build -t docker.pkg.github.com/${{ github.repository }}/${{ github.repository }}:latest .

      - name: Push Docker image
        run: docker push docker.pkg.github.com/${{ github.repository }}/${{ github.repository }}:latest
Dockerfile (normal file, 28 lines added)

@@ -0,0 +1,28 @@
# Use the official Selenium standalone Chrome image as base
FROM selenium/standalone-chrome:112.0.5615.165-chromedriver-112.0.5615.49

ENV DEBIAN_FRONTEND=noninteractive

# Update package lists
RUN sudo apt-get update

# Install Python 3.6 and pip
RUN sudo apt-get install -y python3

RUN sudo apt-get install -y python3-pip

# Install Selenium and any other Python dependencies you may need
RUN sudo pip install selenium==3.141.0
RUN sudo pip install --upgrade urllib3==1.26.16
RUN sudo pip install Flask

# Set the working directory
WORKDIR /usr/src/app

# Copy your Python scripts into the container
COPY . .

EXPOSE 8080

# Example command to run your Python script
CMD ["python3", "run.py"]
README.md (33 changed lines)

@@ -17,8 +17,9 @@ https://ig.gabrielkheisa.xyz/reel/Cz3dNmDMVC9/?igshid=MzRlODBiNWFlZA==
```
### Returns redirect:
```
https://scontent.cdninstagram.com/v/t66.30100-16/316926421_1723935788092224_3596729375098306652_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_cat=100&_nc_ohc=6lyBPVcjJkYAX8kLe3I&edm=APs17CUBAAAA&ccb=7-5&oh=00_AfBNGf7HzFPnd-mhfvhZZZRk_-PlN3qx3hqbsINaUGA4aA&oe=6576D61D&_nc_sid=10d13b
https://scontent.cdninstagram.com/v/t66.30100-16/316926421_1723935788092224_3596729375098306652_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_cat=100&_nc_ohc=6lyBPVcj...............
```
Notes: This redirect URL (https://scontent.cdninstagram.com/...) has expiration, in which you need to re-run the query to get a new URL signature

## DISCLAIMER:

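Because the redirect target above is a signed scontent.cdninstagram.com URL that expires, a caller typically re-runs the query whenever the old link stops working. A minimal client-side sketch, assuming the `requests` package is installed; the host and shortcode below are placeholders taken from the README example, not an official client:

```python
import requests

BASE = "https://ig.gabrielkheisa.xyz"   # placeholder host from the README example
SHORTCODE = "Cz3dNmDMVC9"               # placeholder reel shortcode

def fresh_video_url():
    # Ask the micro web server to resolve the reel, but do not follow the redirect,
    # so the signed CDN URL can be inspected and reused.
    resp = requests.get(f"{BASE}/reel/{SHORTCODE}", allow_redirects=False, timeout=15)
    if resp.status_code not in (301, 302, 303, 307, 308):
        raise RuntimeError(f"unexpected response: {resp.status_code}")
    return resp.headers["Location"]

url = fresh_video_url()
video = requests.get(url, timeout=30)
if video.status_code in (403, 410):
    # The URL signature has likely expired; re-run the query to get a fresh one.
    video = requests.get(fresh_video_url(), timeout=30)
```
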
@@ -65,10 +66,38 @@ This micro web server does not directly download the Instagram Reels video. It s
git clone https://github.com/gabrielkheisa/instagram-downloader.git
```

### With Dockerfile

1. Build the Dockerfile

```
sudo docker build -t instagram-downloader .
```

2. Run the container

```
sudo docker run -d -p 8080:8080 instagram-downloader
```

### With existing Docker Image

1. Pull the Docker image

```
sudo docker pull ghcr.io/gabrielkheisa/instagram-downloader/gabrielkheisa/instagram-downloader:latest
```

2. Run the container

```
sudo docker run -d -p 8080:8080 ghcr.io/gabrielkheisa/instagram-downloader/gabrielkheisa/instagram-downloader
```


## Usage

1. Start the Flask app:
1. Start the Flask app, skip this part if you use Docker:

```
python run.py
```
run.py (103 changed lines)

@@ -1,10 +1,29 @@
from selenium import webdriver
from flask import Flask, request, redirect
import concurrent.futures
from flask import Flask, redirect
import re
from collections import OrderedDict
import time

# Define the maximum cache size and duration in seconds (4 hours)
MAX_CACHE_SIZE = 50
CACHE_DURATION = 4 * 60 * 60 # 4 hours in seconds
cache = OrderedDict(maxlen=MAX_CACHE_SIZE)

# Validate query, modify this regex as needed
VALID_QUERY_REGEX = re.compile(r'^[\w\-\.\/]+$')

app = Flask(__name__)

def invalidate_old_entries():
    current_time = time.time()
    one_hour_ago = current_time - 3600 # 1 hour in seconds

    # Iterate over a copy of the keys to avoid modifying while iterating
    for key in list(cache.keys()):
        timestamp, _ = cache[key]
        if timestamp < one_hour_ago:
            del cache[key]

# Define the base URL for scraping
base_url = "https://instagram.com" # Replace with your actual base URL
@@ -16,26 +35,32 @@ options.add_argument('--disable-gpu')
options.add_argument('--window-size=1920,1080')
options.add_argument('--no-sandbox')
options.add_argument(f'user-agent={user_agent}')
browser = webdriver.Chrome(executable_path="/usr/bin/chromedriver", options=options)

cache = OrderedDict(maxlen=50)
browser = webdriver.Chrome(options=options)

# Function to handle web scraping using Selenium
def get_video_source(query_string):
    try:
        browser.delete_all_cookies()

        query_string = "/" + query_string
        url = f"{base_url}{query_string}" # Combine base URL and video ID
        browser.get(url)

        # Replace sleep with explicit wait if possible
        browser.implicitly_wait(4)
        browser.implicitly_wait(10)

        browser.save_screenshot('ss_ig_reel.png')

        # Locate the video element using your specific xpath
        video_element = browser.find_element_by_xpath(
            "/html/body/div[2]/div/div/div[2]/div/div/div[1]/section/main/div[1]/div/article/div/div[1]/div/div/div/div/div/div/div/video"
        )
        try:
            # Reels
            video_element = browser.find_element_by_xpath(
                "/html/body/div[2]/div/div/div[2]/div/div/div[1]/section/main/div[1]/div[1]/article/div/div[1]/div/div/div/div/div/div/div/video"
            )
        except:
            # Post (image)
            video_element = browser.find_element_by_xpath(
                "/html/body/div[2]/div/div/div[2]/div/div/div[1]/div[1]/div[2]/section/main/div/div[1]/div/div[1]/div/div/div/div/div/div/div[1]/img"
            )

        # Get the video source and return it
        video_source = video_element.get_attribute("src")
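The "Replace sleep with explicit wait if possible" comment in the hunk above refers to Selenium's explicit waits. A minimal sketch of that alternative against the pinned selenium==3.141.0 API, waiting for a `<video>` element instead of calling implicitly_wait; the simple tag-name locator here is an assumption for illustration, not the XPath used in the commit:

```python
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# Block for up to 10 seconds until a <video> tag appears in the DOM,
# then use the element directly instead of sleeping a fixed amount of time.
video_element = WebDriverWait(browser, 10).until(
    EC.presence_of_element_located((By.TAG_NAME, "video"))
)
video_source = video_element.get_attribute("src")
```
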
@@ -43,28 +68,54 @@ def get_video_source(query_string):

    except Exception as e:
        # Handle exceptions and return a default URL or re-raise the exception
        print("Error: ")
        print(e)
        browser.get("https://api.dev.gabrielkheisa.xyz/")
        return base_url


@app.route("/", methods=["GET"]) # Route for empty query string
def handle_empty_query():
    return redirect("https://github.com/gabrielkheisa/instagram-downloader")

@app.route("/<path:query_string>", methods=["GET"])
def get_video_source_server(query_string):
    if len(query_string) > 30:
        # Reject the request by returning a 414 error code
        return abort(414, description="Query string too long")
    global cache # Ensure we reference the global cache variable
    print(query_string)
    if len(query_string) > 80:
        return '', 204

    if not VALID_QUERY_REGEX.match(query_string):
        return "Invalid link", 400


    # Clean up entries older than 4 hours
    current_time = time.time()
    keys_to_remove = []
    for key in list(cache.keys()):
        value = cache[key]
        if isinstance(value, dict) and "timestamp" in value:
            timestamp = value["timestamp"]
            if current_time - timestamp >= CACHE_DURATION:
                keys_to_remove.append(key)

    for key in keys_to_remove:
        cache.pop(key, None)

    if query_string in cache:
        # If cached, move to the front of the OrderedDict to update its age
        # Move the existing entry to the front of the cache and update its timestamp
        video_source = cache.pop(query_string)
        video_source["timestamp"] = time.time()
        cache[query_string] = video_source
        return redirect(video_source)
    # Create a ThreadPoolExecutor for parallel execution with a timeout of 3 seconds
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(get_video_source, query_string)
        try:
            video_source = future.result(timeout=10) # Timeout set to 3 seconds
            cache[query_string] = video_source
            return redirect(video_source)
        except concurrent.futures.TimeoutError:
            # Handle timeout - return a default URL or handle as needed
            return redirect(base_url)
        return redirect(video_source["url"])

    # Get the video source sequentially
    video_source = get_video_source(query_string)

    # Add the new entry to the cache with a timestamp
    cache[query_string] = {"url": video_source, "timestamp": time.time()}

    return redirect(video_source)

if __name__ == "__main__":
    app.run(debug=False, port=8080, host="0.0.0.0")
    app.run(debug=False, port=8080, host="0.0.0.0")
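The handler above keeps a small TTL cache of resolved URLs: each entry stores {"url", "timestamp"}, entries older than CACHE_DURATION are purged, and cache hits are re-inserted so recent queries stay at the end of the OrderedDict. Note that collections.OrderedDict takes no maxlen argument, so OrderedDict(maxlen=MAX_CACHE_SIZE) does not by itself cap the cache size. A minimal sketch of the same bounded TTL/LRU idea with explicit eviction; the helper names are illustrative, not from the repository:

```python
from collections import OrderedDict
import time

MAX_CACHE_SIZE = 50
CACHE_DURATION = 4 * 60 * 60  # 4 hours in seconds
cache = OrderedDict()  # query_string -> {"url": ..., "timestamp": ...}

def cache_put(key, url):
    # Insert or refresh an entry, then evict the oldest entries over the size cap.
    cache[key] = {"url": url, "timestamp": time.time()}
    cache.move_to_end(key)
    while len(cache) > MAX_CACHE_SIZE:
        cache.popitem(last=False)  # drop the least recently used entry

def cache_get(key):
    # Return a cached URL if present and younger than CACHE_DURATION, else None.
    entry = cache.get(key)
    if entry is None:
        return None
    if time.time() - entry["timestamp"] >= CACHE_DURATION:
        del cache[key]
        return None
    cache.move_to_end(key)  # a hit counts as recent use
    return entry["url"]
```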