This commit is contained in:
liushuang 2024-08-13 11:43:25 +08:00
commit a3cf0aa893
21 changed files with 2327 additions and 0 deletions

17
.gitignore vendored Normal file

@ -0,0 +1,17 @@
# Logs
logs
*.log
npm-debug.log*
# Runtime data
pids
*.pid
*.seed
.idea
lib-cov
coverage
.grunt
.lock-wscript
build/Release
node_modules

22
LICENSE.md Normal file

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2019 Ahmed Ibrhim
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

28
README.md Normal file

@ -0,0 +1,28 @@
## Website Downloader
Download the complete source code of any website (including all assets)
## Description
Website Downloader uses `wget` to download all of a website's assets and `archiver` to compress them into a zip file. Progress messages and the name of the finished archive are sent back to the user through a socket channel.
**wget parameters being used**
`wget --mirror --convert-links --adjust-extension --page-requisites
--no-parent http://example.org`
**Explanation of the various flags:**
- --mirror Makes (among other things) the download recursive.
- --convert-links convert all the links (also to stuff like CSS stylesheets) to relative, so it will be suitable for offline viewing.
- --adjust-extension Adds suitable extensions to filenames (html or css) depending on their content-type.
- --page-requisites Download things like CSS style-sheets and images required to properly display the page offline.
- --no-parent When recursing, do not ascend to the parent directory. It is useful for restricting the download to only a portion of the site.
## Run
- `$ npm install`
- `$ npm start`
- `http://localhost:3002/` (3002 is the default port; set the `PORT` environment variable to use another one)

41
app.js Normal file

@ -0,0 +1,41 @@
var createError = require('http-errors');
var express = require('express');
var path = require('path');
var cookieParser = require('cookie-parser');
var logger = require('morgan');
var indexRouter = require('./routes/index');
var usersRouter = require('./routes/users');
/**
 * Express application for the website downloader.
 *
 * Wires up the Handlebars view engine, request logging, body/cookie parsing,
 * static assets from ./public, the two routers, and the 404 / error handlers.
 * The configured app is exported; it is not started here.
 */
var app = express();

// view engine setup
app.set('views', path.join(__dirname, 'views'));
app.set('view engine', 'hbs');

app.use(logger('dev'));
app.use(express.json());
app.use(express.urlencoded({ extended: false }));
app.use(cookieParser());
app.use(express.static(path.join(__dirname, 'public')));

app.use('/', indexRouter);
app.use('/users', usersRouter);

// catch 404 and forward to error handler
app.use(function (req, res, next) {
  next(createError(404));
});

// error handler
app.use(function (err, req, res, next) {
  // The environment name is compared case-insensitively: the "dev" npm script
  // exports NODE_ENV=Development, which a strict comparison with
  // 'development' would never match, hiding error details during development.
  var env = String(req.app.get('env') || '').toLowerCase();

  // set locals, only providing error in development
  res.locals.message = err.message;
  res.locals.error = env === 'development' ? err : {};

  // render the error page
  res.status(err.status || 500);
  res.render('error');
});

module.exports = app;

3
app.json Normal file

@ -0,0 +1,3 @@
{
"stack": "heroku-22"
}

53
archiver/index.js Normal file

@ -0,0 +1,53 @@
var archiver = require("archiver");
var fs = require("fs");
module.exports = (file, io, data) => {
console.log("--------- file:", file);
var output = fs.createWriteStream("./public/sites/" + file + ".zip");
var archive = archiver("zip", {
zlib: { level: 9 }, // Sets the compression level.
});
// listen for all archive data to be written
// 'close' event is fired only when a file descriptor is involved
output.on("close", function () {
console.log(archive.pointer() + " total bytes");
console.log(
"archiver has been finalized and the output file descriptor has closed."
);
io.emit(data.token, { progress: "Completed", file });
});
// This event is fired when the data source is drained no matter what was the data source.
// It is not part of this library but rather from the NodeJS Stream API.
// @see: https://nodejs.org/api/stream.html#stream_event_end
output.on("end", function () {
console.log("Data has been drained");
});
// good practice to catch warnings (ie stat failures and other non-blocking errors)
archive.on("warning", function (err) {
if (err.code === "ENOENT") {
// log warning
} else {
// throw error
throw err;
}
});
// good practice to catch this error explicitly
archive.on("error", function (err) {
throw err;
});
// pipe archive data to the file
archive.pipe(output);
// append files from a sub-directory and naming it `new-subdir` within the archive
archive.directory("./" + file, false);
// finalize the archive (ie we are done appending files but streams have to finish yet)
// 'close', 'end' or 'finish' may be fired right after calling this method so register to them beforehand
archive.finalize();
};

90
bin/www Normal file

@ -0,0 +1,90 @@
#!/usr/bin/env node
/**
* Module dependencies.
*/
var app = require('../app');
var debug = require('debug')('website-downloader:server');
var http = require('http');
/**
* Resolve the port to listen on: the PORT environment variable when set,
* otherwise '3002'. The normalized value is also stored on the Express app
* under the 'port' setting.
*/
var port = normalizePort(process.env.PORT || '3002');
app.set('port', port);
/**
* Create the HTTP server that dispatches requests to the Express app, and
* attach a socket.io server to that same HTTP server.
*/
var server = http.createServer(app);
var io = require('socket.io')(server);
// Hand the socket.io server to the socket module, which registers its handlers on it.
require('../socket/socket')(io)
/**
* Listen on provided port, on all network interfaces.
* onError and onListening are function declarations further down in this file.
*/
server.listen(port);
server.on('error', onError);
server.on('listening', onListening);
/**
* Normalize a port into a number, string, or false.
*/
function normalizePort(val) {
  var parsed = parseInt(val, 10);

  // A value that does not parse as a base-10 integer is treated as a named
  // pipe and handed back unchanged.
  if (Number.isNaN(parsed)) {
    return val;
  }

  // Non-negative integers are accepted as port numbers; anything else is rejected.
  return parsed >= 0 ? parsed : false;
}
/**
* Event listener for HTTP server "error" event.
*/
function onError(error) {
  // Only failures of the listen() call are handled here; anything else is rethrown.
  if (error.syscall !== 'listen') {
    throw error;
  }

  var bind = (typeof port === 'string' ? 'Pipe ' : 'Port ') + port;

  // Friendly messages for the listen errors we know how to explain.
  var friendlyMessages = {
    EACCES: bind + ' requires elevated privileges',
    EADDRINUSE: bind + ' is already in use'
  };

  if (!Object.prototype.hasOwnProperty.call(friendlyMessages, error.code)) {
    throw error;
  }

  console.error(friendlyMessages[error.code]);
  process.exit(1);
}
/**
* Event listener for HTTP server "listening" event.
*/
function onListening() {
  var address = server.address();
  var description;

  // server.address() yields a string for pipes and an object with a `port` otherwise.
  if (typeof address === 'string') {
    description = 'pipe ' + address;
  } else {
    description = 'port ' + address.port;
  }

  debug('Listening on ' + description);
}

1662
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

21
package.json Normal file

@ -0,0 +1,21 @@
{
"name": "website-downloader",
"version": "0.0.1",
"private": true,
"scripts": {
"start": "node ./bin/www",
"dev": "export NODE_ENV=Development && npm start"
},
"dependencies": {
"archiver": "^3.1.1",
"cookie-parser": "~1.4.4",
"debug": "~2.6.9",
"express": "~4.17.3",
"hbs": "~4.2.0",
"http-errors": "~1.6.3",
"jszip": "^3.8.0",
"morgan": "~1.9.1",
"socket.io": "^2.5.0",
"socket.io-client": "^2.5.0"
}
}

9
public/stylesheets/bootstrap.min.css vendored Normal file

File diff suppressed because one or more lines are too long

@ -0,0 +1,199 @@
/* Animated loading bar: styles the busy progressbar element inside <header>.
   The element itself is the coloured background strip pinned to the top. */
header [role="progressbar"][aria-busy="true"] {
position: absolute;
top: 0;
left: 0;
padding-top: 8px;
width: 100%;
background-color: #159756;
-webkit-animation: preloader-background linear 3.5s infinite;
animation: preloader-background linear 3.5s infinite;
}
/* Two pseudo-elements form the front bar; both run the preloader-front animation. */
header [role="progressbar"][aria-busy="true"]::before, header [role="progressbar"][aria-busy="true"]::after {
display: block;
position: absolute;
top: 0;
z-index: 2;
width: 0;
height: 8px;
background: #afa;
-webkit-animation: preloader-front linear 3.5s infinite;
animation: preloader-front linear 3.5s infinite;
content: '';
}
/* ::before is anchored at the centre by its right edge, so it grows leftwards. */
header [role="progressbar"][aria-busy="true"]::before {
right: 50%;
}
/* ::after is anchored at the centre by its left edge, so it grows rightwards. */
header [role="progressbar"][aria-busy="true"]::after {
left: 50%;
}
@-webkit-keyframes preloader-background {
0%, 24.9% {
background-color: #159756;
}
25%, 49.9% {
background-color: #da4733;
}
50%, 74.9% {
background-color: #3b78e7;
}
75%, 100% {
background-color: #fdba2c;
}
}
@keyframes preloader-background {
0%, 24.9% {
background-color: #159756;
}
25%, 49.9% {
background-color: #da4733;
}
50%, 74.9% {
background-color: #3b78e7;
}
75%, 100% {
background-color: #fdba2c;
}
}
@-webkit-keyframes preloader-front {
0% {
width: 0;
background-color: #da4733;
}
24.9% {
width: 50%;
background-color: #da4733;
}
25% {
width: 0;
background-color: #3b78e7;
}
49.9% {
width: 50%;
background-color: #3b78e7;
}
50% {
width: 0;
background-color: #fdba2c;
}
74.9% {
width: 50%;
background-color: #fdba2c;
}
75% {
width: 0%;
background-color: #159756;
}
100% {
width: 50%;
background-color: #159756;
}
}
@keyframes preloader-front {
0% {
width: 0;
background-color: #da4733;
}
24.9% {
width: 50%;
background-color: #da4733;
}
25% {
width: 0;
background-color: #3b78e7;
}
49.9% {
width: 50%;
background-color: #3b78e7;
}
50% {
width: 0;
background-color: #fdba2c;
}
74.9% {
width: 50%;
background-color: #fdba2c;
}
75% {
width: 0%;
background-color: #159756;
}
100% {
width: 50%;
background-color: #159756;
}
}
* {
box-sizing: border-box;
}
html,
body {
margin: 0;
padding: 0;
height: 100%;
}
body {
display: flex;
flex-direction: column;
align-items: center;
font-family: Avenir Next, Helvetica Neue, Helvetica, Arial, sans-serif;
background-color: #f5f5f5;
}
header {
position: relative;
width: 100%;
height: 60px;
/*box-shadow: 0 2px 2px rgba(0, 0, 0, 0.2);*/
background-color: #fff;
}
main {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 20px;
flex-grow: 1;
text-align: center;
}
:root main > * + * {
margin-top: 1em;
}
h1 {
margin: 0;
font-size: 2.2em;
font-weight: 200;
}
p {
margin: 0;
font-size: .875em;
font-weight: 400;
}
/* Single-line status message: long text is clipped with an ellipsis. */
.log {
width: 400px;
max-width: 100%;
text-align: center;
white-space: nowrap; /* do not wrap */
overflow: hidden; /* hide the overflowing part */
text-overflow: ellipsis; /* show an ellipsis for the clipped part */
}
.icon-down {
vertical-align: top !important;
}
.mt-20 {
margin-top: 20px;
}
.form-control:focus {
border-color: #000000 !important;
box-shadow: inset 0 1px 1px rgba(0,0,0,0.075), 0 0 8px rgb(0 0 0 / 60%) !important;
}

1
public/svg/download.svg Normal file

@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1710580884953" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="30189" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M742.4 286.254545c-9.309091-9.309091-20.945455-13.963636-34.909091-13.963636-11.636364 0-25.6 4.654545-34.909091 13.963636l-111.709091 111.709091V69.818182c0-27.927273-20.945455-48.872727-48.872727-48.872727-27.927273 0-48.872727 20.945455-48.872727 48.872727v328.145454L349.090909 286.254545c-9.309091-9.309091-20.945455-13.963636-34.909091-13.963636s-25.6 4.654545-34.909091 13.963636c-9.309091 9.309091-13.963636 20.945455-13.963636 34.909091s4.654545 25.6 13.963636 34.909091l230.4 230.4 230.4-230.4c9.309091-9.309091 13.963636-20.945455 13.963637-34.909091 4.654545-13.963636-2.327273-25.6-11.636364-34.909091zM954.181818 707.490909c0-6.981818 0-11.636364-2.327273-16.290909L854.109091 395.636364c-6.981818-18.618182-25.6-32.581818-46.545455-32.581819h-11.636363c-4.654545 9.309091-9.309091 18.618182-18.618182 25.6l-72.145455 72.145455h65.163637l81.454545 246.690909H169.890909l81.454546-246.690909h65.163636l-72.145455-72.145455c-11.636364-11.636364-20.945455-27.927273-25.6-46.545454l-4.654545 20.945454c-20.945455 0-39.563636 13.963636-46.545455 32.581819L72.145455 693.527273c0 4.654545-2.327273 9.309091-2.327273 13.963636V954.181818c0 27.927273 20.945455 48.872727 48.872727 48.872727h786.618182c27.927273 0 48.872727-20.945455 48.872727-48.872727V707.490909z" fill="#fff" ></path></svg>

After

Width:  |  Height:  |  Size: 1.6 KiB

1
public/svg/webpage.svg Normal file

@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M700.223 7.31l0.292 0.438 215.34 224.404 0.366 0.366 6.579 6.944V947.25c0 41.957-34.21 76.093-76.24 76.093H149.739a76.239 76.239 0 0 1-76.24-76.02V76.02C73.427 34.136 107.709 0 149.739 0h543.394l7.017 7.31z m-321.55 405.9s-67.247 63.887-72.145 112.276c0 0 72.658-101.238 196.994-162.857 0 0-49.632-6.871-124.848 50.582z m369.5 210.225c3.875-63.155-9.867-110.814-29.823-146.338 13.815-13.304 17.616-60.158 17.616-60.158 4.386-49.486-17.981-69.88-17.981-69.88-37.571-39.18-128.503-7.236-128.503-7.236-114.76 37.425-210.004 129.38-210.004 129.38-150.797 135.446-162.346 273.89-162.346 273.89-6.579 95.024 73.608 95.755 73.608 95.755 82.744 4.24 121.631-26.022 126.455-30.188 52.19 28.215 99.41 26.46 99.41 26.46 183.325-11.914 219.946-161.542 219.946-161.542h-121.85C587.288 740.9 505.787 727.45 505.787 727.45 425.6 710.93 424.87 623.435 424.87 623.435h323.303z m-39.837-257.736c39.91 30.846 11.988 92.539 5.994 104.527-40.203-65.787-97.656-83.183-97.656-83.183-121.705 73.242-150.577 108.62-150.577 108.62 28.945-25.218 68.417-23.39 68.417-23.39 90.347 10.233 87.423 88.007 87.423 88.372H424.87c7.31-31.65 18.348-45.173 22.88-49.632-83.33 72.584-134.278 180.547-134.278 180.547 15.35 47 57.161 88.592 91.955 110.375a135.958 135.958 0 0 1-56.284 20.467c-73.607 8.99-71.488-50.144-71.488-50.144 2.193-61.4 62.132-157.01 62.132-157.01 54.822-80.113 130.403-147.434 130.403-147.434 74.704-63.594 139.028-94.367 139.028-94.367 67.614-34.794 99.045-7.821 99.045-7.821z" fill="currColor"></path></svg>

After

Width:  |  Height:  |  Size: 1.8 KiB

1
public/svg/website.svg Normal file

@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg t="1722327668331" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="2692" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><path d="M544.938667 115.04c0 0-91.530667-12.437333-230.197333 90.912 0-0.010667-124.106667 114.826667-133.034667 201.802667C181.706667 407.765333 315.605333 225.738667 544.938667 115.04zM941.024 320.810667c25.429333-23.808 32.565333-108.170667 32.565333-108.170667C981.578667 123.84 940.330667 87.04 940.330667 87.04 871.093333 16.64 703.413333 74.090667 703.413333 74.090667c-211.637333 67.210667-387.285333 232.64-387.285333 232.64C37.973333 550.24 16.693333 799.125333 16.693333 799.125333c-11.946667 170.890667 135.754667 172.16 135.754667 172.16 152.64 7.573333 224.266667-46.858667 233.216-54.272 96.234667 50.656 183.381333 47.594667 183.381333 47.594667C907.050667 943.157333 974.613333 674.122667 974.613333 674.122667l-224.64 0c-50.581333 121.066667-200.938667 96.864-200.938667 96.864-147.733333-29.642667-149.077333-187.04-149.077333-187.04l596.181333 0 0.010667-0.042667C1003.210667 470.474667 977.898667 384.810667 941.024 320.810667zM753.568 158.784C529.088 290.56 475.861333 354.154667 475.861333 354.154667c53.354667-45.312 126.176-41.941333 126.176-41.941333 166.592 18.4 161.162667 158.144 161.28 158.741333L399.936 470.954667c13.578667-56.885333 33.877333-81.162667 42.197333-89.130667-153.802667 130.442667-247.573333 324.490667-247.573333 324.490667 28.202667 84.629333 105.312 159.477333 169.450667 198.464-48.053333 31.648-103.776 36.96-103.776 36.96-135.754667 16.117333-131.765333-90.165333-131.765333-90.165333C132.384 741.226667 242.890667 569.173333 242.890667 569.173333c101.12-144.042667 240.544-265.077333 240.544-265.077333 137.749333-114.005333 256.522667-169.472 256.522667-169.472 124.736-62.56 182.666667-14.154667 
182.666667-14.154667l0-0.010667 0.032 0c73.621333 55.402667 22.016 166.368 11.029333 187.989333C859.552 190.24 753.568 158.784 753.568 158.784z" fill="#272636" p-id="2693"></path></svg>

After

Width:  |  Height:  |  Size: 2.1 KiB

9
routes/index.js Normal file

@ -0,0 +1,9 @@
var express = require("express");
var router = express.Router();

/**
 * GET / — renders the "index" view, passing the page title to the template.
 */
function renderHomePage(req, res, next) {
  var viewData = { title: "在线网站下载" };
  res.render("index", viewData);
}

router.get("/", renderHomePage);

module.exports = router;

9
routes/users.js Normal file

@ -0,0 +1,9 @@
var express = require('express');
var router = express.Router();

/**
 * GET / on this router — answers with a fixed placeholder body.
 */
function listUsers(req, res, next) {
  var body = 'respond with a resource';
  res.send(body);
}

router.get('/', listUsers);

module.exports = router;

12
socket/socket.js Normal file

@ -0,0 +1,12 @@
// Download full website pages.
const wget = require("../wget");
module.exports = (io) => {
io.on("connection", function (socket) {
socket.on("request", function (data) {
console.log("Request connection received %s", data.token);
// now graphing the website
wget(io, data);
});
});
};

3
views/error.hbs Normal file

@ -0,0 +1,3 @@
<h1>{{message}}</h1>
<h2>{{error.status}}</h2>
<pre>{{error.stack}}</pre>

40
views/index.hbs Normal file

@ -0,0 +1,40 @@
<header>
<div
aria-busy="true"
id="progress"
hidden
aria-label="Loading, please wait."
role="progressbar"
></div>
</header>
<main role="main">
<img
src="/svg/webpage.svg"
width="100"
alt="website"
/>
<div style="margin-top: 40px;">
<h1>Website Downloader</h1>
<h5 style="margin-top: 20px;">下载当前网站的所有静态资源,仅供学习使用!</h5>
</div>
<div class="container">
<div class="row">
<div class="col-lg-12" style="float: none; margin: 0 auto;">
<div id="custom-search-input">
<form method="get" class="form" action="/search">
<div class="mt-20" style="display: ruby-text">
<div class="form-group">
<input type="text" class="form-control" id="website" placeholder="https://www.baidu.com">
</div>
<button type="button" class="btn btn-default" id="download" >提 交</button>
</div>
</form>
</div>
</div>
</div>
</div>
<h5 hidden id="nFilesP">下载的文件总数:<span id="nFiles" style="color: red;font-weight: bold">0</span></h5>
<p class="log" id="log"></p>
<button style="display: none" class="btn btn-success"><span> 下 载 </span><img class="icon-down" width="16" src="/svg/download.svg" alt="download" /> </button>
</main>

71
views/layout.hbs Normal file

@ -0,0 +1,71 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<title>{{title}}</title>
<link rel='stylesheet' href='/stylesheets/style.css' />
<link rel="stylesheet" href="/stylesheets/bootstrap.min.css">
<script src="/socket.io/socket.io.js"></script>
</head>
<body>
{{{body}}}
</body>
<script>
// Client-side controller for the download page: sends the download request
// over socket.io and renders the progress events the server emits for this
// browser's token.
let numberOfFiles = 0;
const downloadWebsite = document.getElementsByClassName('btn-success')[0];
const progressBar = document.getElementById('progress');
const log = document.getElementById('log');
const websiteInput = document.getElementById('website');

// connect to current socket.io server
const socket = io.connect(document.URL);

// The token doubles as the name of the event this browser listens on.
if (!localStorage['token']) {
  localStorage['token'] = generateToken(20);
}

// Show a status line as <code>text</code><br>. The text is inserted with
// textContent, never innerHTML: progress messages contain output produced
// while crawling third-party sites and must not be interpreted as markup.
function showLog(text) {
  const code = document.createElement('code');
  code.textContent = text;
  log.textContent = '';
  log.appendChild(code);
  log.appendChild(document.createElement('br'));
}

// wait for events for this token
socket.on(localStorage['token'], (event) => {
  console.log(event);
  progressBar.hidden = false;
  if (event.progress === "Converting") {
    showLog('100%! 正在压缩中...');
  } else if (event.progress === "Completed") {
    progressBar.hidden = true;
    showLog('压缩成功 !');
    downloadWebsite.style.display = '';
    downloadWebsite.onclick = function () {
      window.location = '/sites/' + encodeURIComponent(event.file) + ".zip";
    };
  } else {
    const text = String(event.progress);
    // Each "200 OK" in the forwarded output is counted as one downloaded file.
    if (text.includes('200 OK')) {
      numberOfFiles++;
    }
    document.getElementById('nFilesP').hidden = false;
    document.getElementById('nFiles').textContent = numberOfFiles;
    showLog(' ' + text);
  }
});

// Validate the input, reset the state left over from a previous run and ask
// the server to download the website.
function startDownload() {
  const website = websiteInput.value.trim();
  if (!website) {
    console.log("no url");
    return;
  }
  numberOfFiles = 0;
  document.getElementById('nFiles').textContent = numberOfFiles;
  downloadWebsite.style.display = 'none';
  console.log("Now downloading the website ... %s", website);
  socket.emit('request', { token: localStorage['token'], website });
}

// Download a website on click
const downloadBtn = document.getElementById("download");
downloadBtn.onclick = startDownload;

// Pressing Enter in the text field submits the surrounding form, whose
// action ("/search") is not a route of this app. Intercept the submission and
// start the download instead.
if (websiteInput.form) {
  websiteInput.form.addEventListener('submit', (submitEvent) => {
    submitEvent.preventDefault();
    startDownload();
  });
}

// Generate token for each user for the first time.
// The token is the only thing separating one user's events from another's,
// so it is drawn from the browser's cryptographic random source.
function generateToken(n) {
  const chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
  // Bytes at or above this limit are discarded so every character is equally likely.
  const limit = 256 - (256 % chars.length);
  const buffer = new Uint8Array(n);
  let token = '';
  while (token.length < n) {
    window.crypto.getRandomValues(buffer);
    for (let i = 0; i < buffer.length && token.length < n; i++) {
      if (buffer[i] < limit) {
        token += chars[buffer[i] % chars.length];
      }
    }
  }
  return token;
}
</script>
</html>

35
wget/index.js Normal file

@ -0,0 +1,35 @@
var util = require('util'),
exec = require('child_process').exec;
var archiver = require('../archiver')
module.exports=(io,data)=>{
// download all website assets
/**
* wget --mirror --convert-links --adjust-extension --page-requisites
* --no-parent http://example.org
* --mirror Makes (among other things) the download recursive.
* --convert-links convert all the links (also to stuff like CSS stylesheets) to relative, so it will be suitable for offline viewing.
* --adjust-extension Adds suitable extensions to filenames (html or css) depending on their content-type.
* --page-requisites Download things like CSS style-sheets and images required to properly display the page offline.
* --no-parent When recurring do not ascend to the parent directory. It useful for restricting the download to only a portion of the site.
*/
let website ="";
const child = exec(`wget -mkEpnp --no-if-modified-since ${data.website}`);
// read stdout from the current child.
child.stderr.on("data",(response)=>{
if(response.startsWith("Resolving "))
{
website= response.substring(response.indexOf('Resolve ')+11,response.indexOf(' ('))
}
io.emit(data.token,{progress:response})
})
child.stderr.on('close',(response)=>{
io.emit(data.token,{progress:"Converting"})
archiver(website,io,data)
})
}