Recently, I needed to download a ton of files and I found that trying to do it with a browser or wget wasn't going to work. I wanted a robust, lightweight, efficient tool to which I could easily add custom URL parsing logic. I wanted it to be idempotent while supporting resumable downloads that run in the background. I decided to leverage the BITS platform, which is really the best-of-breed downloading method on Windows.
I also wanted to see real-time statistics as it churned through hundreds of downloads. I also wanted the function to run independently of the scraper function which was populating the download list file with all the URLs to download. This way I had a means of throttling downloads that efficiently took advantage of my full bandwidth without oversaturating it with excess overhead and packet loss.
The solution I came up with leaves out some more advanced features, like auto-throttling based on TCP statistics. I also took a stab at calculating the estimated total time of completion, but I found that without better accounting, the results were garbage. I have a few other ideas I might implement in a future version of this, but I figured I'd just publish what I used to successfully download hundreds of files, over 200 GB in total.
The script also includes a way to stop all downloads should you need to. Just run "Stop-DownloadFiles".
Ran from ISE:
Ran from standard prompt:
Here's the source code:
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<#
BV-MassDownloader.ps1
https://www.bryanvine.com/2019/05/powershell-script-massdownloader.html
Author: Bryan Vine
Last updated: 05/12/2019
Description: This function leverages the BITS service for downloading many files from a source file with URLs, one per line.
The function multithreads, runs in the background asynchronously, can add more files to the queue in real time,
is idempotent, fault tolerant, and can resume partially downloaded files.
#>
#Requires -version 3 | |
Function Get-DownloadFiles{
    <#
    .SYNOPSIS
    Downloads all the URLs listed in the source download list and saves them in the target destination.
    .DESCRIPTION
    This function leverages the BITS service for downloading many files from a source file with URLs, one per line.
    It queues up to -maxconcurrent asynchronous BITS jobs at a time, re-reads the source list on every loop
    pass so new URLs can be appended while it runs, skips files that already exist in the target folder,
    and skips URLs that are already queued in BITS, so it can be stopped and re-run safely.
    .PARAMETER source
    What list to read in for source files (one URL per line; duplicates and blank lines are ignored).
    .PARAMETER target
    Where files will be downloaded to. The folder is created if it does not exist.
    .PARAMETER extension
    File extension that's being used to find the file name in the download URLs.
    .PARAMETER maxconcurrent
    Max concurrent downloads (must be at least 1).
    .EXAMPLE
    Get-DownloadFiles -source c:\sourceURLlist.txt -target d:\downloads -extension mp3
    This will download all the URL links in c:\sourceURLlist.txt, parse out the mp3 files and save them to d:\downloads
    .LINK
    https://www.bryanvine.com/2019/05/powershell-script-massdownloader.html
    .LINK
    Start-BitsTransfer
    .NOTES
    Author: Bryan Vine
    Last updated: 05/12/2019
    NOTE(review): Get-BitsTransfer is called without any filter, so BITS jobs that were not
    started by this function are also counted, completed, or removed by the loop below.
    #>
    [cmdletbinding()]
    Param(
        [string]$source = "c:\temp\downloadlist.txt",
        [string]$target = "C:\Temp\videos",
        [string]$extension = "mp4",
        # A value below 1 would never start a job and would spin forever
        [ValidateRange(1,2147483647)]
        [int]$maxconcurrent = 10
    )
    BEGIN{
        Import-Module BitsTransfer
        #Loads list, removes duplicates & blank lines.
        #@() keeps $list an array even for a single URL; otherwise $list[$index] would index into the string.
        $list = @(Get-Content $source | Select-Object -Unique | Where-Object {$_ -notlike $null})
        #Start-BitsTransfer cannot write into a folder that does not exist
        if(!(Test-Path -LiteralPath $target)){
            New-Item -ItemType Directory -Path $target -Force | Out-Null
        }
        #initialize working variables (all of them are read before the first per-loop reset)
        $index = 0
        $Errors = 0
        $transfering = @()
        $totalspeed = 0
        $totalsize = 1
        $totalcomplete = 0
    }
    PROCESS{
        while($index -lt $list.Count -or $transfering.Count -gt 0){
            #get currently transferring jobs (always as an array of remote URLs)
            $transfering = @(Get-BitsTransfer | Select-Object -ExpandProperty FileList | Select-Object -ExpandProperty RemoteName)

            #Calculate Progress activity for realtime display
            $remainingFiles = $list.Count - $Errors
            #Never divide by zero when the list is empty or every entry has errored
            $slots = [math]::Max(1,[math]::Min($maxconcurrent,$remainingFiles))
            $averageSize = $totalsize / $slots
            $estimatedtotal = $averageSize * $remainingFiles
            if($estimatedtotal -le 0){$estimatedtotal = 1}
            $estimatedremaining = $estimatedtotal - $totalcomplete - ($index - $transfering.Count - $Errors) * $averageSize
            $estimatedpercent = [double]((($estimatedtotal - $estimatedremaining) / $estimatedtotal) * 100)
            #Write-Progress only accepts 0..100
            $estimatedpercent = [math]::Max(0,[math]::Min(100,$estimatedpercent))
            Write-Progress -Activity "Downloading files from list: $source - saving to $target" -PercentComplete $estimatedpercent -Status (
                "Files downloaded: $($index - $transfering.Count - $Errors)/$($list.Count) - " +
                "Queue: $($transfering.Count)/$maxconcurrent - " +
                "Errors: $Errors - " +
                "Total Download Speed: $totalspeed MB/sec - " +
                "Percent: $([math]::Round($estimatedpercent,0)) %")

            #fix index pointer for idempotency: resume right after the last URL that is already queued
            if($index -eq 0 -and $transfering.Count -gt 0){
                $index = [array]::IndexOf($list,$transfering[-1]) + 1
            }

            #Stop adding if index is at the end; only add while the queue isn't full
            if($index -lt $list.Count -and $transfering.Count -lt $maxconcurrent){
                $url = $list[$index]
                #Always advance past the entry being examined. Previously the pointer only moved when a
                #job was started, so an already-downloaded or already-queued URL stalled the loop forever.
                $index++
                #Extracts file name from the URL
                $filename = $url.Split("/").Split("?") |
                    Where-Object {$_ -like "*$extension*"} |
                    Where-Object {$_.Length -gt 3} |
                    Select-Object -Last 1
                if(!$filename){
                    #No usable file name: count it as an error instead of downloading onto the folder path
                    $Errors++
                    Write-Warning "Skipping URL with no '$extension' file name: $url"
                }else{
                    $destination = Join-Path $target $filename
                    #Skip files that were already downloaded, and URLs that are still queued from a previous run
                    if(!(Test-Path -LiteralPath $destination) -and $url -notin $transfering){
                        #Add new download job
                        Start-BitsTransfer -Source $url -Destination $destination -Asynchronous | Out-Null
                        Start-Sleep -Seconds 1
                    }
                }
            }

            #Check for completed download jobs, remove from queue
            Get-BitsTransfer | Where-Object {$_.JobState -like "transferred"} | Complete-BitsTransfer
            #Check for errored download jobs, remove from queue
            Get-BitsTransfer | Where-Object {$_.JobState -like "*Error*"} | ForEach-Object {
                $Errors++
                Write-Host -ForegroundColor DarkRed -BackgroundColor Black "Error Downloading: $($_.filelist.RemoteName)"
                $_ | Remove-BitsTransfer
            }

            #Per loop variable initialization
            $totalspeed = 0
            $totalsize = 1
            $totalcomplete = 0
            $id = 1
            #Per file metrics calculation & display
            Get-BitsTransfer | Where-Object {$_.JobState -like "Transferring"} | ForEach-Object {
                $transferred = $_.BytesTransferred
                $total = $_.BytesTotal
                #BITS reports an undetermined size as the maximum UInt64 value (BG_SIZE_UNKNOWN);
                #a brand-new job may also report 0. Neither can be used as a divisor or a total.
                $sizeKnown = ($total -gt 0) -and ($total -ne [uint64]::MaxValue)
                $totalcomplete += $transferred
                if($sizeKnown){
                    $totalsize += $total
                    $PercentComplete = [math]::Round(([double]$transferred * 100 / $total),0)
                    $PercentComplete = [math]::Max(0,[math]::Min(100,$PercentComplete))
                    $totalText = [math]::Round($total/1MB,0)
                }else{
                    $PercentComplete = 0
                    $totalText = '?'
                }
                #A job created within the same second has an elapsed time of zero
                $elapsed = ($_.ModificationTime - $_.CreationTime).TotalSeconds
                if($elapsed -gt 0){
                    $speed = [math]::Round(($transferred/1MB) / $elapsed,3)
                }else{
                    $speed = 0
                }
                $totalspeed += $speed
                if($sizeKnown -and $speed -gt 0){
                    $timeleft = [math]::Round((($total - $transferred)/1MB)/$speed,0)
                    $remaining = [timespan]::FromSeconds($timeleft)
                    $ETA = Get-Date (Get-Date).AddSeconds($timeleft) -UFormat "%I:%M:%S %p"
                    $timeText = "Time left: $($remaining.Hours) hours $($remaining.Minutes) minutes $($remaining.Seconds) seconds - ETA: $ETA"
                }else{
                    #No meaningful estimate without a known size and a non-zero speed
                    $timeText = "Time left: unknown"
                }
                Write-Progress -Id ($id++) -Activity "Downloading $(Split-Path -Leaf $_.filelist.LocalName)" -PercentComplete $PercentComplete -Status (
                    "Downloaded: $([math]::Round($transferred/1MB,0))/$($totalText)MB - " +
                    "Percent: $PercentComplete % - Speed: $speed MB/sec - " +
                    $timeText)
            }

            #Re-read file for new downloads
            $list = @(Get-Content $source | Select-Object -Unique | Where-Object {$_ -notlike $null})
            #loop delay
            Start-Sleep -Seconds 1
        }
    }
    END{
        Write-Host -ForegroundColor DarkGreen -BackgroundColor Black "Downloads complete."
    }
}
#Convenience function to stop all downloads
Function Stop-DownloadFiles{
    <#
    .SYNOPSIS
    Cancels every BITS transfer job returned by Get-BitsTransfer.
    .NOTES
    Get-BitsTransfer is called without a filter, so every job it returns is removed,
    not only those queued by Get-DownloadFiles.
    #>
    Get-BitsTransfer | ForEach-Object {
        Remove-BitsTransfer -BitsJob $_
    }
}
#Entry point: running (or dot-sourcing) this script immediately starts downloading with the default parameter values
Get-DownloadFiles