Get all links from a list of web pages (specific)

PowerShell script: fetches a range of numbered pages (or a manually supplied
list), extracts all links whose href contains "/1/", and appends them to an
output file.

# Collect links from a list of web pages and append matching ones to a file.
$InputLinksFile = "c:\temp\InputLinks.txt"
$OutputLinksFile = "C:\temp\OutputLinks.txt"
$InputLinks = @()

# Base URL that each page number is appended to (e.g. "https://example.com/page/").
$BasePage = ""
[int]$FirstPageNumber = 600   # BUG FIX: was the string "600"; use integer literals
[int]$LastPageNumber = 601
$CurrentPageNumber = $FirstPageNumber

# Make a list of all the pages we want to input, counting from FirstPageNumber to LastPageNumber
while ($CurrentPageNumber -le $LastPageNumber) {
	$InputLinks += "$BasePage$CurrentPageNumber"
	$CurrentPageNumber++   # BUG FIX: original never incremented the counter -> infinite loop
}                          # BUG FIX: original never closed the while loop -> parse error

# If you want to manually input a list of pages instead, remove # in front of the next line:
#$InputLinks = Get-Content -Path $InputLinksFile

ForEach ($InputLink in $InputLinks) {
	# Fetch the entire page. Get links in page with ().Links. Page is compressed with gzip, so we'll have to account for that
	$InputPageLinks = (Invoke-WebRequest -Uri $InputLink -Headers @{"Accept-Encoding"="gzip"}).Links

	# Filter the link list to only contain links with the sequence "/1/" in it.
	$FilteredOutputLinks = $InputPageLinks | Where-Object {$_.href -like "*/1/*"}

	# The provided links are relative and not absolute, so we need to add the domain name to the output.
	# NOTE(review): $BasePage is not actually prepended here — prepend it to href if absolute URLs are needed.
	foreach ($OutputLink in $FilteredOutputLinks) {
		$FinalLink = "$($OutputLink.href)"
		Out-File -Append -FilePath $OutputLinksFile -InputObject $FinalLink
	}                          # BUG FIX: original never closed the inner foreach

	# Reset the per-page link list so a failed fetch can't reuse the previous page's links.
	Clear-Variable InputPageLinks
}                              # BUG FIX: original never closed the outer foreach