#!/usr/bin/perl

# Please refer to the Plain Old Documentation (POD) at the end of this Perl Script for further information

use strict;
use warnings;

# SOAP::Lite version 0.52 or newer is recommended by http://code.google.com/apis/soapsearch/api_faq.html#tech20
use SOAP::Lite;
use Getopt::Long;
use Data::Dumper;

# May be required to upload script to CPAN i.e. http://www.cpan.org/scripts/submitting.html
our $VERSION = '0.1';

# Location of the GoogleSearch WSDL file, shared by every SOAP call below.
# It is assigned here, ahead of the main logic, so that it already holds its
# value when do_Google_Search() and doGetCachedPage() are called.
my $GOOGLE_WSDL = "http://api.google.com/GoogleSearch.wsdl";

print "\n\"Download Indexed Cache\" Proof of Concept (PoC) v0.1 (Released at RUXCON 2K8)\n";
print "\n";
print "Copyright 2008 Christian Heinrich\n";
print "Licensed under the Apache License, Version 2.0\n\n";

# Take the query from the command line
my $google_api_key;
my $query;
my $start;

# TODO Input Validation of the content of -key and -query
GetOptions(
    "key=s"   => \$google_api_key,
    "query=s" => \$query,
    "start=s" => \$start
) or usage();

# All three arguments are documented as required; without them the script
# would go on to build paths and SOAP requests from undefined values.
if (   !defined($google_api_key)
    || !defined($query)
    || !defined($start) )
{
    usage();
}

# Process command line arguments
chomp($query);
if ( $query eq '' ) {
    usage();
}

# -start is the 1-based number of the first search result wanted
if ( $start !~ /\A[1-9][0-9]*\z/ ) {
    print STDERR "-start must be a whole number of 1 or more\n\n";
    usage();
}

# The value handed to doGoogleSearch is one less than the number the user gave
$start = $start - 1;

# For demonstrations without exposing the Google SOAP Search API Key, insert
# your Google SOAP Search API Key below and then run dic.pl -key "demo"
# String comparison (eq) is required here: a numeric == would treat every
# non-numeric key as equal to "demo".
if ( $google_api_key eq "demo" ) {

    # Replace "insert_google_api_key" with your Google SOAP Search API Key
    # $google_api_key = "insert_google_api_key";
}

# strip ":" from Google Search Operator for Filename
# TODO Expand this to strip illegal filename chars e.g. \/:*?<>|
my $stripped_query = $query;
$stripped_query =~ s/://g;

# The directory which holds the output of dic
my $dir = "$stripped_query/dic";

if ( !-e "./$dir" ) {
    print("Creating ./$dir\n\n");

    # The builtin mkdir is used for both levels so that the query text is
    # never passed through a shell.
    if ( !-e "./$stripped_query" ) {
        mkdir("./$stripped_query")
          or die "Cannot create directory ./$stripped_query: $!\n";
    }
    mkdir("./$dir") or die "Cannot create directory ./$dir: $!\n";
}
else {
    print "Appending ./$dir\n\n";
}

my $google_search_results =
  do_Google_Search( $google_api_key, $query, $start );

# TODO Display a warning if the requested range of results exceeds 1000

# Keep a raw dump of the search response next to the downloaded pages
my $data_dumper_file = "./$dir/datadumper.txt";
open( my $data_dumper_fh, '>>', $data_dumper_file )
  or die "Cannot open $data_dumper_file for appending: $!\n";
print {$data_dumper_fh} Data::Dumper::Dumper($google_search_results);
close($data_dumper_fh)
  or die "Cannot close $data_dumper_file: $!\n";

# The URL corresponding to the Search Result .html file is listed in this .CSV file
my $url_file = "./$dir/$stripped_query.csv";
open( my $url_fh, '>>', $url_file )
  or die "Cannot open $url_file for appending: $!\n";

my $google_search_result_number = $start;

# Pull the list of results out of the response, tolerating an empty response
my $result_elements;
if ( ref $google_search_results ) {
    $result_elements = $google_search_results->{resultElements};
}
if ( !defined $result_elements ) {
    warn "No search results were returned for \"$query\"\n";
    $result_elements = [];
}

# Loop through the results.
foreach my $google_search_result ( @{$result_elements} ) {

    # Set the results as variables
    ++$google_search_result_number;
    my $URL        = $google_search_result->{URL};
    my $cachedSize = $google_search_result->{cachedSize};
    if ( !defined $cachedSize ) {
        $cachedSize = '';
    }

    print(  "Downloading " . $URL
          . " from Google Cache ["
          . $cachedSize
          . "] as "
          . $google_search_result_number
          . ".html\n" );

    my $google_cached_page = doGetCachedPage( $google_api_key, $URL );
    if ( !defined $google_cached_page ) {

        # Still create the (empty) .html file so the numbering in the .CSV
        # file keeps matching the files on disk.
        warn "No cached page was returned for $URL\n";
        $google_cached_page = '';
    }

    my $cached_page_file = "./$dir/$google_search_result_number.html";
    open( my $cached_page_fh, '>', $cached_page_file )
      or die "Cannot open $cached_page_file for writing: $!\n";
    print {$cached_page_fh} $google_cached_page;
    close($cached_page_fh)
      or die "Cannot close $cached_page_file: $!\n";

    # TODO Include the date and time the page was indexed i.e. to quote the cache page "It is a snapshot of the page as it appeared on [Date] [Time]"
    print {$url_fh} ( "$google_search_result_number" . "," . "$URL\n" );
}

close($url_fh) or die "Cannot close $url_file: $!\n";

# usage()
# Prints the command line synopsis to STDERR and ends the script with exit
# status 1. Never returns.
sub usage {
    print STDERR
"Usage: dic.pl -key [key] -query [Google Search Query] -start [Starting Google Search Result Number]\n";
    exit 1;
}

# do_Google_Search( $key, $q, $start )
# Sends a doGoogleSearch request through SOAP::Lite and returns whatever the
# call returns.
#   $key   - Google SOAP Search API Key
#   $q     - Google Search Query
#   $start - index of the first result, passed to doGoogleSearch unchanged
# All other doGoogleSearch arguments are fixed below (10 results per request,
# no filtering, no restricts, UTF-8 input and output encoding).
sub do_Google_Search {

    # Variable Naming Convention is as per Google SOAP Search API Reference Documentation
    # $q is Google Search Query from Google SOAP Search API Reference
    # TODO Check length of Google Search Query is 2048 bytes
    # TODO Check Google Search Query is a maximum of 10 Words
    # TODO Check only one site: term is in the Google Search Query
    # $start is derived from the -start cmd line argument
    my ( $key, $q, $start ) = @_;

    # TODO Must add a test to ensure that $maxResults is between 1 to 1000
    my $maxResults = "10";

    # $filter is boolean i.e. either "true" or "false"
    my $filter = "false";

    # TODO Check Country of Restrict
    # TODO Check Topic of Restrict
    my $restricts = "";

    my $safeSearch = "false";

    # TODO Check Language Restrict
    my $lr = "";

    # ie is Input Encoding and this has been deprecated in the Google SOAP Search API
    my $ie = "UTF-8";

    # oe is Output Encoding and this has been deprecated in the Google SOAP Search API
    my $oe = "UTF-8";

    # Create a new SOAP::Lite instance, feeding it GoogleSearch.wsdl
    my $google_search = SOAP::Lite->service($GOOGLE_WSDL);

    # TODO Confirm that connection with api.google.com can be established
    my $google_search_results = $google_search->doGoogleSearch(
        $key,       $q,          $start, $maxResults, $filter,
        $restricts, $safeSearch, $lr,    $ie,         $oe
    );

    # TODO Confirm that doGoogleSearchResponse SOAP Message is not empty due to exceeding 10K SOAP Messages with Google SOAP Search API Key
    return $google_search_results;
}

# doGetCachedPage( $key, $URL )
# Sends a doGetCachedPage request through SOAP::Lite and returns whatever the
# call returns.
#   $key - Google SOAP Search API Key
#   $URL - URL whose cached copy is requested
sub doGetCachedPage {

    # Variable Naming Convention is as per Google SOAP Search API Reference Documentation
    my ( $key, $URL ) = @_;

    my $google_cache = SOAP::Lite->service($GOOGLE_WSDL);

    # Use the key handed to this sub rather than the file-level variable, so
    # the sub behaves according to its own arguments.
    my $doGetCachedPageResponse = $google_cache->doGetCachedPage( $key, $URL );

    # TODO Confirm that doGetCachedPageResponse SOAP Message is not empty due to exceeding 10K SOAP Messages with Google SOAP Search API Key
    return $doGetCachedPageResponse;
}

=head1 NAME

dic.pl - "Download Indexed Cache"

=head1 VERSION

This documentation refers to dic PoC v0.1. Released at RUXCON 2K8 (AU)

=head1 USAGE

dic.pl -key [key] -query [Google Search Query] -start [Starting Google Search Result Number]

=head1 REQUIRED ARGUMENTS

-key Google SOAP Search API Key

-query Google Search Query

-start Starting Google Search Result Number

=head1 DESCRIPTION

"Download Indexed Cache" implements the Google SOAP Search API to retrieve
content indexed within the Google Cache and supports the "Search Engine
Reconnaissance" section of the recently released OWASP Testing Guide v3.

=head1 DEPENDENCIES

=head1 PREREQUISITES

SOAP::Lite v0.52 CPAN Module

Data::Dumper CPAN Module

=head1 COREQUISITES

=head1 OSNAMES

cygwin

=head1 SCRIPT CATEGORIES

Web

=head1 INCOMPATIBILITIES

=head1 BUGS AND LIMITATIONS

Please refer to the comments beginning with "TODO" in the Perl Code.

=head1 AUTHOR

Christian Heinrich

=head1 CONTACT INFORMATION

christian.heinrich@owasp.org

christian.heinrich@cmlh.id.au

cmlh@cpan.org

http://www.linkedin.com/in/ChristianHeinrich

=head1 MAILING LIST

https://lists.owasp.org/mailman/listinfo/owasp-google-hacking

http://groups.google.com/group/download-indexed-cache

=head1 SUBVERSION REPOSITORY

# TODO svn propset svn:keywords

http://code.google.com/p/dic

=head1 FURTHER INFORMATION AND UPDATES

http://del.icio.us/cmlh/dic

https://lists.owasp.org/mailman/listinfo/owasp-google-hacking

http://groups.google.com/group/download-indexed-cache

http://code.google.com/p/dic

=head1 LICENSE AND COPYRIGHT

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Copyright 2008 Christian Heinrich