# Crawl-processing configuration: wget command templates, WETProcessor.java
# cutoff values, and MongoDB connection settings.
#
# Format: one key=value pair per line; comments are full lines starting with '#'.
# Do not append comments after a value - they would become part of the value.

# ---------------------------------------------------------------------------
# wget command templates
# ---------------------------------------------------------------------------
# References:
#   https://www.linuxjournal.com/content/downloading-entire-web-site-wget
#   https://linuxreviews.org/Wget:_download_whole_or_parts_of_websites_with_ease
#   https://www.webhostface.com/kb/knowledgebase/examples-using-wget/
#     "You can replicate the HTML content of a website with the --mirror option
#      (or -m for short):  wget -m http://domain.com"
#   https://www.linuxquestions.org/questions/linux-server-73/wget-how-to-download-more-than-one-file-at-once-instead-of-file-after-file-704693/
#
# Mirrors a whole site: -m turns on mirroring, -Q10m sets a 10 MB download quota.
# NOTE(review): %%BASE_URL%% looks like a placeholder that the consuming code
# replaces with the site URL - confirm against the code that reads this key.
wget.mirror.cmd=wget -Q10m -m %%BASE_URL%%

# For downloading a single file.
# NOTE(review): %%FILE_URL%% is presumably substituted the same way - confirm.
wget.file.cmd=wget %%FILE_URL%%

# ---------------------------------------------------------------------------
# WETProcessor.java cutoffs
# ---------------------------------------------------------------------------
# Arbitrary cutoff values for WETProcessor.java: content length, line count
# and wrapped-line thresholds.
WETprocessor.min.content.length=100
WETprocessor.min.line.count=2
WETprocessor.min.content.length.wrapped.line=500
WETprocessor.min.spaces.per.wrapped.line=10

# Arbitrary cutoff values for WETProcessor.java, used for determining whether
# a WET record has sufficient and sensible content.
WETprocessor.max.word.length=15
WETprocessor.min.num.words=20
WETprocessor.max.words.camelcase=10

# ---------------------------------------------------------------------------
# MongoDB connection
# ---------------------------------------------------------------------------
# NOTE(review): the username and password below are stored in plaintext.
# Consider supplying them from outside version control (environment variable
# or an untracked local override file) and rotating the password.
mongodb.user=anupama
mongodb.pwd=chang3m3

# The default MongoDB port is 27017. Don't change the port unless you really
# have configured your MongoDB server to listen on some other port.
mongodb.port=27017
mongodb.host=mongodb.cms.waikato.ac.nz

# Database name. The commented-out line is an alternative database name kept
# for reference.
# NOTE(review): confirm which of the two databases is meant for production use.
#mongodb.dbname=ateacrawldata
mongodb.dbname=anupama