文件名称:heritrixProject
介绍说明--下载内容均来自于网络,请自行研究使用
搜索引擎的一个分支:在搜索网页时,首先得用一个爬虫来获取网址,该代码的功能就是获取所需的网址。- A component of a search engine: before web pages can be searched, a crawler must first collect their URLs. The function of this code is to fetch the required URLs.
(系统自动生成,下载前可以参看下载内容)
下载文件列表
heritrixProject
...............\.classpath
...............\.myumldata
...............\.project
...............\arcMetaheaderBody.xsl
...............\file
...............\....\chinesecharacters2.txt
...............\....\hash.txt
...............\....\index.ind
...............\....\purewords.txt
...............\....\purewordsnosignal3.txt
...............\fish
...............\....\extractor
...............\....\.........\FishExtractor.class
...............\....\.........\FishExtractor.java
...............\....\index
...............\....\.....\SingleIndex.class
...............\....\.....\SingleIndex.java
...............\....\.....\TestIndex.class
...............\....\.....\TestIndex.java
...............\....\test
...............\....\....\TestBrowse.class
...............\....\....\TestBrowse.java
...............\....\....\TestHTML$1.class
...............\....\....\TestHTML.class
...............\....\....\TestHTML.java
...............\....\tools
...............\....\.....\digui.class
...............\....\.....\digui.java
...............\....\.....\Index.class
...............\....\.....\Index.java
...............\....\.....\SingleIndex.class
...............\....\.....\TestGBK.class
...............\....\.....\TestGBK.java
...............\....\.....\ToPureWords.class
...............\....\.....\ToPureWords.java
...............\....\.....\ToPureWordsNosignal.class
...............\....\.....\ToPureWordsNosignal.java
...............\....\.....\ToWord.class
...............\....\.....\ToWord.java
...............\heritrix.properties
...............\heritrix_dmesg.log
...............\heritrix_out.log
...............\jndi.properties
...............\lib
...............\...\ant-1.6.2.jar
...............\...\bsh-2.0b4.jar
...............\...\commons-cli-1.0.jar
...............\...\commons-codec-1.3.jar
...............\...\commons-collections-3.1.jar
...............\...\commons-httpclient-3.0.1.jar
...............\...\commons-lang-2.1.jar
...............\...\commons-logging-1.0.4.jar
...............\...\commons-net-1.4.1.jar
...............\...\commons-pool-1.3.jar
...............\...\dnsjava-1.6.2.jar
...............\...\fastutil-5.0.3-heritrix-subset-1.0.jar
...............\...\filterbuilder.jar
...............\...\htmllexer.jar
...............\...\htmlparser.jar
...............\...\itext-1.2.0.jar
...............\...\jasper-compiler-tomcat-4.1.30.jar
...............\...\jasper-runtime-tomcat-4.1.30.jar
...............\...\javaswf-CVS-SNAPSHOT-1.jar
...............\...\je-3.0.12.jar
...............\...\jetty-4.2.23.jar
...............\...\junit-3.8.1.jar
...............\...\junit.jar
...............\...\libidn-0.5.9.jar
...............\...\mg4j-1.0.1.jar
...............\...\poi-2.0-RC1-20031102.jar
...............\...\poi-scratchpad-2.0-RC1-20031102.jar
...............\...\sax2.jar
...............\...\servlet-tomcat-4.1.30.jar
...............\...\thumbelina.jar
...............\modules
...............\.......\BaseRule.options
...............\.......\CrawlScope.options
...............\.......\Credential.options
...............\.......\DecideRule.options
...............\.......\Filter.options
...............\.......\Frontier.options
...............\.......\Processor.options
...............\.......\StatisticTracking.options
...............\my
...............\..\extractor
...............\..\.........\Mobile163Extractor.class
...............\..\.........\Mobile163Extractor.java
...............\..\postprocessor
...............\..\.............\FrontierSchedulerFor163Mobile.class
...............\..\.............\FrontierSchedulerFor163Mobile.java
...............\..\.............\FrontierSchedulerForPconlineMobile.class
...............\..\.............\FrontierSchedulerForPconlineMobile.java
...............\..\SohuNewsExtractor.class
...............\..\SohuNewsExtractor.java
...............\org
...............\...\apache
...............\...\......\commons
...............\...\......\.......\httpclient
...............\...\......\.......\..........\cookie
...............\.classpath
...............\.myumldata
...............\.project
...............\arcMetaheaderBody.xsl
...............\file
...............\....\chinesecharacters2.txt
...............\....\hash.txt
...............\....\index.ind
...............\....\purewords.txt
...............\....\purewordsnosignal3.txt
...............\fish
...............\....\extractor
...............\....\.........\FishExtractor.class
...............\....\.........\FishExtractor.java
...............\....\index
...............\....\.....\SingleIndex.class
...............\....\.....\SingleIndex.java
...............\....\.....\TestIndex.class
...............\....\.....\TestIndex.java
...............\....\test
...............\....\....\TestBrowse.class
...............\....\....\TestBrowse.java
...............\....\....\TestHTML$1.class
...............\....\....\TestHTML.class
...............\....\....\TestHTML.java
...............\....\tools
...............\....\.....\digui.class
...............\....\.....\digui.java
...............\....\.....\Index.class
...............\....\.....\Index.java
...............\....\.....\SingleIndex.class
...............\....\.....\TestGBK.class
...............\....\.....\TestGBK.java
...............\....\.....\ToPureWords.class
...............\....\.....\ToPureWords.java
...............\....\.....\ToPureWordsNosignal.class
...............\....\.....\ToPureWordsNosignal.java
...............\....\.....\ToWord.class
...............\....\.....\ToWord.java
...............\heritrix.properties
...............\heritrix_dmesg.log
...............\heritrix_out.log
...............\jndi.properties
...............\lib
...............\...\ant-1.6.2.jar
...............\...\bsh-2.0b4.jar
...............\...\commons-cli-1.0.jar
...............\...\commons-codec-1.3.jar
...............\...\commons-collections-3.1.jar
...............\...\commons-httpclient-3.0.1.jar
...............\...\commons-lang-2.1.jar
...............\...\commons-logging-1.0.4.jar
...............\...\commons-net-1.4.1.jar
...............\...\commons-pool-1.3.jar
...............\...\dnsjava-1.6.2.jar
...............\...\fastutil-5.0.3-heritrix-subset-1.0.jar
...............\...\filterbuilder.jar
...............\...\htmllexer.jar
...............\...\htmlparser.jar
...............\...\itext-1.2.0.jar
...............\...\jasper-compiler-tomcat-4.1.30.jar
...............\...\jasper-runtime-tomcat-4.1.30.jar
...............\...\javaswf-CVS-SNAPSHOT-1.jar
...............\...\je-3.0.12.jar
...............\...\jetty-4.2.23.jar
...............\...\junit-3.8.1.jar
...............\...\junit.jar
...............\...\libidn-0.5.9.jar
...............\...\mg4j-1.0.1.jar
...............\...\poi-2.0-RC1-20031102.jar
...............\...\poi-scratchpad-2.0-RC1-20031102.jar
...............\...\sax2.jar
...............\...\servlet-tomcat-4.1.30.jar
...............\...\thumbelina.jar
...............\modules
...............\.......\BaseRule.options
...............\.......\CrawlScope.options
...............\.......\Credential.options
...............\.......\DecideRule.options
...............\.......\Filter.options
...............\.......\Frontier.options
...............\.......\Processor.options
...............\.......\StatisticTracking.options
...............\my
...............\..\extractor
...............\..\.........\Mobile163Extractor.class
...............\..\.........\Mobile163Extractor.java
...............\..\postprocessor
...............\..\.............\FrontierSchedulerFor163Mobile.class
...............\..\.............\FrontierSchedulerFor163Mobile.java
...............\..\.............\FrontierSchedulerForPconlineMobile.class
...............\..\.............\FrontierSchedulerForPconlineMobile.java
...............\..\SohuNewsExtractor.class
...............\..\SohuNewsExtractor.java
...............\org
...............\...\apache
...............\...\......\commons
...............\...\......\.......\httpclient
...............\...\......\.......\..........\cookie