文件名称:crawler
- 所属分类:
- JSP源码/Java
- 资源属性:
- [Java] [源码]
- 上传时间:
- 2017-07-06
- 文件大小:
- 293kb
- 下载次数:
- 0次
- 提 供 者:
- cyh****
- 相关链接:
- 无
- 下载说明:
- 别用迅雷下载,失败请重下,重下不扣分!
介绍说明--下载内容均来自于网络,请自行研究使用
轻量级爬虫框架,可控制抓取深度
跟踪最初站源
可配置线程池
可配置UserAgent
可决定是否要抽取链接
Bloom Filter
可控制爬取速度
内置UserAgent池
支持Proxy池(Lightweight crawler framework)
跟踪最初站源
可配置线程池
可配置UserAgent
可决定是否要抽取链接
Bloom Filter
可控制爬取速度
内置UserAgent池
支持Proxy池(Lightweight crawler framework)
相关搜索: java
(系统自动生成,下载前可以参看下载内容)
下载文件列表
crawler\.git\COMMIT_EDITMSG
crawler\.git\config
crawler\.git\description
crawler\.git\HEAD
crawler\.git\hooks\applypatch-msg.sample
crawler\.git\hooks\commit-msg.sample
crawler\.git\hooks\post-update.sample
crawler\.git\hooks\pre-applypatch.sample
crawler\.git\hooks\pre-commit.sample
crawler\.git\hooks\pre-push.sample
crawler\.git\hooks\pre-rebase.sample
crawler\.git\hooks\pre-receive.sample
crawler\.git\hooks\prepare-commit-msg.sample
crawler\.git\hooks\update.sample
crawler\.git\index
crawler\.git\info\exclude
crawler\.git\logs\HEAD
crawler\.git\logs\refs\heads\master
crawler\.git\logs\refs\remotes\origin\HEAD
crawler\.git\logs\refs\remotes\origin\master
crawler\.git\objects\0e\647bddcc333f6b3880247bda8fc235ac0e2182
crawler\.git\objects\0e\ff1529d4aafc0b7fee8cc5c11710c411ff1049
crawler\.git\objects\17\40f3f9bd392cdc6676040a09fe311372b44c63
crawler\.git\objects\1f\3f5c61aba632b923539723beabcdfb53fd1c2c
crawler\.git\objects\28\b4241a32241a62492830c19709bf876efd3b12
crawler\.git\objects\2b\ed16deff9625af379ca589242080b1b416ac56
crawler\.git\objects\42\1ec5a6c070ae805fdc272c040f0a4e69830b31
crawler\.git\objects\43\4e944ff61a11b0f188005fb1c94391892595e8
crawler\.git\objects\43\55c49fb0832a3aca63628be3f13c9a40ce6d48
crawler\.git\objects\44\9755a55c847d56b700d4dfc3eb3c23541296a5
crawler\.git\objects\46\6de8bdb63212f2684fbde0304820dab18a2a8e
crawler\.git\objects\47\6258fa73a57192dc4c171e93f983efcdd663cc
crawler\.git\objects\49\8ebf9a094cdb6a471036eb52861af7d14beb2a
crawler\.git\objects\51\213333af825be8712be5ebe28dc3c8c7c0851c
crawler\.git\objects\68\847a3fd41d9cf5d39969d08d32605fa06dd1cb
crawler\.git\objects\8b\0e92450482ee7df6f6afd179fc7d275de0d19e
crawler\.git\objects\95\f436ec2a6980d480c36caf96d481d8c6b9cec4
crawler\.git\objects\ac\607891e9ae1a36452717b3ce07ad29cd73ba28
crawler\.git\objects\bc\cce1efc5cd60b2bf261d0c794379e11373c353
crawler\.git\objects\c1\5c40bebe5d1024c8492ebf2989d3e809e52a60
crawler\.git\objects\c1\84792a75ad3f642d0fae93f6594113d789e85e
crawler\.git\objects\c3\98d43fc729782fbdd80018180c209610d5e75b
crawler\.git\objects\d1\104857893407dd55730d1887da1f27fe83a3fd
crawler\.git\objects\d3\3e215552d0ab78f9ecf84c0dfec27657350446
crawler\.git\objects\e4\0cfa2976419ab4bc04e92c142b6109078de840
crawler\.git\objects\eb\841e38f980a509a86f5765b14fe0222690ff12
crawler\.git\objects\f0\3f54d9ec1c1c3e9b003bfa4fba5009f2a91905
crawler\.git\objects\pack\pack-0e3806fd34dcb57e3095d0706a4ebde66e3a98c1.idx
crawler\.git\objects\pack\pack-0e3806fd34dcb57e3095d0706a4ebde66e3a98c1.pack
crawler\.git\packed-refs
crawler\.git\refs\heads\master
crawler\.git\refs\remotes\origin\HEAD
crawler\.git\refs\remotes\origin\master
crawler\.gitignore
crawler\.idea\compiler.xml
crawler\.idea\copyright\profiles_settings.xml
crawler\.idea\inspectionProfiles\profiles_settings.xml
crawler\.idea\inspectionProfiles\Project_Default.xml
crawler\.idea\kotlinc.xml
crawler\.idea\libraries\Maven__commons_codec_commons_codec_1_9.xml
crawler\.idea\libraries\Maven__commons_io_commons_io_1_3_2.xml
crawler\.idea\libraries\Maven__commons_logging_commons_logging_1_2.xml
crawler\.idea\libraries\Maven__com_google_guava_guava_19_0.xml
crawler\.idea\libraries\Maven__junit_junit_4_11.xml
crawler\.idea\libraries\Maven__log4j_log4j_1_2_17.xml
crawler\.idea\libraries\Maven__mysql_mysql_connector_java_6_0_5.xml
crawler\.idea\libraries\Maven__org_apache_commons_commons_lang3_3_4.xml
crawler\.idea\libraries\Maven__org_apache_httpcomponents_httpclient_4_5_2.xml
crawler\.idea\libraries\Maven__org_apache_httpcomponents_httpcore_4_4_4.xml
crawler\.idea\libraries\Maven__org_hamcrest_hamcrest_core_1_3.xml
crawler\.idea\libraries\Maven__org_jsoup_jsoup_1_9_1.xml
crawler\.idea\libraries\Maven__org_slf4j_slf4j_api_1_7_21.xml
crawler\.idea\libraries\Maven__org_slf4j_slf4j_log4j12_1_7_21.xml
crawler\.idea\misc.xml
crawler\.idea\modules.xml
crawler\.idea\preferred-vcs.xml
crawler\.idea\uiDesigner.xml
crawler\.idea\vcs.xml
crawler\.idea\workspace.xml
crawler\crawler.iml
crawler\pom.xml
crawler\readme.md
crawler\src\main\java\com\cyhone\Configurable.java
crawler\src\main\java\com\cyhone\CrawlConfig.java
crawler\src\main\java\com\cyhone\downloader\Downloader.java
crawler\src\main\java\com\cyhone\downloader\HttpclientDownloader.java
crawler\src\main\java\com\cyhone\downloader\HttpClientFactory.java
crawler\src\main\java\com\cyhone\downloader\PhantomJSDownloader.java
crawler\src\main\java\com\cyhone\downloader\useragent\SimpleUserAgentPool.java
crawler\src\main\java\com\cyhone\downloader\useragent\UserAgentPool.java
crawler\src\main\java\com\cyhone\linkextractor\JsoupLinkExtractor.java
crawler\src\main\java\com\cyhone\linkextractor\LinkExtractor.java
crawler\src\main\java\com\cyhone\model\Page.java
crawler\src\main\java\com\cyhone\model\Request.java
crawler\src\main\java\com\cyhone\model\Site.java
crawler\src\main\java\com\cyhone\pipeline\MirrorPipeLine.java
crawler\src\main\java\com\cyhone\pipeline\PipeLine.java
crawler\src\main\java\com\cyhone\processor\Processor.java
crawler\src\main\java\com\cyhone\scheduler\DistinctSchedulerAdpter.java
crawler\src\main\java\com\cyhone\scheduler\duplicate\BloomFilterRemover.java
crawler\.git\config
crawler\.git\description
crawler\.git\HEAD
crawler\.git\hooks\applypatch-msg.sample
crawler\.git\hooks\commit-msg.sample
crawler\.git\hooks\post-update.sample
crawler\.git\hooks\pre-applypatch.sample
crawler\.git\hooks\pre-commit.sample
crawler\.git\hooks\pre-push.sample
crawler\.git\hooks\pre-rebase.sample
crawler\.git\hooks\pre-receive.sample
crawler\.git\hooks\prepare-commit-msg.sample
crawler\.git\hooks\update.sample
crawler\.git\index
crawler\.git\info\exclude
crawler\.git\logs\HEAD
crawler\.git\logs\refs\heads\master
crawler\.git\logs\refs\remotes\origin\HEAD
crawler\.git\logs\refs\remotes\origin\master
crawler\.git\objects\0e\647bddcc333f6b3880247bda8fc235ac0e2182
crawler\.git\objects\0e\ff1529d4aafc0b7fee8cc5c11710c411ff1049
crawler\.git\objects\17\40f3f9bd392cdc6676040a09fe311372b44c63
crawler\.git\objects\1f\3f5c61aba632b923539723beabcdfb53fd1c2c
crawler\.git\objects\28\b4241a32241a62492830c19709bf876efd3b12
crawler\.git\objects\2b\ed16deff9625af379ca589242080b1b416ac56
crawler\.git\objects\42\1ec5a6c070ae805fdc272c040f0a4e69830b31
crawler\.git\objects\43\4e944ff61a11b0f188005fb1c94391892595e8
crawler\.git\objects\43\55c49fb0832a3aca63628be3f13c9a40ce6d48
crawler\.git\objects\44\9755a55c847d56b700d4dfc3eb3c23541296a5
crawler\.git\objects\46\6de8bdb63212f2684fbde0304820dab18a2a8e
crawler\.git\objects\47\6258fa73a57192dc4c171e93f983efcdd663cc
crawler\.git\objects\49\8ebf9a094cdb6a471036eb52861af7d14beb2a
crawler\.git\objects\51\213333af825be8712be5ebe28dc3c8c7c0851c
crawler\.git\objects\68\847a3fd41d9cf5d39969d08d32605fa06dd1cb
crawler\.git\objects\8b\0e92450482ee7df6f6afd179fc7d275de0d19e
crawler\.git\objects\95\f436ec2a6980d480c36caf96d481d8c6b9cec4
crawler\.git\objects\ac\607891e9ae1a36452717b3ce07ad29cd73ba28
crawler\.git\objects\bc\cce1efc5cd60b2bf261d0c794379e11373c353
crawler\.git\objects\c1\5c40bebe5d1024c8492ebf2989d3e809e52a60
crawler\.git\objects\c1\84792a75ad3f642d0fae93f6594113d789e85e
crawler\.git\objects\c3\98d43fc729782fbdd80018180c209610d5e75b
crawler\.git\objects\d1\104857893407dd55730d1887da1f27fe83a3fd
crawler\.git\objects\d3\3e215552d0ab78f9ecf84c0dfec27657350446
crawler\.git\objects\e4\0cfa2976419ab4bc04e92c142b6109078de840
crawler\.git\objects\eb\841e38f980a509a86f5765b14fe0222690ff12
crawler\.git\objects\f0\3f54d9ec1c1c3e9b003bfa4fba5009f2a91905
crawler\.git\objects\pack\pack-0e3806fd34dcb57e3095d0706a4ebde66e3a98c1.idx
crawler\.git\objects\pack\pack-0e3806fd34dcb57e3095d0706a4ebde66e3a98c1.pack
crawler\.git\packed-refs
crawler\.git\refs\heads\master
crawler\.git\refs\remotes\origin\HEAD
crawler\.git\refs\remotes\origin\master
crawler\.gitignore
crawler\.idea\compiler.xml
crawler\.idea\copyright\profiles_settings.xml
crawler\.idea\inspectionProfiles\profiles_settings.xml
crawler\.idea\inspectionProfiles\Project_Default.xml
crawler\.idea\kotlinc.xml
crawler\.idea\libraries\Maven__commons_codec_commons_codec_1_9.xml
crawler\.idea\libraries\Maven__commons_io_commons_io_1_3_2.xml
crawler\.idea\libraries\Maven__commons_logging_commons_logging_1_2.xml
crawler\.idea\libraries\Maven__com_google_guava_guava_19_0.xml
crawler\.idea\libraries\Maven__junit_junit_4_11.xml
crawler\.idea\libraries\Maven__log4j_log4j_1_2_17.xml
crawler\.idea\libraries\Maven__mysql_mysql_connector_java_6_0_5.xml
crawler\.idea\libraries\Maven__org_apache_commons_commons_lang3_3_4.xml
crawler\.idea\libraries\Maven__org_apache_httpcomponents_httpclient_4_5_2.xml
crawler\.idea\libraries\Maven__org_apache_httpcomponents_httpcore_4_4_4.xml
crawler\.idea\libraries\Maven__org_hamcrest_hamcrest_core_1_3.xml
crawler\.idea\libraries\Maven__org_jsoup_jsoup_1_9_1.xml
crawler\.idea\libraries\Maven__org_slf4j_slf4j_api_1_7_21.xml
crawler\.idea\libraries\Maven__org_slf4j_slf4j_log4j12_1_7_21.xml
crawler\.idea\misc.xml
crawler\.idea\modules.xml
crawler\.idea\preferred-vcs.xml
crawler\.idea\uiDesigner.xml
crawler\.idea\vcs.xml
crawler\.idea\workspace.xml
crawler\crawler.iml
crawler\pom.xml
crawler\readme.md
crawler\src\main\java\com\cyhone\Configurable.java
crawler\src\main\java\com\cyhone\CrawlConfig.java
crawler\src\main\java\com\cyhone\downloader\Downloader.java
crawler\src\main\java\com\cyhone\downloader\HttpclientDownloader.java
crawler\src\main\java\com\cyhone\downloader\HttpClientFactory.java
crawler\src\main\java\com\cyhone\downloader\PhantomJSDownloader.java
crawler\src\main\java\com\cyhone\downloader\useragent\SimpleUserAgentPool.java
crawler\src\main\java\com\cyhone\downloader\useragent\UserAgentPool.java
crawler\src\main\java\com\cyhone\linkextractor\JsoupLinkExtractor.java
crawler\src\main\java\com\cyhone\linkextractor\LinkExtractor.java
crawler\src\main\java\com\cyhone\model\Page.java
crawler\src\main\java\com\cyhone\model\Request.java
crawler\src\main\java\com\cyhone\model\Site.java
crawler\src\main\java\com\cyhone\pipeline\MirrorPipeLine.java
crawler\src\main\java\com\cyhone\pipeline\PipeLine.java
crawler\src\main\java\com\cyhone\processor\Processor.java
crawler\src\main\java\com\cyhone\scheduler\DistinctSchedulerAdpter.java
crawler\src\main\java\com\cyhone\scheduler\duplicate\BloomFilterRemover.java