SearchCrawler
上传者:刘纯波|上传时间:2015-04-21|密次下载
SearchCrawler
Java Swing编写的网络爬虫,来自《Java编程艺术》
import java.awt.*; import java.awt.event.*; import java.io.*; import java.net.*; import java.util.*; import java.util.regex.*; import javax.swing.*; import javax.swing.table.*; // The Search Web Crawler public class SearchCrawler extends JFrame { // Max URLs drop down values. private static final String[] MAX_URLS = {"50", "100", "500", "1000"}; // Cache of robot disallow lists. private HashMap disallowListCache = new HashMap(); // Search GUI controls. private JTextField startTextField; private JComboBox maxComboBox; private JCheckBox limitCheckBox; private JTextField logTextField; private JTextField searchTextField; private JCheckBox caseCheckBox; private JButton searchButton; // Search stats GUI controls. private JLabel crawlingLabel2; private JLabel crawledLabel2; private JLabel toCrawlLabel2; private JProgressBar progressBar; private JLabel matchesLabel2; // Table listing search matches. private JTable table; // Flag for whether or not crawling is underway. private boolean crawling; // Matches log file print writer. private PrintWriter logFileWriter; // Constructor for Search Web Crawler. public SearchCrawler() { // Set application title. setTitle("Search Crawler"); // Set window size. setSize(600, 600); // Handle window closing events. addWindowListener(new WindowAdapter() { public void windowClosing(WindowEvent e) { actionExit(); } }); // Set up file menu. JMenuBar menuBar = new JMenuBar(); JMenu fileMenu = new JMenu("File"); fileMenu.setMnemonic(KeyEvent.VK_F); JMenuItem fileExitMenuItem = new JMenuItem("Exit", KeyEvent.VK_X); fileExitMenuItem.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { actionExit(); } }); fileMenu.add(fileExitMenuItem); menuBar.add(fileMenu); setJMenuBar(menuBar); // Set up search panel. 
JPanel searchPanel = new JPanel(); GridBagConstraints constraints; GridBagLayout layout = new GridBagLayout(); searchPanel.setLayout(layout); JLabel startLabel = new JLabel("Start URL:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST; constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(startLabel, constraints); searchPanel.add(startLabel); startTextField = new JTextField(); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 0, 5); layout.setConstraints(startTextField, constraints); searchPanel.add(startTextField); JLabel maxLabel = new JLabel("Max URLs to Crawl:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST;
Java Swing编写的网络爬虫,来自《Java编程艺术》
constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(maxLabel, constraints); searchPanel.add(maxLabel); maxComboBox = new JComboBox(MAX_URLS); maxComboBox.setEditable(true); constraints = new GridBagConstraints(); constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(maxComboBox, constraints); searchPanel.add(maxComboBox); limitCheckBox = new JCheckBox("Limit crawling to Start URL site"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.WEST; constraints.insets = new Insets(0, 10, 0, 0); layout.setConstraints(limitCheckBox, constraints); searchPanel.add(limitCheckBox); JLabel blankLabel = new JLabel(); constraints = new GridBagConstraints(); constraints.gridwidth = GridBagConstraints.REMAINDER; layout.setConstraints(blankLabel, constraints); searchPanel.add(blankLabel); JLabel logLabel = new JLabel("Matches Log File:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST; constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(logLabel, constraints); searchPanel.add(logLabel); String file = System.getProperty("user.dir") + System.getProperty("file.separator") + "crawler.log"; logTextField = new JTextField(file); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 0, 5); layout.setConstraints(logTextField, constraints); searchPanel.add(logTextField); JLabel searchLabel = new JLabel("Search String:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST; constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(searchLabel, constraints); searchPanel.add(searchLabel); searchTextField = new JTextField(); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.insets = new Insets(5, 5, 0, 0); constraints.gridwidth= 2; constraints.weightx = 1.0d; 
layout.setConstraints(searchTextField, constraints); searchPanel.add(searchTextField); caseCheckBox = new JCheckBox("Case Sensitive"); constraints = new GridBagConstraints(); constraints.insets = new Insets(5, 5, 0, 5); constraints.gridwidth = GridBagConstraints.REMAINDER; layout.setConstraints(caseCheckBox, constraints); searchPanel.add(caseCheckBox); searchButton = new JButton("Search"); searchButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { actionSearch(); } }); constraints = new GridBagConstraints(); constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 5
Java Swing编写的网络爬虫,来自《Java编程艺术》
, 5); layout.setConstraints(searchButton, constraints); searchPanel.add(searchButton); JSeparator separator = new JSeparator(); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 5, 5); layout.setConstraints(separator, constraints); searchPanel.add(separator); JLabel crawlingLabel1 = new JLabel("Crawling:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST; constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(crawlingLabel1, constraints); searchPanel.add(crawlingLabel1); crawlingLabel2 = new JLabel(); crawlingLabel2.setFont( crawlingLabel2.getFont().deriveFont(Font.PLAIN)); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 0, 5); layout.setConstraints(crawlingLabel2, constraints); searchPanel.add(crawlingLabel2); JLabel crawledLabel1 = new JLabel("Crawled URLs:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST; constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(crawledLabel1, constraints); searchPanel.add(crawledLabel1); crawledLabel2 = new JLabel(); crawledLabel2.setFont( crawledLabel2.getFont().deriveFont(Font.PLAIN)); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 0, 5); layout.setConstraints(crawledLabel2, constraints); searchPanel.add(crawledLabel2); JLabel toCrawlLabel1 = new JLabel("URLs to Crawl:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST; constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(toCrawlLabel1, constraints); searchPanel.add(toCrawlLabel1); toCrawlLabel2 = new JLabel(); toCrawlLabel2.setFont( 
toCrawlLabel2.getFont().deriveFont(Font.PLAIN)); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 0, 5); layout.setConstraints(toCrawlLabel2, constraints); searchPanel.add(toCrawlLabel2); JLabel progressLabel = new JLabel("Crawling Progress:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST; constraints.insets = new Insets(5, 5, 0, 0); layout.setConstraints(progressLabel, constraints); searchPanel.add(progressLabel); progressBar = new JProgressBar(); progressBar.setMinimum(0); progressBar.
Java Swing编写的网络爬虫,来自《Java编程艺术》
setStringPainted(true); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 0, 5); layout.setConstraints(progressBar, constraints); searchPanel.add(progressBar); JLabel matchesLabel1 = new JLabel("Search Matches:"); constraints = new GridBagConstraints(); constraints.anchor = GridBagConstraints.EAST; constraints.insets = new Insets(5, 5, 10, 0); layout.setConstraints(matchesLabel1, constraints); searchPanel.add(matchesLabel1); matchesLabel2 = new JLabel(); matchesLabel2.setFont( matchesLabel2.getFont().deriveFont(Font.PLAIN)); constraints = new GridBagConstraints(); constraints.fill = GridBagConstraints.HORIZONTAL; constraints.gridwidth = GridBagConstraints.REMAINDER; constraints.insets = new Insets(5, 5, 10, 5); layout.setConstraints(matchesLabel2, constraints); searchPanel.add(matchesLabel2); // Set up matches table. table = new JTable(new DefaultTableModel(new Object[][]{}, new String[]{"URL"}) { public boolean isCellEditable(int row, int column) { return false; } }); // Set up matches panel. JPanel matchesPanel = new JPanel(); matchesPanel.setBorder( BorderFactory.createTitledBorder("Matches")); matchesPanel.setLayout(new BorderLayout()); matchesPanel.add(new JScrollPane(table), BorderLayout.CENTER); // Add panels to display. getContentPane().setLayout(new BorderLayout()); getContentPane().add(searchPanel, BorderLayout.NORTH); getContentPane().add(matchesPanel, BorderLayout.CENTER); } // Exit this program. private void actionExit() { System.exit(0); } // Handle search/stop button being clicked. private void actionSearch() { // If stop button clicked, turn crawling flag off. if (crawling) { crawling = false; return; } ArrayList errorList = new ArrayList(); // Validate that start URL has been entered. 
String startUrl = startTextField.getText().trim(); if (startUrl.length() < 1) { errorList.add("Missing Start URL."); } // Verify start URL. else if (verifyUrl(startUrl) == null) { errorList.add("Invalid Start URL."); } // Validate that max URLs is either empty or is a number. int maxUrls = 0; String max = ((String) maxComboBox.getSelectedItem()).trim(); if (max.length() > 0) { try { maxUrls = Integer.parseInt(max); } catch (NumberFormatException e) { } if (maxUrls < 1) { errorList.add("Invalid Max URLs value."); } } // Validate that matches log file has been entered. String logFile = logTextField.getText().trim(); if (logFile.length() < 1) { errorList.add("Missing Matches L
Java Swing编写的网络爬虫,来自《Java编程艺术》
og File."); } // Validate that search string has been entered. String searchString = searchTextField.getText().trim(); if (searchString.length() < 1) { errorList.add("Missing Search String."); } // Show errors, if any, and return. if (errorList.size() > 0) { StringBuffer message = new StringBuffer(); // Concatenate errors into single message. for (int i = 0; i < errorList.size(); i++) { message.append(errorList.get(i)); if (i + 1 < errorList.size()) { message.append("\n"); } } showError(message.toString()); return; } // Remove "www" from start URL if present. startUrl = removeWwwFromUrl(startUrl); // Start the search crawler. search(logFile, startUrl, maxUrls, searchString); } private void search(final String logFile, final String startUrl, final int maxUrls, final String searchString) { // Start the search in a new thread. Thread thread = new Thread(new Runnable() { public void run() { // Show hour glass cursor while crawling is under way. setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); // Disable search controls. startTextField.setEnabled(false); maxComboBox.setEnabled(false); limitCheckBox.setEnabled(false); logTextField.setEnabled(false); searchTextField.setEnabled(false); caseCheckBox.setEnabled(false); // Switch search button to "Stop." searchButton.setText("Stop"); // Reset stats. table.setModel(new DefaultTableModel(new Object[][]{}, new String[]{"URL"}) { public boolean isCellEditable(int row, int column) { return false; } }); updateStats(startUrl, 0, 0, maxUrls); // Open matches log file. try { logFileWriter = new PrintWriter(new FileWriter(logFile)); } catch (Exception e) { showError("Unable to open matches log file."); return; } // Turn crawling flag on. crawling = true; // Perform the actual crawling. crawl(startUrl, maxUrls, limitCheckBox.isSelected(), searchString, caseCheckBox.isSelected()); // Turn crawling flag off. crawling = false; // Close matches log file. 
try { logFileWriter.close(); } catch (Exception e) { showError("Unable to close matches log file."); } // Mark search as done. crawlingLabel2.setText("Done"); // Enable search controls. startTextField.setEnabled(true); maxComboBox.setEnabled(true); limitCheckBox.setEnabled(true); logTextField.setEnabled(true); searchTextField.setEnabled(true); caseCheckBox.setEnabled(true); //
下载文档
热门试卷
- 2016年四川省内江市中考化学试卷
- 广西钦州市高新区2017届高三11月月考政治试卷
- 浙江省湖州市2016-2017学年高一上学期期中考试政治试卷
- 浙江省湖州市2016-2017学年高二上学期期中考试政治试卷
- 辽宁省铁岭市协作体2017届高三上学期第三次联考政治试卷
- 广西钦州市钦州港区2016-2017学年高二11月月考政治试卷
- 广西钦州市钦州港区2017届高三11月月考政治试卷
- 广西钦州市钦州港区2016-2017学年高一11月月考政治试卷
- 广西钦州市高新区2016-2017学年高二11月月考政治试卷
- 广西钦州市高新区2016-2017学年高一11月月考政治试卷
- 山东省滨州市三校2017届第一学期阶段测试初三英语试题
- 四川省成都七中2017届高三一诊模拟考试文科综合试卷
- 2017届普通高等学校招生全国统一考试模拟试题(附答案)
- 重庆市永川中学高2017级上期12月月考语文试题
- 江西宜春三中2017届高三第一学期第二次月考文科综合试题
- 内蒙古赤峰二中2017届高三上学期第三次月考英语试题
- 2017年六年级(上)数学期末考试卷
- 2017人教版小学英语三年级上期末笔试题
- 江苏省常州西藏民族中学2016-2017学年九年级思想品德第一学期第二次阶段测试试卷
- 重庆市九龙坡区七校2016-2017学年上期八年级素质测查(二)语文学科试题卷
- 江苏省无锡市钱桥中学2016年12月八年级语文阶段性测试卷
- 江苏省无锡市钱桥中学2016-2017学年七年级英语12月阶段检测试卷
- 山东省邹城市第八中学2016-2017学年八年级12月物理第4章试题(无答案)
- 【人教版】河北省2015-2016学年度九年级上期末语文试题卷(附答案)
- 四川省简阳市阳安中学2016年12月高二月考英语试卷
- 四川省成都龙泉中学高三上学期2016年12月月考试题文科综合能力测试
- 安徽省滁州中学2016—2017学年度第一学期12月月考高三英语试卷
- 山东省武城县第二中学2016.12高一年级上学期第二次月考历史试题(必修一第四、五单元)
- 福建省四地六校联考2016-2017学年上学期第三次月考高三化学试卷
- 甘肃省武威第二十三中学2016—2017学年度八年级第一学期12月月考生物试卷
网友关注
- 2018辽宁公务员面试中情景模拟题:巧用生活智慧
- 2018辽宁公务员考试:行测常识判断模拟题6
- 2018辽宁公务员考试申论案例分析模拟题:从普通市民中海选“布衣参事”
- 2018辽宁公务员考试:行测每日一练常识判断练习题
- 2018辽宁公务员考试:面试每日一练结构化面试模拟题
- 2018辽宁公务员考试申论模拟题:拟写一份村规民约
- 2018辽宁公务员考试:行测每日一练练习题答案04.18
- 2018辽宁公务员面试模拟题:“打伞哥”火爆朋友圈
- 2018辽宁公务员考试:行测每日一练类比推理练习题
- 2018辽宁公务员考试:行测常识判断模拟题答案3
- 2018辽宁公务员考试:行测每日一练练习题04.18
- 2018辽宁公务员考试面试热点模拟题:谈一下对资源分配的认识
- 2018辽宁公务员考试申论模拟题:信仰的力量
- 2018辽宁公务员考试面试热点模拟题:农村人居环境整治三年行动方案
- 2018辽宁公务员考试:行测常识判断模拟题4
- 2018辽宁公务员考试申论模拟题:网络文学发展面临的问题
- 2018辽宁公务员考试:行测每日一练判断推理练习题
- 2018辽宁公务员面试模拟题:如何看待“随手拍”
- 2017辽宁公务员考试申论真题
- 2018辽宁公务员考试:行测每日一练判断推理练习题答案
- 2018辽宁公务员考试:面试每日一练结构化面试模拟题答案
- 2018辽宁公务员考试面试热点模拟题:“儿童邪典视频”事件
- 2018辽宁公务员考试:行测每日一练类比推理练习题答案
- 2018辽宁公务员考试行测演练厅之生活常识模拟题
- 2018辽宁公务员考试:行测常识判断模拟题3
- 2017辽宁省考行测真题“两宗最”
- 2018辽宁公务员考试面试热点模拟题:“节后空巢症”怎么治?
- 2018辽宁公务员考试申论模拟题:当代国人价值观存在的问题
- 2018辽宁公务员考试申论模拟题:“双微”电子政务的意义
- 2018辽宁公务员考试申论案例分析模拟题:廉政建设
网友关注视频
- 沪教版牛津小学英语(深圳用) 五年级下册 Unit 12
- 冀教版英语四年级下册第二课
- 冀教版小学数学二年级下册第二单元《租船问题》
- 七年级下册外研版英语M8U2reading
- 七年级英语下册 上海牛津版 Unit5
- 冀教版英语五年级下册第二课课程解读
- 化学九年级下册全册同步 人教版 第22集 酸和碱的中和反应(一)
- 外研版英语七年级下册module3 unit2第一课时
- 【部编】人教版语文七年级下册《老山界》优质课教学视频+PPT课件+教案,安徽省
- 第19课 我喜欢的鸟_第一课时(二等奖)(人美杨永善版二年级下册)_T644386
- 外研版英语七年级下册module1unit3名词性物主代词讲解
- 三年级英语单词记忆下册(沪教版)第一二单元复习
- 【部编】人教版语文七年级下册《过松源晨炊漆公店(其五)》优质课教学视频+PPT课件+教案,江苏省
- 沪教版牛津小学英语(深圳用) 四年级下册 Unit 8
- 沪教版牛津小学英语(深圳用) 四年级下册 Unit 4
- 【部编】人教版语文七年级下册《泊秦淮》优质课教学视频+PPT课件+教案,辽宁省
- 第五单元 民族艺术的瑰宝_16. 形形色色的民族乐器_第一课时(岭南版六年级上册)_T1406126
- 北师大版小学数学四年级下册第15课小数乘小数一
- 沪教版牛津小学英语(深圳用) 四年级下册 Unit 2
- 冀教版小学数学二年级下册第二单元《有余数除法的竖式计算》
- 3.2 数学二年级下册第二单元 表内除法(一)整理和复习 李菲菲
- 冀教版小学英语四年级下册Lesson2授课视频
- 19 爱护鸟类_第一课时(二等奖)(桂美版二年级下册)_T502436
- 冀教版小学数学二年级下册第二周第2课时《我们的测量》宝丰街小学庞志荣.mp4
- 冀教版小学数学二年级下册第二单元《有余数除法的简单应用》
- 沪教版牛津小学英语(深圳用) 四年级下册 Unit 12
- 3月2日小学二年级数学下册(数一数)
- 【部编】人教版语文七年级下册《逢入京使》优质课教学视频+PPT课件+教案,辽宁省
- 外研版英语三起5年级下册(14版)Module3 Unit1
- 第12章 圆锥曲线_12.7 抛物线的标准方程_第一课时(特等奖)(沪教版高二下册)_T274713
精品推荐
- 2016-2017学年高一语文人教版必修一+模块学业水平检测试题(含答案)
- 广西钦州市高新区2017届高三11月月考政治试卷
- 浙江省湖州市2016-2017学年高一上学期期中考试政治试卷
- 浙江省湖州市2016-2017学年高二上学期期中考试政治试卷
- 辽宁省铁岭市协作体2017届高三上学期第三次联考政治试卷
- 广西钦州市钦州港区2016-2017学年高二11月月考政治试卷
- 广西钦州市钦州港区2017届高三11月月考政治试卷
- 广西钦州市钦州港区2016-2017学年高一11月月考政治试卷
- 广西钦州市高新区2016-2017学年高二11月月考政治试卷
- 广西钦州市高新区2016-2017学年高一11月月考政治试卷
分类导航
- 互联网
- 电脑基础知识
- 计算机软件及应用
- 计算机硬件及网络
- 计算机应用/办公自动化
- .NET
- 数据结构与算法
- Java
- SEO
- C/C++资料
- linux/Unix相关
- 手机开发
- UML理论/建模
- 并行计算/云计算
- 嵌入式开发
- windows相关
- 软件工程
- 管理信息系统
- 开发文档
- 图形图像
- 网络与通信
- 网络信息安全
- 电子支付
- Labview
- matlab
- 网络资源
- Python
- Delphi/Perl
- 评测
- Flash/Flex
- CSS/Script
- 计算机原理
- PHP资料
- 数据挖掘与模式识别
- Web服务
- 数据库
- Visual Basic
- 电子商务
- 服务器
- 搜索引擎优化
- 存储
- 架构
- 行业软件
- 人工智能
- 计算机辅助设计
- 多媒体
- 软件测试
- 计算机硬件与维护
- 网站策划/UE
- 网页设计/UI
- 网吧管理