mirror of
https://github.com/Rogiel/httpchannel
synced 2025-12-06 07:32:50 +00:00
Modularize the httpchannel library into Maven projects
This commit creates a separate Maven module for each segment of the library. It is now possible to add only individual services to the classpath instead of the full library.
This commit is contained in:
17
httpchannel-util/pom.xml
Normal file
17
httpchannel-util/pom.xml
Normal file
@@ -0,0 +1,17 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<!-- groupId and version of this module are inherited from the parent -->
	<parent>
		<artifactId>httpchannel</artifactId>
		<groupId>com.rogiel.httpchannel</groupId>
		<version>1.0.0</version>
		<relativePath>..</relativePath>
	</parent>

	<artifactId>httpchannel-util</artifactId>

	<dependencies>
		<!--
			Sibling module of the same build: follow the inherited project
			version instead of repeating the literal, so both modules always
			move together when the parent version is bumped.
		-->
		<dependency>
			<groupId>com.rogiel.httpchannel</groupId>
			<artifactId>httpchannel-api</artifactId>
			<version>${project.version}</version>
		</dependency>
	</dependencies>
</project>
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* This file is part of seedbox <github.com/seedbox>.
|
||||
*
|
||||
* seedbox is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* seedbox is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with seedbox. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package com.rogiel.httpchannel.service;
|
||||
|
||||
import org.apache.http.Header;
|
||||
import org.apache.http.HttpResponse;
|
||||
|
||||
import com.rogiel.httpchannel.util.ThreadUtils;
|
||||
|
||||
/**
|
||||
* @author rogiel
|
||||
*/
|
||||
public abstract class AbstractDownloader implements Downloader {
|
||||
protected int parseTimer(String stringTimer) {
|
||||
int timer = 0;
|
||||
if (stringTimer != null && stringTimer.length() > 0) {
|
||||
timer = Integer.parseInt(stringTimer);
|
||||
}
|
||||
return timer;
|
||||
}
|
||||
|
||||
protected long getContentLength(HttpResponse response) {
|
||||
final Header contentLengthHeader = response
|
||||
.getFirstHeader("Content-Length");
|
||||
long contentLength = -1;
|
||||
if (contentLengthHeader != null) {
|
||||
contentLength = Long.valueOf(contentLengthHeader.getValue());
|
||||
}
|
||||
return contentLength;
|
||||
}
|
||||
|
||||
protected void timer(DownloadListener listener, long timer) {
|
||||
if (listener != null) {
|
||||
listener.timer(timer);
|
||||
}
|
||||
ThreadUtils.sleep(timer);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
* This file is part of seedbox <github.com/seedbox>.
|
||||
*
|
||||
* seedbox is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* seedbox is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with seedbox. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package com.rogiel.httpchannel.service;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.client.ClientProtocolException;
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.client.methods.HttpPost;
|
||||
import org.apache.http.impl.client.DefaultHttpClient;
|
||||
|
||||
import com.rogiel.httpchannel.service.captcha.CaptchaResolver;
|
||||
import com.rogiel.httpchannel.service.config.ServiceConfiguration;
|
||||
import com.rogiel.httpchannel.util.AlwaysRedirectStrategy;
|
||||
import com.rogiel.httpchannel.util.HttpClientUtils;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
|
||||
/**
|
||||
* Abstract base service for HTTP enabled services.
|
||||
*
|
||||
* @author Rogiel
|
||||
* @since 1.0
|
||||
*/
|
||||
public abstract class AbstractHttpService<T extends ServiceConfiguration>
|
||||
extends AbstractService<T> implements Service {
|
||||
private static final ExecutorService threadPool = Executors
|
||||
.newCachedThreadPool();
|
||||
|
||||
/**
|
||||
* The {@link HttpClient} instance for this service
|
||||
*/
|
||||
protected DefaultHttpClient client = new DefaultHttpClient();
|
||||
|
||||
/**
|
||||
* The captcha resolver
|
||||
*/
|
||||
protected CaptchaResolver captchaResolver;
|
||||
|
||||
protected AbstractHttpService(T configuration) {
|
||||
super(configuration);
|
||||
client.setRedirectStrategy(new AlwaysRedirectStrategy());
|
||||
// client.getParams().setBooleanParameter(ClientPNames.HANDLE_REDIRECTS,
|
||||
// true);
|
||||
// client.getParams().setIntParameter(ClientPNames.MAX_REDIRECTS, 10);
|
||||
// client.setRedirectStrategy(new DefaultRedirectStrategy());
|
||||
}
|
||||
|
||||
protected HttpResponse get(String url) throws ClientProtocolException,
|
||||
IOException {
|
||||
final HttpGet request = new HttpGet(url);
|
||||
return client.execute(request);
|
||||
}
|
||||
|
||||
protected HttpResponse get(String url, long rangeStart)
|
||||
throws ClientProtocolException, IOException {
|
||||
final HttpGet request = new HttpGet(url);
|
||||
if (rangeStart >= 0)
|
||||
request.addHeader("Range", "bytes=" + rangeStart + "-");
|
||||
return client.execute(request);
|
||||
}
|
||||
|
||||
protected String getAsString(String url) throws ClientProtocolException,
|
||||
IOException {
|
||||
return HttpClientUtils.toString(get(url));
|
||||
}
|
||||
|
||||
protected HTMLPage getAsPage(String url) throws ClientProtocolException,
|
||||
IOException {
|
||||
return HTMLPage.parse(getAsString(url));
|
||||
}
|
||||
|
||||
public Future<HttpResponse> getAsync(final String url) throws IOException {
|
||||
return threadPool.submit(new Callable<HttpResponse>() {
|
||||
@Override
|
||||
public HttpResponse call() throws Exception {
|
||||
return get(url);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public Future<String> getAsStringAsync(final String url) throws IOException {
|
||||
return threadPool.submit(new Callable<String>() {
|
||||
@Override
|
||||
public String call() throws Exception {
|
||||
return getAsString(url);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public Future<HTMLPage> getAsPageAsync(final String url) throws IOException {
|
||||
return threadPool.submit(new Callable<HTMLPage>() {
|
||||
@Override
|
||||
public HTMLPage call() throws Exception {
|
||||
return getAsPage(url);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
protected HttpResponse post(String url, HttpEntity entity)
|
||||
throws ClientProtocolException, IOException {
|
||||
final HttpPost request = new HttpPost(url);
|
||||
request.setEntity(entity);
|
||||
return client.execute(request);
|
||||
}
|
||||
|
||||
protected String postAsString(String url, HttpEntity entity)
|
||||
throws ClientProtocolException, IOException {
|
||||
return HttpClientUtils.toString(post(url, entity));
|
||||
}
|
||||
|
||||
protected HTMLPage postAsPage(String url, HttpEntity entity)
|
||||
throws ClientProtocolException, IOException {
|
||||
return HTMLPage.parse(postAsString(url, entity));
|
||||
}
|
||||
|
||||
protected Future<HttpResponse> postAsync(final String url,
|
||||
final HttpEntity entity) throws IOException {
|
||||
return threadPool.submit(new Callable<HttpResponse>() {
|
||||
@Override
|
||||
public HttpResponse call() throws Exception {
|
||||
return post(url, entity);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
protected Future<String> postAsStringAsync(final String url,
|
||||
final HttpEntity entity) throws IOException {
|
||||
return threadPool.submit(new Callable<String>() {
|
||||
@Override
|
||||
public String call() throws Exception {
|
||||
return postAsString(url, entity);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
protected Future<HTMLPage> postAsPageAsync(final String url,
|
||||
final HttpEntity entity) throws IOException {
|
||||
return threadPool.submit(new Callable<HTMLPage>() {
|
||||
@Override
|
||||
public HTMLPage call() throws Exception {
|
||||
return postAsPage(url, entity);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* This file is part of seedbox <github.com/seedbox>.
|
||||
*
|
||||
* seedbox is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* seedbox is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with seedbox. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util;
|
||||
|
||||
import org.apache.http.HttpRequest;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.ProtocolException;
|
||||
import org.apache.http.client.methods.HttpUriRequest;
|
||||
import org.apache.http.impl.client.DefaultRedirectStrategy;
|
||||
import org.apache.http.protocol.HttpContext;
|
||||
|
||||
/**
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*
|
||||
*/
|
||||
public class AlwaysRedirectStrategy extends DefaultRedirectStrategy {
|
||||
@Override
|
||||
public boolean isRedirected(HttpRequest request, HttpResponse response,
|
||||
HttpContext context) throws ProtocolException {
|
||||
return response.getFirstHeader("location") != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HttpUriRequest getRedirect(HttpRequest request,
|
||||
HttpResponse response, HttpContext context)
|
||||
throws ProtocolException {
|
||||
return super.getRedirect(request, response, context);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* This file is part of seedbox <github.com/seedbox>.
|
||||
*
|
||||
* seedbox is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* seedbox is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with seedbox. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.ReadableByteChannel;
|
||||
import java.nio.channels.WritableByteChannel;
|
||||
|
||||
/**
 * Channel related helper methods.
 * 
 * @author <a href="http://www.rogiel.com">Rogiel</a>
 * 
 */
public class ChannelUtils {
	/**
	 * Copies everything that can be read from <tt>in</tt> to <tt>out</tt>.
	 * Neither channel is closed by this method.
	 * 
	 * @param in
	 *            the channel to read from, until it reports end-of-stream
	 * @param out
	 *            the channel to write to
	 * @throws IOException
	 *             if reading or writing fails
	 */
	public static void copy(ReadableByteChannel in, WritableByteChannel out)
			throws IOException {
		final int blockSize = 32 * 1024;
		final ByteBuffer block = ByteBuffer.allocateDirect(blockSize);

		for (;;) {
			// read() appends to the block; -1 signals end-of-stream
			final boolean endOfStream = (in.read(block) == -1);

			// done only once the input is exhausted AND nothing is left
			// waiting in the block from an earlier partial write
			if (endOfStream && block.position() <= 0) {
				break;
			}

			// switch from filling to draining: limit = position, position = 0
			block.flip();

			// may write only part of the block
			out.write(block);

			// move the unwritten remainder to the front and switch back to
			// filling, so the next read() appends after it
			block.compact();
		}
	}
}
|
||||
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* This file is part of seedbox <github.com/seedbox>.
|
||||
*
|
||||
* seedbox is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* seedbox is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with seedbox. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.client.methods.HttpUriRequest;
|
||||
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
|
||||
public class HttpClientUtils {
|
||||
private static final ExecutorService threadPool = Executors
|
||||
.newCachedThreadPool();
|
||||
|
||||
public static HttpResponse get(HttpClient client, String url)
|
||||
throws IOException {
|
||||
return client.execute(new HttpGet(url));
|
||||
}
|
||||
|
||||
public static String getString(HttpClient client, String url)
|
||||
throws IOException {
|
||||
return toString(get(client, url));
|
||||
}
|
||||
|
||||
public static String execute(HttpClient client, HttpUriRequest request)
|
||||
throws IOException {
|
||||
return toString(client.execute(request));
|
||||
}
|
||||
|
||||
public static Future<String> executeAsync(final HttpClient client,
|
||||
final HttpUriRequest request) throws IOException {
|
||||
return threadPool.submit(new Callable<String>() {
|
||||
@Override
|
||||
public String call() throws Exception {
|
||||
return HttpClientUtils.toString(client.execute(request));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static Future<HttpResponse> executeAsyncHttpResponse(
|
||||
final HttpClient client, final HttpUriRequest request)
|
||||
throws IOException {
|
||||
return threadPool.submit(new Callable<HttpResponse>() {
|
||||
@Override
|
||||
public HttpResponse call() throws Exception {
|
||||
return client.execute(request);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static String toString(HttpResponse response) throws IOException {
|
||||
final InputStream in = response.getEntity().getContent();
|
||||
try {
|
||||
return IOUtils.toString(in);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static HTMLPage toPage(HttpResponse response) throws IOException {
|
||||
return HTMLPage.parse(toString(response));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* This file is part of seedbox <github.com/seedbox>.
|
||||
*
|
||||
* seedbox is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* seedbox is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with seedbox. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Static shortcuts for the common "compile once, look up one group" use of
 * {@link Pattern}. Every lookup returns <tt>null</tt> when the pattern does
 * not match.
 */
public class PatternUtils {
	/**
	 * Returns the text of the first occurrence of the pattern.
	 * 
	 * @param pattern
	 *            the pattern to search for
	 * @param text
	 *            the text to search in
	 * @return the whole first match, or <tt>null</tt> if there is none
	 */
	public static String find(Pattern pattern, String text) {
		return find(pattern, text, 0);
	}

	/**
	 * Returns group <tt>n</tt> of the first occurrence, parsed as an integer.
	 * 
	 * @param pattern
	 *            the pattern to search for
	 * @param text
	 *            the text to search in
	 * @param n
	 *            the capturing group to return (0 is the whole match)
	 * @return the parsed group, or <tt>0</tt> if nothing was found
	 * @throws NumberFormatException
	 *             if the group was found but is not a valid integer
	 */
	public static int findInt(Pattern pattern, String text, int n) {
		final String found = find(pattern, text, n);
		return (found != null ? Integer.parseInt(found) : 0);
	}

	/**
	 * Returns group <tt>n</tt> of the first occurrence of the pattern.
	 * 
	 * @param pattern
	 *            the pattern to search for
	 * @param text
	 *            the text to search in
	 * @param n
	 *            the capturing group to return (0 is the whole match)
	 * @return the group content, or <tt>null</tt> if the pattern does not
	 *         occur
	 */
	public static String find(Pattern pattern, String text, int n) {
		final Matcher matcher = pattern.matcher(text);
		if (matcher.find()) {
			return matcher.group(n);
		}
		return null;
	}

	/**
	 * Returns group <tt>n</tt> of the <tt>index</tt>-th occurrence of the
	 * pattern.
	 * 
	 * @param pattern
	 *            the pattern to search for
	 * @param text
	 *            the text to search in
	 * @param index
	 *            which occurrence to return, starting at 1; values below 1
	 *            select the first occurrence
	 * @param n
	 *            the capturing group to return (0 is the whole match)
	 * @return the group content, or <tt>null</tt> if the text holds fewer
	 *         than <tt>index</tt> occurrences
	 */
	public static String find(Pattern pattern, String text, int index, int n) {
		final Matcher matcher = pattern.matcher(text);
		int found = 0;
		boolean matched;
		do {
			matched = matcher.find();
		} while (matched && (++found) < index);
		// group() is only legal after a successful find(); when the text ran
		// out of occurrences before reaching the requested one there is
		// nothing to return
		return (matched ? matcher.group(n) : null);
	}

	/**
	 * Matches the whole text against the pattern.
	 * 
	 * @param pattern
	 *            the pattern to match
	 * @param text
	 *            the text that must match entirely
	 * @return the matched text, or <tt>null</tt> if the text does not match
	 */
	public static String match(Pattern pattern, String text) {
		return match(pattern, text, 0);
	}

	/**
	 * Matches the whole text against the pattern and returns group
	 * <tt>n</tt>.
	 * 
	 * @param pattern
	 *            the pattern to match
	 * @param text
	 *            the text that must match entirely
	 * @param n
	 *            the capturing group to return (0 is the whole match)
	 * @return the group content, or <tt>null</tt> if the text does not match
	 */
	public static String match(Pattern pattern, String text, int n) {
		final Matcher matcher = pattern.matcher(text);
		if (matcher.matches()) {
			return matcher.group(n);
		}
		return null;
	}
}
|
||||
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
|
||||
* This file is part of seedbox <github.com/seedbox>.
|
||||
*
|
||||
* seedbox is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* seedbox is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with seedbox. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util;
|
||||
|
||||
/**
 * Thread related helper methods.
 * 
 * @author Rogiel
 * @since 1.0
 */
public class ThreadUtils {
	/**
	 * Sleeps the current thread without forcing the caller to handle
	 * {@link InterruptedException}.
	 * <p>
	 * If the thread is interrupted the method returns early and leaves the
	 * thread's interrupt status set, so that callers (or the code that owns
	 * the thread) can still detect the interruption.
	 * 
	 * @param time
	 *            the time to sleep, in milliseconds
	 * @throws IllegalArgumentException
	 *             if <tt>time</tt> is negative
	 */
	public static void sleep(long time) {
		try {
			Thread.sleep(time);
		} catch (InterruptedException e) {
			// Thread.sleep clears the interrupt status when it throws;
			// restore it instead of silently losing the interruption
			Thread.currentThread().interrupt();
		}
	}
}
|
||||
@@ -0,0 +1,248 @@
|
||||
/*
|
||||
* This file is part of seedbox <github.com/seedbox>.
|
||||
*
|
||||
* seedbox is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* seedbox is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with seedbox. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.Parser;
|
||||
import org.htmlparser.Tag;
|
||||
import org.htmlparser.tags.FormTag;
|
||||
import org.htmlparser.tags.InputTag;
|
||||
import org.htmlparser.tags.LinkTag;
|
||||
import org.htmlparser.tags.ScriptTag;
|
||||
import org.htmlparser.util.NodeIterator;
|
||||
import org.htmlparser.util.NodeList;
|
||||
import org.htmlparser.util.ParserException;
|
||||
|
||||
/**
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class HTMLPage {
|
||||
private final Parser parser;
|
||||
|
||||
private HTMLPage(Parser parser) {
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
public String getLink(final Pattern pattern) {
|
||||
NodeList nodes;
|
||||
try {
|
||||
nodes = parser.extractAllNodesThatMatch(new NodeFilter() {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof LinkTag))
|
||||
return false;
|
||||
final LinkTag link = (LinkTag) node;
|
||||
return pattern.matcher(link.getLink()).matches();
|
||||
}
|
||||
});
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
if (nodes.size() >= 1)
|
||||
return ((LinkTag) nodes.elements().nextNode()).getLink();
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getFormAction(final Pattern pattern) {
|
||||
NodeList nodes;
|
||||
try {
|
||||
nodes = parser.extractAllNodesThatMatch(new NodeFilter() {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof FormTag))
|
||||
return false;
|
||||
final FormTag form = (FormTag) node;
|
||||
return pattern.matcher(form.getFormLocation()).matches();
|
||||
}
|
||||
});
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
if (nodes.size() >= 1)
|
||||
return ((FormTag) nodes.elements().nextNode()).getFormLocation();
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getInputValue(final String inputName) {
|
||||
NodeList nodes;
|
||||
try {
|
||||
nodes = parser.extractAllNodesThatMatch(new NodeFilter() {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof InputTag))
|
||||
return false;
|
||||
final InputTag input = (InputTag) node;
|
||||
if (!input.getAttribute("name").equals(inputName))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
});
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
if (nodes.size() >= 1)
|
||||
return ((InputTag) nodes.elements().nextNode())
|
||||
.getAttribute("value");
|
||||
return null;
|
||||
}
|
||||
|
||||
public int getIntegerInputValue(final String inputName) {
|
||||
return Integer.parseInt(getInputValue(inputName));
|
||||
}
|
||||
|
||||
public String getInputValue(final Pattern pattern) {
|
||||
NodeList nodes;
|
||||
try {
|
||||
nodes = parser.extractAllNodesThatMatch(new NodeFilter() {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof InputTag))
|
||||
return false;
|
||||
final InputTag input = (InputTag) node;
|
||||
if (input.getAttribute("value") == null)
|
||||
return false;
|
||||
if (!pattern.matcher(input.getAttribute("value")).matches())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
});
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
if (nodes.size() >= 1)
|
||||
return ((InputTag) nodes.elements().nextNode())
|
||||
.getAttribute("value");
|
||||
return null;
|
||||
}
|
||||
|
||||
public Tag getTagByID(final String id) {
|
||||
NodeList nodes;
|
||||
try {
|
||||
nodes = parser.extractAllNodesThatMatch(new NodeFilter() {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof Tag))
|
||||
return false;
|
||||
if (((Tag) node).getAttribute("id") == null)
|
||||
return false;
|
||||
return ((Tag) node).getAttribute("id").equals(id);
|
||||
}
|
||||
});
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
if (nodes.size() >= 1)
|
||||
return ((Tag) nodes.elements().nextNode());
|
||||
return null;
|
||||
}
|
||||
|
||||
public Tag getTagByName(final String name) {
|
||||
NodeList nodes;
|
||||
try {
|
||||
nodes = parser.extractAllNodesThatMatch(new NodeFilter() {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof Tag))
|
||||
return false;
|
||||
return ((Tag) node).getAttribute("name").equals(name);
|
||||
}
|
||||
});
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
if (nodes.size() >= 1)
|
||||
return ((Tag) nodes.elements().nextNode());
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean contains(final String text) {
|
||||
try {
|
||||
for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
|
||||
if (e.nextNode().toPlainTextString().contains(text))
|
||||
return true;
|
||||
}
|
||||
} catch (ParserException e) {
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public String findInScript(final Pattern pattern, int n) {
|
||||
NodeList nodes;
|
||||
try {
|
||||
nodes = parser.extractAllNodesThatMatch(new NodeFilter() {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof ScriptTag))
|
||||
return false;
|
||||
final ScriptTag script = (ScriptTag) node;
|
||||
return pattern.matcher(script.getScriptCode()).find();
|
||||
}
|
||||
});
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
if (nodes.size() >= 1) {
|
||||
final ScriptTag script = (ScriptTag) nodes.elements().nextNode();
|
||||
final Matcher matcher = pattern.matcher(script.getScriptCode());
|
||||
if (matcher.find())
|
||||
return matcher.group(n);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public int findIntegerInScript(final Pattern pattern, int n) {
|
||||
String found = findInScript(pattern, n);
|
||||
if(found == null)
|
||||
return 0;
|
||||
return Integer.parseInt(findInScript(pattern, n));
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
try {
|
||||
for (NodeIterator i = parser.elements(); i.hasMoreNodes();) {
|
||||
builder.append(i.nextNode().toHtml(true));
|
||||
}
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static HTMLPage parse(String html) {
|
||||
return new HTMLPage(Parser.createParser(html, null));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user