mirror of
https://github.com/Rogiel/httpchannel
synced 2025-12-05 23:22:51 +00:00
Implements a new, cleaner and more robust HTML parser
This commit is contained in:
@@ -153,6 +153,20 @@ public interface AccountDetails {
|
||||
long getMaximumBandwidth();
|
||||
}
|
||||
|
||||
/**
|
||||
* Service accounts that have hotlink traffic should implement
|
||||
* this interface
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public interface HotLinkingAccountDetails extends AccountDetails {
|
||||
/**
|
||||
* @return the currently free hotlink traffic. <code>-1</code> means no
|
||||
* limit
|
||||
*/
|
||||
long getHotlinkTraffic();
|
||||
}
|
||||
|
||||
/**
|
||||
* Service accounts that have limited bandwidth should
|
||||
* implement this interface
|
||||
|
||||
@@ -51,7 +51,7 @@ import com.rogiel.httpchannel.service.exception.DownloadNotAuthorizedException;
|
||||
import com.rogiel.httpchannel.service.exception.DownloadNotResumableException;
|
||||
import com.rogiel.httpchannel.service.exception.NoCaptchaServiceException;
|
||||
import com.rogiel.httpchannel.util.ExceptionUtils;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This service handles uploads to TwoShared.
|
||||
@@ -165,7 +165,7 @@ public class TwoSharedService extends AbstractHttpService implements Service,
|
||||
AbstractUploader<NullUploaderConfiguration> implements
|
||||
Uploader<NullUploaderConfiguration>,
|
||||
LinkedUploadChannelCloseCallback {
|
||||
private Future<HTMLPage> uploadFuture;
|
||||
private Future<Page> uploadFuture;
|
||||
private String uploadID;
|
||||
|
||||
public UploaderImpl(String filename, long filesize,
|
||||
@@ -176,12 +176,12 @@ public class TwoSharedService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
logger.debug("Starting upload to TwoShared");
|
||||
final HTMLPage page = get("http://www.2shared.com/").asPage();
|
||||
final Page page = get("http://www.2shared.com/").asPage();
|
||||
|
||||
// locate upload uri
|
||||
final String uri = page.findFormAction(UPLOAD_URL_PATTERN);
|
||||
final String mainDC = page.getInputValue("mainDC");
|
||||
uploadID = page.find(UPLOAD_ID_PATTERN, 1);
|
||||
final String uri = page.form(UPLOAD_URL_PATTERN).asString();
|
||||
final String mainDC = page.inputByName("mainDC").asString();
|
||||
uploadID = page.search(UPLOAD_ID_PATTERN).asString(1);
|
||||
|
||||
logger.debug("Upload URI: {}, DC: {}", uri, mainDC);
|
||||
|
||||
@@ -198,10 +198,10 @@ public class TwoSharedService extends AbstractHttpService implements Service,
|
||||
public String finish() throws IOException {
|
||||
try {
|
||||
uploadFuture.get();
|
||||
final HTMLPage page = get(
|
||||
final Page page = get(
|
||||
"http://www.2shared.com/uploadComplete.jsp?sId="
|
||||
+ uploadID).asPage();
|
||||
return page.getTextareaValueById("downloadLink");
|
||||
return page.textareaByID("downloadLink").asString();
|
||||
} catch (InterruptedException e) {
|
||||
return null;
|
||||
} catch (ExecutionException e) {
|
||||
@@ -232,9 +232,9 @@ public class TwoSharedService extends AbstractHttpService implements Service,
|
||||
DownloadLinkNotFoundException, DownloadLimitExceededException,
|
||||
DownloadNotAuthorizedException, DownloadNotResumableException,
|
||||
UnsolvableCaptchaServiceException, NoCaptchaServiceException {
|
||||
final HTMLPage page = get(uri).asPage();
|
||||
final String downloadUri = page.findScript(
|
||||
DIRECT_DOWNLOAD_URL_PATTERN, 0);
|
||||
final Page page = get(uri).asPage();
|
||||
final String downloadUri = page.script(
|
||||
DIRECT_DOWNLOAD_URL_PATTERN).asString();
|
||||
return download(get(downloadUri));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,9 @@ import com.rogiel.httpchannel.service.AbstractAuthenticator;
|
||||
import com.rogiel.httpchannel.service.AbstractHttpService;
|
||||
import com.rogiel.httpchannel.service.AbstractUploader;
|
||||
import com.rogiel.httpchannel.service.AccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.DiskQuotaAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.FilesizeLimitAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AuthenticationService;
|
||||
import com.rogiel.httpchannel.service.Authenticator;
|
||||
import com.rogiel.httpchannel.service.AuthenticatorCapability;
|
||||
@@ -42,9 +45,6 @@ import com.rogiel.httpchannel.service.UploadChannel;
|
||||
import com.rogiel.httpchannel.service.UploadService;
|
||||
import com.rogiel.httpchannel.service.Uploader;
|
||||
import com.rogiel.httpchannel.service.UploaderCapability;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.DiskQuotaAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.FilesizeLimitAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback;
|
||||
import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration;
|
||||
@@ -52,7 +52,7 @@ import com.rogiel.httpchannel.service.config.NullUploaderConfiguration;
|
||||
import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialException;
|
||||
import com.rogiel.httpchannel.service.exception.ChannelServiceException;
|
||||
import com.rogiel.httpchannel.service.exception.DownloadLinkNotFoundException;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This service handles uploads to 4shared.com.
|
||||
@@ -168,7 +168,7 @@ public class FourSharedService extends AbstractHttpService implements Service,
|
||||
AbstractUploader<NullUploaderConfiguration> implements
|
||||
Uploader<NullUploaderConfiguration>,
|
||||
LinkedUploadChannelCloseCallback {
|
||||
private Future<HTMLPage> uploadFuture;
|
||||
private Future<Page> uploadFuture;
|
||||
|
||||
public UploaderImpl(String filename, long filesize,
|
||||
NullUploaderConfiguration configuration) {
|
||||
@@ -210,8 +210,8 @@ public class FourSharedService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public String finish() throws IOException {
|
||||
try {
|
||||
final long linkID = Long.parseLong(uploadFuture.get()
|
||||
.getInputValueById("uploadedFileId"));
|
||||
final long linkID = uploadFuture.get()
|
||||
.inputByID("uploadedFileId").asLong();
|
||||
return api.getFileDownloadLink(account.getUsername(),
|
||||
getPassword(), linkID);
|
||||
} catch (InterruptedException e) {
|
||||
|
||||
@@ -48,7 +48,7 @@ import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadCh
|
||||
import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration;
|
||||
import com.rogiel.httpchannel.service.config.NullUploaderConfiguration;
|
||||
import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialException;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This service handles uploads to DepositFiles.com.
|
||||
@@ -147,7 +147,8 @@ public class DepositFilesService extends AbstractHttpService implements
|
||||
|
||||
@Override
|
||||
public CapabilityMatrix<AuthenticatorCapability> getAuthenticationCapability() {
|
||||
return new CapabilityMatrix<AuthenticatorCapability>(AuthenticatorCapability.ACCOUNT_DETAILS);
|
||||
return new CapabilityMatrix<AuthenticatorCapability>(
|
||||
AuthenticatorCapability.ACCOUNT_DETAILS);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -159,7 +160,7 @@ public class DepositFilesService extends AbstractHttpService implements
|
||||
AbstractUploader<NullUploaderConfiguration> implements
|
||||
Uploader<NullUploaderConfiguration>,
|
||||
LinkedUploadChannelCloseCallback {
|
||||
private Future<HTMLPage> uploadFuture;
|
||||
private Future<Page> uploadFuture;
|
||||
|
||||
public UploaderImpl(String filename, long filesize,
|
||||
NullUploaderConfiguration configuration) {
|
||||
@@ -169,11 +170,13 @@ public class DepositFilesService extends AbstractHttpService implements
|
||||
@Override
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
logger.debug("Starting upload to depositfiles.com");
|
||||
final HTMLPage page = get("http://www.depositfiles.com/").asPage();
|
||||
final Page page = get("http://www.depositfiles.com/").asPage();
|
||||
|
||||
final String uri = page.findFormAction(UPLOAD_URI_PATTERN);
|
||||
final String uploadID = page.getInputValue("UPLOAD_IDENTIFIER");
|
||||
final String maxFileSize = page.getInputValue("MAX_FILE_SIZE");
|
||||
final String uri = page.form(UPLOAD_URI_PATTERN).asString();
|
||||
final String uploadID = page.inputByName("UPLOAD_IDENTIFIER")
|
||||
.asString();
|
||||
final String maxFileSize = page.formByName("MAX_FILE_SIZE")
|
||||
.asString();
|
||||
|
||||
logger.debug("Upload URI: {}, ID: {}", uri, uploadID);
|
||||
|
||||
@@ -189,8 +192,8 @@ public class DepositFilesService extends AbstractHttpService implements
|
||||
@Override
|
||||
public String finish() throws IOException {
|
||||
try {
|
||||
final String link = uploadFuture.get().findScript(
|
||||
DOWNLOAD_URI_PATTERN, 0);
|
||||
final String link = uploadFuture.get()
|
||||
.script(DOWNLOAD_URI_PATTERN).asString();
|
||||
if (link == null)
|
||||
return null;
|
||||
return link;
|
||||
@@ -213,7 +216,7 @@ public class DepositFilesService extends AbstractHttpService implements
|
||||
@Override
|
||||
public AccountDetails login() throws IOException {
|
||||
logger.debug("Authenticating into depositfiles.com");
|
||||
HTMLPage page = post("http://depositfiles.com/login.php?return=%2F")
|
||||
Page page = post("http://depositfiles.com/login.php?return=%2F")
|
||||
.parameter("go", true)
|
||||
.parameter("login", credential.getUsername())
|
||||
.parameter("password", credential.getPassword()).asPage();
|
||||
@@ -239,9 +242,10 @@ public class DepositFilesService extends AbstractHttpService implements
|
||||
throw new UnsolvableCaptchaServiceException();
|
||||
} else {
|
||||
captchaService.valid(captcha);
|
||||
if (!page.contains(VALID_LOGIN_REDIRECT))
|
||||
if (!page.search(VALID_LOGIN_REDIRECT).hasResults())
|
||||
throw new AuthenticationInvalidCredentialException();
|
||||
return (account = new AccountDetailsImpl(credential.getUsername()));
|
||||
return (account = new AccountDetailsImpl(
|
||||
credential.getUsername()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,6 @@ import java.util.concurrent.Future;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.http.client.ClientProtocolException;
|
||||
import org.htmlparser.Tag;
|
||||
|
||||
import com.rogiel.httpchannel.service.AbstractAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AbstractAuthenticator;
|
||||
@@ -33,6 +32,9 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader;
|
||||
import com.rogiel.httpchannel.service.AbstractHttpService;
|
||||
import com.rogiel.httpchannel.service.AbstractUploader;
|
||||
import com.rogiel.httpchannel.service.AccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.HotLinkingAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.ReferralAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AuthenticationService;
|
||||
import com.rogiel.httpchannel.service.Authenticator;
|
||||
import com.rogiel.httpchannel.service.AuthenticatorCapability;
|
||||
@@ -50,14 +52,15 @@ import com.rogiel.httpchannel.service.UploadChannel;
|
||||
import com.rogiel.httpchannel.service.UploadService;
|
||||
import com.rogiel.httpchannel.service.Uploader;
|
||||
import com.rogiel.httpchannel.service.UploaderCapability;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback;
|
||||
import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration;
|
||||
import com.rogiel.httpchannel.service.config.NullDownloaderConfiguration;
|
||||
import com.rogiel.httpchannel.service.config.NullUploaderConfiguration;
|
||||
import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialException;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.Filesizes;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
import com.rogiel.httpchannel.util.html.SearchResults;
|
||||
|
||||
/**
|
||||
* This service handles login, upload and download to HotFile.com.
|
||||
@@ -75,17 +78,31 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
public static final ServiceID SERVICE_ID = ServiceID.create("hotfile");
|
||||
|
||||
private static final Pattern UPLOAD_URI_PATTERN = Pattern
|
||||
.compile("http://u[0-9]*\\.hotfile\\.com/upload\\.cgi\\?[0-9]*");
|
||||
.compile("http[s]?://u[0-9]+\\.hotfile\\.com/upload\\.cgi\\?[0-9]*");
|
||||
|
||||
private static final Pattern DOWNLOAD_DIRECT_LINK_PATTERN = Pattern
|
||||
.compile("http://hotfile\\.com/get/([0-9]*)/([A-Za-z0-9]*)/([A-Za-z0-9]*)/(.*)");
|
||||
.compile("http[s]?://hotfile\\.com/get/([0-9]+)/([A-Za-z0-9]+)/([A-Za-z0-9]+)/(.+)");
|
||||
// private static final Pattern DOWNLOAD_TIMER = Pattern
|
||||
// .compile("timerend=d\\.getTime\\(\\)\\+([0-9]*);");
|
||||
// private static final Pattern DOWNLOAD_FILESIZE = Pattern
|
||||
// .compile("[0-9]*(\\.[0-9]*)? (K|M|G)B");
|
||||
|
||||
private static final Pattern DOWNLOAD_URI_PATTERN = Pattern
|
||||
.compile("http://hotfile\\.com/dl/([0-9]*)/([A-Za-z0-9]*)/(.*)");
|
||||
.compile("http[s]?://hotfile\\.com/dl/([0-9]+)/([A-Za-z0-9]+)/(.+)");
|
||||
|
||||
// account
|
||||
private static final Pattern ACCOUNT_NAME_PATTERN = Pattern
|
||||
.compile("User: ([^\\|]+)");
|
||||
|
||||
private static final Pattern ACCOUNT_TYPE_PATTERN = Pattern
|
||||
.compile("Account: Free");
|
||||
|
||||
private static final Pattern HOTLINK_TRAFFIC_PATTERN = Pattern.compile(
|
||||
"Hotlink traffic left: ([0-9]+(\\.[0-9]+))(K|M|G)b",
|
||||
Pattern.CASE_INSENSITIVE);
|
||||
|
||||
private static final Pattern REFERRAL_URL_PATTERN = Pattern
|
||||
.compile("http[s]?://hotfile\\.com/register\\.html\\?reff=[0-9]+");
|
||||
|
||||
@Override
|
||||
public ServiceID getServiceID() {
|
||||
@@ -189,7 +206,8 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
|
||||
@Override
|
||||
public CapabilityMatrix<AuthenticatorCapability> getAuthenticationCapability() {
|
||||
return new CapabilityMatrix<AuthenticatorCapability>(AuthenticatorCapability.ACCOUNT_DETAILS);
|
||||
return new CapabilityMatrix<AuthenticatorCapability>(
|
||||
AuthenticatorCapability.ACCOUNT_DETAILS);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -201,7 +219,7 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
AbstractUploader<NullUploaderConfiguration> implements
|
||||
Uploader<NullUploaderConfiguration>,
|
||||
LinkedUploadChannelCloseCallback {
|
||||
private Future<HTMLPage> uploadFuture;
|
||||
private Future<Page> uploadFuture;
|
||||
|
||||
public UploaderImpl(String filename, long filesize,
|
||||
NullUploaderConfiguration configuration) {
|
||||
@@ -211,8 +229,8 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
logger.debug("Starting upload to hotfile.com");
|
||||
final HTMLPage page = get("http://www.hotfile.com/").asPage();
|
||||
final String action = page.findFormAction(UPLOAD_URI_PATTERN);
|
||||
final Page page = get("http://www.hotfile.com/").asPage();
|
||||
final String action = page.form(UPLOAD_URI_PATTERN).asString();
|
||||
|
||||
logger.debug("Upload URI is {}", action);
|
||||
|
||||
@@ -226,7 +244,8 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public String finish() throws IOException {
|
||||
try {
|
||||
return uploadFuture.get().getInputValue(DOWNLOAD_URI_PATTERN);
|
||||
return uploadFuture.get().input(DOWNLOAD_URI_PATTERN)
|
||||
.asString();
|
||||
} catch (InterruptedException e) {
|
||||
return null;
|
||||
} catch (ExecutionException e) {
|
||||
@@ -245,7 +264,7 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
public DownloadChannel openChannel(DownloadListener listener,
|
||||
long position) throws IOException {
|
||||
logger.debug("Downloading {} from hotfile.com", uri);
|
||||
final HTMLPage page = get(uri).asPage();
|
||||
final Page page = get(uri).asPage();
|
||||
|
||||
// // try to find timer
|
||||
// final String stringTimer = PatternUtils.find(DOWNLOAD_TIMER,
|
||||
@@ -259,8 +278,8 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
// + " milliseconds");
|
||||
// }
|
||||
|
||||
final String downloadUrl = page
|
||||
.findLink(DOWNLOAD_DIRECT_LINK_PATTERN);
|
||||
final String downloadUrl = page.link(DOWNLOAD_DIRECT_LINK_PATTERN)
|
||||
.asString();
|
||||
logger.debug("Download link is {}", downloadUrl);
|
||||
// final String tmHash = PatternUtils.find(DOWNLOAD_TMHASH_PATTERN,
|
||||
// content);
|
||||
@@ -284,15 +303,32 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
public AccountDetails login() throws ClientProtocolException,
|
||||
IOException {
|
||||
logger.debug("Authenticating hotfile.com");
|
||||
HTMLPage page = post("http://www.hotfile.com/login.php")
|
||||
Page page = post("http://www.hotfile.com/login.php")
|
||||
.parameter("returnto", "/index.php")
|
||||
.parameter("user", credential.getUsername())
|
||||
.parameter("pass", credential.getPassword()).asPage();
|
||||
|
||||
final Tag accountTag = page.getTagByID("account");
|
||||
if (accountTag == null)
|
||||
page = get("http://www.hotfile.com/myreferals.html?lang=en")
|
||||
.asPage();
|
||||
|
||||
final SearchResults usernameResults = page
|
||||
.search(ACCOUNT_NAME_PATTERN);
|
||||
if (!usernameResults.hasResults())
|
||||
throw new AuthenticationInvalidCredentialException();
|
||||
return (account = new AccountDetailsImpl(credential.getUsername()));
|
||||
|
||||
final String username = usernameResults.asString(1);
|
||||
final String type = page.search(ACCOUNT_TYPE_PATTERN).asString();
|
||||
|
||||
final SearchResults trafficResults = page
|
||||
.search(HOTLINK_TRAFFIC_PATTERN);
|
||||
final long hotlinkTraffic = Filesizes.auto(
|
||||
trafficResults.asDouble(1), trafficResults.asString(3));
|
||||
|
||||
final String referralURL = page.search(REFERRAL_URL_PATTERN)
|
||||
.asString();
|
||||
|
||||
return (account = new AccountDetailsImpl(username, type == null,
|
||||
hotlinkTraffic, referralURL));
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -304,19 +340,48 @@ public class HotFileService extends AbstractHttpService implements Service,
|
||||
}
|
||||
|
||||
private class AccountDetailsImpl extends AbstractAccountDetails implements
|
||||
PremiumAccountDetails {
|
||||
PremiumAccountDetails, ReferralAccountDetails,
|
||||
HotLinkingAccountDetails {
|
||||
private final boolean premium;
|
||||
private final long hotlinkTraffic;
|
||||
private final String referralURL;
|
||||
|
||||
/**
|
||||
* @param username
|
||||
* the username
|
||||
* @param premium
|
||||
* whether the account is premium
|
||||
* @param hotlinkTraffic
|
||||
* the available hotlink traffic
|
||||
* @param referralURL
|
||||
* the referral url
|
||||
*/
|
||||
public AccountDetailsImpl(String username) {
|
||||
public AccountDetailsImpl(String username, boolean premium,
|
||||
long hotlinkTraffic, String referralURL) {
|
||||
super(HotFileService.this, username);
|
||||
this.premium = premium;
|
||||
this.hotlinkTraffic = hotlinkTraffic;
|
||||
this.referralURL = referralURL;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPremium() {
|
||||
// TODO implement this
|
||||
return false;
|
||||
return premium;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getHotlinkTraffic() {
|
||||
return hotlinkTraffic;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMembersReferred() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getReferralURL() {
|
||||
return referralURL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ import com.rogiel.httpchannel.service.UploaderCapability;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback;
|
||||
import com.rogiel.httpchannel.service.config.NullUploaderConfiguration;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This service handles login, upload and download to ifile.it.
|
||||
@@ -115,7 +115,7 @@ public class IFileService extends AbstractHttpService implements Service,
|
||||
AbstractUploader<NullUploaderConfiguration> implements
|
||||
Uploader<NullUploaderConfiguration>,
|
||||
LinkedUploadChannelCloseCallback {
|
||||
private Future<HTMLPage> uploadFuture;
|
||||
private Future<Page> uploadFuture;
|
||||
|
||||
public UploaderImpl(String filename, long filesize,
|
||||
NullUploaderConfiguration configuration) {
|
||||
@@ -125,9 +125,9 @@ public class IFileService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
logger.debug("Starting upload to ifile.it");
|
||||
final HTMLPage page = get("http://ifile.it/upload-classic.html")
|
||||
final Page page = get("http://ifile.it/upload-classic.html")
|
||||
.asPage();
|
||||
final String action = page.findFormAction(UPLOAD_URI_PATTERN);
|
||||
final String action = page.form(UPLOAD_URI_PATTERN).asString();
|
||||
|
||||
logger.debug("Upload URI is {}", action);
|
||||
|
||||
@@ -141,7 +141,7 @@ public class IFileService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public String finish() throws IOException {
|
||||
try {
|
||||
return uploadFuture.get().getInputValue(DOWNLOAD_URI_PATTERN);
|
||||
return uploadFuture.get().input(DOWNLOAD_URI_PATTERN).asString();
|
||||
} catch (InterruptedException e) {
|
||||
return null;
|
||||
} catch (ExecutionException e) {
|
||||
|
||||
@@ -34,6 +34,7 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader;
|
||||
import com.rogiel.httpchannel.service.AbstractHttpService;
|
||||
import com.rogiel.httpchannel.service.AbstractUploader;
|
||||
import com.rogiel.httpchannel.service.AccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AuthenticationService;
|
||||
import com.rogiel.httpchannel.service.Authenticator;
|
||||
import com.rogiel.httpchannel.service.AuthenticatorCapability;
|
||||
@@ -51,7 +52,6 @@ import com.rogiel.httpchannel.service.UploadChannel;
|
||||
import com.rogiel.httpchannel.service.UploadService;
|
||||
import com.rogiel.httpchannel.service.Uploader;
|
||||
import com.rogiel.httpchannel.service.UploaderCapability;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback;
|
||||
import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration;
|
||||
@@ -60,7 +60,7 @@ import com.rogiel.httpchannel.service.exception.DownloadLimitExceededException;
|
||||
import com.rogiel.httpchannel.service.exception.DownloadLinkNotFoundException;
|
||||
import com.rogiel.httpchannel.util.HttpClientUtils;
|
||||
import com.rogiel.httpchannel.util.PatternUtils;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This service handles login, upload and download to MegaUpload.com.
|
||||
@@ -223,9 +223,9 @@ public class MegaUploadService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
logger.debug("Starting upload to megaupload.com");
|
||||
final HTMLPage page = get("http://www.megaupload.com/multiupload/")
|
||||
final Page page = get("http://www.megaupload.com/multiupload/")
|
||||
.asPage();
|
||||
final String uri = page.findFormAction(UPLOAD_URL_PATTERN);
|
||||
final String uri = page.form(UPLOAD_URL_PATTERN).asString();
|
||||
logger.debug("Upload URI is {}", uri);
|
||||
|
||||
final LinkedUploadChannel channel = createLinkedChannel(this);
|
||||
@@ -279,16 +279,16 @@ public class MegaUploadService extends AbstractHttpService implements Service,
|
||||
response = get(uri).request();
|
||||
}
|
||||
|
||||
final HTMLPage page = HttpClientUtils.toPage(response);
|
||||
final Page page = HttpClientUtils.toPage(response);
|
||||
|
||||
// try to find timer
|
||||
int timer = page.findScriptAsInt(DOWNLOAD_TIMER, 1);
|
||||
int timer = page.script(DOWNLOAD_TIMER).asInteger(1);
|
||||
if (timer > 0 && configuration.getRespectWaitTime()) {
|
||||
logger.debug("");
|
||||
timer(listener, timer * 1000);
|
||||
}
|
||||
final String downloadUrl = page
|
||||
.findLink(DOWNLOAD_DIRECT_LINK_PATTERN);
|
||||
.link(DOWNLOAD_DIRECT_LINK_PATTERN).asString();
|
||||
if (downloadUrl != null && downloadUrl.length() > 0) {
|
||||
final HttpResponse downloadResponse = get(downloadUrl)
|
||||
.position(position).request();
|
||||
@@ -322,12 +322,12 @@ public class MegaUploadService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public AccountDetails login() throws IOException {
|
||||
logger.debug("Starting login to megaupload.com");
|
||||
final HTMLPage page = post("http://www.megaupload.com/?c=login")
|
||||
final Page page = post("http://www.megaupload.com/?c=login")
|
||||
.parameter("login", true)
|
||||
.parameter("username", credential.getUsername())
|
||||
.parameter("", credential.getPassword()).asPage();
|
||||
|
||||
String username = page.findScript(LOGIN_USERNAME_PATTERN, 1);
|
||||
String username = page.script(LOGIN_USERNAME_PATTERN).asString(1);
|
||||
if (username == null)
|
||||
throw new AuthenticationInvalidCredentialException();
|
||||
return (account = new AccountDetailsImpl(credential.getUsername()));
|
||||
|
||||
@@ -31,6 +31,7 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader;
|
||||
import com.rogiel.httpchannel.service.AbstractHttpService;
|
||||
import com.rogiel.httpchannel.service.AbstractUploader;
|
||||
import com.rogiel.httpchannel.service.AccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AuthenticationService;
|
||||
import com.rogiel.httpchannel.service.Authenticator;
|
||||
import com.rogiel.httpchannel.service.AuthenticatorCapability;
|
||||
@@ -48,7 +49,6 @@ import com.rogiel.httpchannel.service.UploadChannel;
|
||||
import com.rogiel.httpchannel.service.UploadService;
|
||||
import com.rogiel.httpchannel.service.Uploader;
|
||||
import com.rogiel.httpchannel.service.UploaderCapability;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback;
|
||||
import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration;
|
||||
@@ -60,8 +60,7 @@ import com.rogiel.httpchannel.service.exception.DownloadNotAuthorizedException;
|
||||
import com.rogiel.httpchannel.service.exception.DownloadNotResumableException;
|
||||
import com.rogiel.httpchannel.service.multiupload.MultiUploadUploaderConfiguration.MultiUploadMirrorService;
|
||||
import com.rogiel.httpchannel.util.PatternUtils;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This service handles uploads to MultiUpload.nl.
|
||||
@@ -196,7 +195,8 @@ public class MultiUploadService extends AbstractHttpService implements Service,
|
||||
|
||||
@Override
|
||||
public CapabilityMatrix<AuthenticatorCapability> getAuthenticationCapability() {
|
||||
return new CapabilityMatrix<AuthenticatorCapability>(AuthenticatorCapability.ACCOUNT_DETAILS);
|
||||
return new CapabilityMatrix<AuthenticatorCapability>(
|
||||
AuthenticatorCapability.ACCOUNT_DETAILS);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -219,7 +219,7 @@ public class MultiUploadService extends AbstractHttpService implements Service,
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
logger.debug("Starting upload to multiupload.nl");
|
||||
final String uri = get("http://www.multiupload.nl/").asPage()
|
||||
.findFormAction(UPLOAD_URI_PATTERN);
|
||||
.form(UPLOAD_URI_PATTERN).asString();
|
||||
logger.debug("Upload URI is {}", uri);
|
||||
final LinkedUploadChannel channel = createLinkedChannel(this);
|
||||
|
||||
@@ -273,8 +273,9 @@ public class MultiUploadService extends AbstractHttpService implements Service,
|
||||
long position) throws IOException,
|
||||
DownloadLinkNotFoundException, DownloadLimitExceededException,
|
||||
DownloadNotAuthorizedException, DownloadNotResumableException {
|
||||
final HTMLPage page = get(uri).asPage();
|
||||
final String link = page.findLink(DIRECT_DOWNLOAD_LINK_PATTERN);
|
||||
final Page page = get(uri).asPage();
|
||||
final String link = page.link(DIRECT_DOWNLOAD_LINK_PATTERN)
|
||||
.asString();
|
||||
logger.debug("Direct download link is {}", link);
|
||||
if (link == null)
|
||||
throw new DownloadLinkNotFoundException();
|
||||
@@ -292,11 +293,13 @@ public class MultiUploadService extends AbstractHttpService implements Service,
|
||||
|
||||
@Override
|
||||
public AccountDetails login() throws IOException {
|
||||
final HTMLPage page = post("http://www.multiupload.nl/login")
|
||||
final Page page = post("http://www.multiupload.nl/login")
|
||||
.parameter("username", credential.getUsername())
|
||||
.parameter("password", credential.getPassword()).asPage();
|
||||
|
||||
if (!page.containsIgnoreCase(credential.getUsername()))
|
||||
if (page.search(Pattern.compile(
|
||||
Pattern.quote(credential.getUsername()),
|
||||
Pattern.CASE_INSENSITIVE)) != null)
|
||||
throw new AuthenticationInvalidCredentialException();
|
||||
return (account = new AccountDetailsImpl(credential.getUsername()));
|
||||
}
|
||||
|
||||
@@ -32,6 +32,7 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader;
|
||||
import com.rogiel.httpchannel.service.AbstractHttpService;
|
||||
import com.rogiel.httpchannel.service.AbstractUploader;
|
||||
import com.rogiel.httpchannel.service.AccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AuthenticationService;
|
||||
import com.rogiel.httpchannel.service.Authenticator;
|
||||
import com.rogiel.httpchannel.service.AuthenticatorCapability;
|
||||
@@ -49,7 +50,6 @@ import com.rogiel.httpchannel.service.UploadChannel;
|
||||
import com.rogiel.httpchannel.service.UploadService;
|
||||
import com.rogiel.httpchannel.service.Uploader;
|
||||
import com.rogiel.httpchannel.service.UploaderCapability;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback;
|
||||
import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration;
|
||||
@@ -59,7 +59,7 @@ import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialE
|
||||
import com.rogiel.httpchannel.service.exception.DownloadLinkNotFoundException;
|
||||
import com.rogiel.httpchannel.service.exception.InvalidCaptchaException;
|
||||
import com.rogiel.httpchannel.util.PatternUtils;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This service handles uploads to UploadKing.com.
|
||||
@@ -215,11 +215,11 @@ public class UploadHereService extends AbstractHttpService implements Service,
|
||||
|
||||
@Override
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
final HTMLPage page = get("http://www.uploadhere.com/").asPage();
|
||||
final Page page = get("http://www.uploadhere.com/").asPage();
|
||||
|
||||
final String userCookie = page.getInputValueById("usercookie");
|
||||
final String uri = page.findFormAction(UPLOAD_URI_PATTERN);
|
||||
final String uploadID = page.getInputValue("UPLOAD_IDENTIFIER");
|
||||
final String userCookie = page.inputByID("usercookie").asString();
|
||||
final String uri = page.form(UPLOAD_URI_PATTERN).asString();
|
||||
final String uploadID = page.inputByName("UPLOAD_IDENTIFIER").asString();
|
||||
|
||||
logger.debug("Upload URI: {}, UserCookie: {}, UploadID: {}",
|
||||
new Object[] { uri, userCookie, uploadID });
|
||||
@@ -262,9 +262,9 @@ public class UploadHereService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public DownloadChannel openChannel(DownloadListener listener,
|
||||
long position) throws IOException {
|
||||
HTMLPage page = get(uri).asPage();
|
||||
Page page = get(uri).asPage();
|
||||
|
||||
final int waitTime = page.findScriptAsInt(TIMER_PATTERN, 1) * 1000;
|
||||
final int waitTime = page.script(TIMER_PATTERN).asInteger(1) * 1000;
|
||||
logger.debug("Wait time is {}", waitTime);
|
||||
|
||||
timer(listener, waitTime);
|
||||
@@ -309,11 +309,11 @@ public class UploadHereService extends AbstractHttpService implements Service,
|
||||
|
||||
@Override
|
||||
public AccountDetails login() throws IOException {
|
||||
final HTMLPage page = post("http://www.uploadhere.com/login")
|
||||
final Page page = post("http://www.uploadhere.com/login")
|
||||
.parameter("do", "login")
|
||||
.parameter("username", credential.getUsername())
|
||||
.parameter("password", credential.getPassword()).asPage();
|
||||
if (page.contains(INVALID_LOGIN_STRING))
|
||||
if (page.searchFirst(INVALID_LOGIN_STRING).hasResults())
|
||||
throw new AuthenticationInvalidCredentialException();
|
||||
return (account = new AccountDetailsImpl(credential.getUsername()));
|
||||
}
|
||||
|
||||
@@ -32,6 +32,7 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader;
|
||||
import com.rogiel.httpchannel.service.AbstractHttpService;
|
||||
import com.rogiel.httpchannel.service.AbstractUploader;
|
||||
import com.rogiel.httpchannel.service.AccountDetails;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.AuthenticationService;
|
||||
import com.rogiel.httpchannel.service.Authenticator;
|
||||
import com.rogiel.httpchannel.service.AuthenticatorCapability;
|
||||
@@ -49,7 +50,6 @@ import com.rogiel.httpchannel.service.UploadChannel;
|
||||
import com.rogiel.httpchannel.service.UploadService;
|
||||
import com.rogiel.httpchannel.service.Uploader;
|
||||
import com.rogiel.httpchannel.service.UploaderCapability;
|
||||
import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel;
|
||||
import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback;
|
||||
import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration;
|
||||
@@ -59,7 +59,7 @@ import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialE
|
||||
import com.rogiel.httpchannel.service.exception.DownloadLinkNotFoundException;
|
||||
import com.rogiel.httpchannel.service.exception.InvalidCaptchaException;
|
||||
import com.rogiel.httpchannel.util.PatternUtils;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This service handles uploads to zshare.net.
|
||||
@@ -215,11 +215,11 @@ public class UploadKingService extends AbstractHttpService implements Service,
|
||||
|
||||
@Override
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
final HTMLPage page = get("http://www.uploadking.com/").asPage();
|
||||
final Page page = get("http://www.uploadking.com/").asPage();
|
||||
|
||||
final String userCookie = page.getInputValueById("usercookie");
|
||||
final String uri = page.findFormAction(UPLOAD_URI_PATTERN);
|
||||
final String uploadID = page.getInputValue("UPLOAD_IDENTIFIER");
|
||||
final String userCookie = page.inputByID("usercookie").asString();
|
||||
final String uri = page.form(UPLOAD_URI_PATTERN).asString();
|
||||
final String uploadID = page.inputByName("UPLOAD_IDENTIFIER").asString();
|
||||
|
||||
logger.debug("Upload URI: {}, UserCookie: {}, UploadID: {}",
|
||||
new Object[] { uri, userCookie, uploadID });
|
||||
@@ -259,9 +259,9 @@ public class UploadKingService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public DownloadChannel openChannel(DownloadListener listener,
|
||||
long position) throws IOException {
|
||||
HTMLPage page = get(uri).asPage();
|
||||
Page page = get(uri).asPage();
|
||||
|
||||
final int waitTime = page.findScriptAsInt(TIMER_PATTERN, 1) * 1000;
|
||||
final int waitTime = page.script(TIMER_PATTERN).asInteger(1) * 1000;
|
||||
logger.debug("Wait time is {}", waitTime);
|
||||
|
||||
timer(listener, waitTime);
|
||||
@@ -306,11 +306,11 @@ public class UploadKingService extends AbstractHttpService implements Service,
|
||||
|
||||
@Override
|
||||
public AccountDetails login() throws IOException {
|
||||
final HTMLPage page = post("http://www.uploadking.com/login")
|
||||
final Page page = post("http://www.uploadking.com/login")
|
||||
.parameter("do", "login")
|
||||
.parameter("username", credential.getUsername())
|
||||
.parameter("password", credential.getPassword()).asPage();
|
||||
if (page.contains(INVALID_LOGIN_STRING))
|
||||
if (page.searchFirst(INVALID_LOGIN_STRING).hasResults())
|
||||
throw new AuthenticationInvalidCredentialException();
|
||||
return (account = new AccountDetailsImpl(credential.getUsername()));
|
||||
}
|
||||
|
||||
@@ -50,7 +50,8 @@ import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadCh
|
||||
import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration;
|
||||
import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialException;
|
||||
import com.rogiel.httpchannel.util.Filesizes;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
import com.rogiel.httpchannel.util.html.SearchResults;
|
||||
|
||||
/**
|
||||
* This service handles login, upload and download to uptobox.com.
|
||||
@@ -59,7 +60,7 @@ import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
* @since 1.0
|
||||
*/
|
||||
public class UptoboxService extends AbstractHttpService implements Service,
|
||||
UploadService<UptoboxConfiguration>,
|
||||
UploadService<UptoboxUploaderConfiguration>,
|
||||
AuthenticationService<NullAuthenticatorConfiguration> {
|
||||
/**
|
||||
* This service ID
|
||||
@@ -97,20 +98,20 @@ public class UptoboxService extends AbstractHttpService implements Service,
|
||||
}
|
||||
|
||||
@Override
|
||||
public Uploader<UptoboxConfiguration> getUploader(String filename,
|
||||
long filesize, UptoboxConfiguration configuration) {
|
||||
public Uploader<UptoboxUploaderConfiguration> getUploader(String filename,
|
||||
long filesize, UptoboxUploaderConfiguration configuration) {
|
||||
return new UploaderImpl(filename, filesize, configuration);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Uploader<UptoboxConfiguration> getUploader(String filename,
|
||||
public Uploader<UptoboxUploaderConfiguration> getUploader(String filename,
|
||||
long filesize) {
|
||||
return getUploader(filename, filesize, newUploaderConfiguration());
|
||||
}
|
||||
|
||||
@Override
|
||||
public UptoboxConfiguration newUploaderConfiguration() {
|
||||
return new UptoboxConfiguration();
|
||||
public UptoboxUploaderConfiguration newUploaderConfiguration() {
|
||||
return new UptoboxUploaderConfiguration();
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -164,28 +165,29 @@ public class UptoboxService extends AbstractHttpService implements Service,
|
||||
return account;
|
||||
}
|
||||
|
||||
protected class UploaderImpl extends AbstractUploader<UptoboxConfiguration>
|
||||
implements Uploader<UptoboxConfiguration>,
|
||||
protected class UploaderImpl extends
|
||||
AbstractUploader<UptoboxUploaderConfiguration> implements
|
||||
Uploader<UptoboxUploaderConfiguration>,
|
||||
LinkedUploadChannelCloseCallback {
|
||||
private Future<HTMLPage> uploadFuture;
|
||||
private Future<Page> uploadFuture;
|
||||
|
||||
public UploaderImpl(String filename, long filesize,
|
||||
UptoboxConfiguration configuration) {
|
||||
UptoboxUploaderConfiguration configuration) {
|
||||
super(UptoboxService.this, filename, filesize, configuration);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UploadChannel openChannel() throws IOException {
|
||||
logger.debug("Starting upload to ifile.it");
|
||||
final HTMLPage page = get("http://uptobox.com/").asPage();
|
||||
String action = page.findFormAction(UPLOAD_URI_PATTERN);
|
||||
final String srvTmpUrl = page.getInputValue("srv_tmp_url");
|
||||
logger.debug("Starting upload to uptobox.com");
|
||||
final Page page = get("http://uptobox.com/").asPage();
|
||||
String action = page.form(UPLOAD_URI_PATTERN).asString();
|
||||
final String srvTmpUrl = page.inputByName("srv_tmp_url").asString();
|
||||
|
||||
if (account != null) {
|
||||
action += "&type=reg";
|
||||
}
|
||||
|
||||
final String sessionID = page.getInputValue("sess_id");
|
||||
final String sessionID = page.inputByName("sess_id").asString();
|
||||
|
||||
logger.debug("Upload URI is {}", action);
|
||||
|
||||
@@ -202,7 +204,7 @@ public class UptoboxService extends AbstractHttpService implements Service,
|
||||
@Override
|
||||
public String finish() throws IOException {
|
||||
try {
|
||||
return uploadFuture.get().findLink(DOWNLOAD_URI_PATTERN);
|
||||
return uploadFuture.get().link(DOWNLOAD_URI_PATTERN).asString();
|
||||
} catch (InterruptedException e) {
|
||||
return null;
|
||||
} catch (ExecutionException e) {
|
||||
@@ -221,37 +223,42 @@ public class UptoboxService extends AbstractHttpService implements Service,
|
||||
|
||||
@Override
|
||||
public AccountDetails login() throws IOException {
|
||||
final HTMLPage page = post("http://uptobox.com/")
|
||||
final Page page = post("http://uptobox.com/")
|
||||
.parameter("op", "login")
|
||||
.parameter("redirect", "http://uptobox.com/?op=my_account")
|
||||
.parameter("login", credential.getUsername())
|
||||
.parameter("password", credential.getPassword()).asPage();
|
||||
|
||||
final String username = page.findPlain(
|
||||
Pattern.compile("Username:(.+) Apply"), 1);
|
||||
final SearchResults results = page.search(Pattern
|
||||
.compile("Username:(.+) Apply"));
|
||||
if (!results.hasResults())
|
||||
throw new AuthenticationInvalidCredentialException();
|
||||
final String username = results.asString(1);
|
||||
if (username == null)
|
||||
throw new AuthenticationInvalidCredentialException();
|
||||
final boolean premium = !page.containsPlain(Pattern.compile(
|
||||
"Account type Free member", Pattern.MULTILINE));
|
||||
final int points = page.findIntPlain(
|
||||
Pattern.compile("You have collected:([0-9])+"), 1);
|
||||
final int referrals = page.findIntPlain(
|
||||
Pattern.compile("My referrals:([0-9])+"), 1);
|
||||
final String referralURL = page.findLink(Pattern
|
||||
.compile("http://uptobox\\.com/affiliate/[0-9]+"));
|
||||
final boolean premium = !page.search(
|
||||
Pattern.compile("Account type Free member",
|
||||
Pattern.MULTILINE)).hasResults();
|
||||
final int points = page.search(
|
||||
Pattern.compile("You have collected:([0-9])+"))
|
||||
.asInteger(1);
|
||||
final int referrals = page.search(
|
||||
Pattern.compile("My referrals:([0-9])+")).asInteger(1);
|
||||
final String referralURL = page.link(
|
||||
Pattern.compile("http://uptobox\\.com/affiliate/[0-9]+"))
|
||||
.asString();
|
||||
|
||||
final HTMLPage index = get("http://uptobox.com/").asPage();
|
||||
final int maximumFileSize = index.findIntPlain(
|
||||
Pattern.compile("Up to ([0-9]*) Mb"), 1);
|
||||
final Page index = get("http://uptobox.com/").asPage();
|
||||
final int maximumFileSize = index.search(
|
||||
Pattern.compile("Up to ([0-9]*) Mb")).asInteger(1);
|
||||
|
||||
final HTMLPage disk = get("http://uptobox.com/?op=my_files")
|
||||
.asPage();
|
||||
final double usedDiskSpace = disk.findDoublePlain(
|
||||
DISK_USAGE_PATTERN, 1);
|
||||
final String usedDiskSpaceUnit = disk.findPlain(DISK_USAGE_PATTERN,
|
||||
3);
|
||||
final double maximumDiskSpace = disk.findDoublePlain(
|
||||
DISK_USAGE_PATTERN, 4);
|
||||
final Page disk = get("http://uptobox.com/?op=my_files").asPage();
|
||||
final double usedDiskSpace = disk.search(DISK_USAGE_PATTERN)
|
||||
.asDouble(1);
|
||||
final String usedDiskSpaceUnit = disk.search(DISK_USAGE_PATTERN)
|
||||
.asString(3);
|
||||
final double maximumDiskSpace = disk.search(DISK_USAGE_PATTERN)
|
||||
.asDouble(4);
|
||||
|
||||
return (account = new AccountDetailsImpl(username, premium,
|
||||
Filesizes.mb(maximumFileSize),
|
||||
|
||||
@@ -28,7 +28,7 @@ import com.rogiel.httpchannel.service.uptobox.UptoboxService.UploaderImpl;
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class UptoboxConfiguration extends
|
||||
public class UptoboxUploaderConfiguration extends
|
||||
AbstractUploaderConfiguration implements UploaderConfiguration,
|
||||
DescriptionableUploaderConfiguration {
|
||||
/**
|
||||
@@ -42,7 +42,7 @@ public class UptoboxConfiguration extends
|
||||
}
|
||||
|
||||
@Override
|
||||
public UptoboxConfiguration description(String description) {
|
||||
public UptoboxUploaderConfiguration description(String description) {
|
||||
this.description = description;
|
||||
return this;
|
||||
}
|
||||
@@ -22,10 +22,9 @@ import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.rogiel.httpchannel.captcha.ImageCaptcha;
|
||||
import com.rogiel.httpchannel.http.HttpContext;
|
||||
import com.rogiel.httpchannel.util.PatternUtils;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
/**
|
||||
* This class provides utility methods to extract an {@link ImageCaptcha} from
|
||||
@@ -57,8 +56,8 @@ public class ReCaptchaExtractor {
|
||||
* the {@link HttpContext}
|
||||
* @return the {@link ImageCaptcha} embedded at the given <code>page</code>
|
||||
*/
|
||||
public static ImageCaptcha extractCaptcha(HTMLPage page, HttpContext ctx) {
|
||||
final String uri = page.findScriptSrc(CAPTCHA_URI_PATTERN);
|
||||
public static ImageCaptcha extractCaptcha(Page page, HttpContext ctx) {
|
||||
final String uri = page.scriptBySource(CAPTCHA_URI_PATTERN).asString();
|
||||
if (uri == null)
|
||||
return null;
|
||||
try {
|
||||
@@ -77,8 +76,8 @@ public class ReCaptchaExtractor {
|
||||
* the {@link HttpContext}
|
||||
* @return the {@link ImageCaptcha} contained at the given <code>page</code>
|
||||
*/
|
||||
public static ImageCaptcha extractAjaxCaptcha(HTMLPage page, HttpContext ctx) {
|
||||
final String siteID = page.findScript(CAPTCHA_ID_PATTERN, 1);
|
||||
public static ImageCaptcha extractAjaxCaptcha(Page page, HttpContext ctx) {
|
||||
final String siteID = page.script(CAPTCHA_ID_PATTERN).asString(1);
|
||||
try {
|
||||
return doExtract(ctx.get(CHALLENGE_BASE_URI + siteID).asString());
|
||||
} catch (IOException e) {
|
||||
|
||||
@@ -33,7 +33,7 @@ import org.json.simple.parser.JSONParser;
|
||||
import org.json.simple.parser.ParseException;
|
||||
|
||||
import com.rogiel.httpchannel.util.HttpClientUtils;
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
public abstract class Request {
|
||||
private static final JSONParser jsonParser = new JSONParser();
|
||||
@@ -90,14 +90,14 @@ public abstract class Request {
|
||||
});
|
||||
}
|
||||
|
||||
public HTMLPage asPage() throws ClientProtocolException, IOException {
|
||||
return HTMLPage.parse(asString());
|
||||
public Page asPage() throws ClientProtocolException, IOException {
|
||||
return Page.parse(asString());
|
||||
}
|
||||
|
||||
public Future<HTMLPage> asPageAsync() throws IOException {
|
||||
return ctx.threadPool.submit(new Callable<HTMLPage>() {
|
||||
public Future<Page> asPageAsync() throws IOException {
|
||||
return ctx.threadPool.submit(new Callable<Page>() {
|
||||
@Override
|
||||
public HTMLPage call() throws Exception {
|
||||
public Page call() throws Exception {
|
||||
return asPage();
|
||||
}
|
||||
});
|
||||
|
||||
@@ -31,7 +31,7 @@ import org.apache.http.client.HttpClient;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.client.methods.HttpUriRequest;
|
||||
|
||||
import com.rogiel.httpchannel.util.htmlparser.HTMLPage;
|
||||
import com.rogiel.httpchannel.util.html.Page;
|
||||
|
||||
public class HttpClientUtils {
|
||||
private static final ExecutorService threadPool = Executors
|
||||
@@ -82,7 +82,7 @@ public class HttpClientUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static HTMLPage toPage(HttpResponse response) throws IOException {
|
||||
return HTMLPage.parse(toString(response));
|
||||
public static Page toPage(HttpResponse response) throws IOException {
|
||||
return Page.parse(toString(response));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,174 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.html;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Tag;
|
||||
|
||||
/**
|
||||
* A {@link PageElement} that has a matched string attached to it
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class MatchedElement<T extends Tag> extends PageElement<T> {
|
||||
/**
|
||||
* The regular expression {@link Matcher} that retains the matched strings
|
||||
* to it
|
||||
*/
|
||||
private final Matcher matcher;
|
||||
|
||||
/**
|
||||
* @param tag
|
||||
* the tag
|
||||
* @param matcher
|
||||
* the matcher
|
||||
*/
|
||||
public MatchedElement(T tag, Matcher matcher) {
|
||||
super(tag);
|
||||
this.matcher = matcher;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param tag
|
||||
* the tag
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @param content
|
||||
* the content
|
||||
*/
|
||||
public MatchedElement(T tag, Pattern pattern, String content) {
|
||||
super(tag);
|
||||
this.matcher = pattern.matcher(content);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param tag
|
||||
* the tag
|
||||
* @param content
|
||||
* the content
|
||||
*/
|
||||
public MatchedElement(T tag, String content) {
|
||||
this(tag, Pattern.compile(Pattern.quote(content)), content);
|
||||
this.matcher.matches();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return <code>true</code> if the element has an matched element
|
||||
*/
|
||||
public boolean matches() {
|
||||
matcher.reset();
|
||||
return matcher.matches();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return <code>true</code> if the element has an matched element (the
|
||||
* entire value matches the pattern)
|
||||
*/
|
||||
public boolean matchesEntirelly() {
|
||||
return matcher.lookingAt();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return <code>true</code> if the pattern has found something on the
|
||||
* element that matches it
|
||||
*/
|
||||
public boolean find() {
|
||||
matcher.reset();
|
||||
return matcher.find();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param n
|
||||
* the group number
|
||||
* @return <code>true</code> if the group exists
|
||||
*/
|
||||
public boolean hasGroup(int n) {
|
||||
return n <= matcher.groupCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the entire matched value as a string
|
||||
*/
|
||||
public String asString() {
|
||||
return asString(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the group value as a string
|
||||
*/
|
||||
public String asString(int n) {
|
||||
return matcher.group(n);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the entire matched value as a integer
|
||||
*/
|
||||
public int asInteger() {
|
||||
return asInteger(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the group value as a integer
|
||||
*/
|
||||
public int asInteger(int n) {
|
||||
return Integer.parseInt(asString(n));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the entire matched value as a long
|
||||
*/
|
||||
public long asLong() {
|
||||
return asLong(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the group value as a long
|
||||
*/
|
||||
public long asLong(int n) {
|
||||
return Long.parseLong(asString(n));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the entire matched value as a double
|
||||
*/
|
||||
public double asDouble() {
|
||||
return asDouble(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the group value as a double
|
||||
*/
|
||||
public double asDouble(int n) {
|
||||
return Double.parseDouble(asString(n));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the pattern matched against the element
|
||||
*/
|
||||
public Pattern getPattern() {
|
||||
return matcher.pattern();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "MatchedElement [tag=" + tag + ", pattern=" + getPattern() + "]";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,841 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.html;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.Parser;
|
||||
import org.htmlparser.Tag;
|
||||
import org.htmlparser.filters.AndFilter;
|
||||
import org.htmlparser.tags.FormTag;
|
||||
import org.htmlparser.tags.FrameTag;
|
||||
import org.htmlparser.tags.ImageTag;
|
||||
import org.htmlparser.tags.InputTag;
|
||||
import org.htmlparser.tags.LinkTag;
|
||||
import org.htmlparser.tags.ScriptTag;
|
||||
import org.htmlparser.tags.TextareaTag;
|
||||
import org.htmlparser.util.NodeIterator;
|
||||
import org.htmlparser.util.NodeList;
|
||||
import org.htmlparser.util.ParserException;
|
||||
|
||||
import com.rogiel.httpchannel.util.html.PageElement.TagMatcher;
|
||||
import com.rogiel.httpchannel.util.html.filter.TypeTagFilter;
|
||||
import com.rogiel.httpchannel.util.html.matcher.IDTagMatcher;
|
||||
import com.rogiel.httpchannel.util.html.matcher.NameTagMatcher;
|
||||
|
||||
/**
|
||||
* This class handles all HTML parsing and searching. With this class it is easy to
|
||||
* search for links matching an {@link Pattern}, for images, frames, forms,
|
||||
* inputs and many more HTML widgets.
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class Page {
|
||||
/**
|
||||
* The list of nodes on the HTML DOM model
|
||||
*/
|
||||
private final NodeList nodes;
|
||||
|
||||
/**
|
||||
* This interface provides a means to transform a list of objects into
|
||||
* another type
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*
|
||||
* @param <I>
|
||||
* the input object type
|
||||
* @param <O>
|
||||
* the output object type
|
||||
*/
|
||||
private interface ListProcessor<I extends Tag, O> {
|
||||
O process(I tag);
|
||||
}
|
||||
|
||||
/**
|
||||
* A default {@link ListProcessor} that converts all tags to an
|
||||
* {@link PageElement}
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*
|
||||
* @param <I>
|
||||
* the input type
|
||||
*/
|
||||
private class DefaultListProcessor<I extends Tag> implements
|
||||
ListProcessor<I, PageElement<I>> {
|
||||
@Override
|
||||
public PageElement<I> process(I tag) {
|
||||
return new PageElement<I>(tag);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new page instance
|
||||
*
|
||||
* @param parser
|
||||
* the HTML parser
|
||||
* @throws ParserException
|
||||
* an parsing exception
|
||||
*/
|
||||
public Page(Parser parser) throws ParserException {
	// No filter is given to the parser; presumably this keeps every parsed
	// node so later searches can apply their own filters - TODO confirm
	// against the htmlparser Parser.parse(NodeFilter) documentation.
	nodes = parser.parse(null);
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** INTERNAL
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* Filters all the tags within this page to those matching the filter
|
||||
*
|
||||
* @param processor
|
||||
* the list processor
|
||||
* @param filters
|
||||
* the filters to be applied
|
||||
* @return an list of matching tags
|
||||
*/
|
||||
private <T extends Tag, O> List<O> filter(ListProcessor<T, O> processor,
|
||||
NodeFilter... filters) {
|
||||
final NodeFilter filter;
|
||||
if (filters.length == 1)
|
||||
filter = filters[0];
|
||||
else
|
||||
filter = new AndFilter(filters);
|
||||
try {
|
||||
return list(nodes.extractAllNodesThatMatch(filter, true), processor);
|
||||
} catch (ParserException e) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a list of converted objects
|
||||
*
|
||||
* @param list
|
||||
* the input list
|
||||
* @param processor
|
||||
* the processor that converts the object types
|
||||
* @return the processed and converted list
|
||||
* @throws ParserException
|
||||
* if any exception occur
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
private <T extends Tag, O> List<O> list(final NodeList list,
|
||||
ListProcessor<T, O> processor) throws ParserException {
|
||||
final List<O> filtered = new ArrayList<>();
|
||||
final NodeIterator iterator = list.elements();
|
||||
while (iterator.hasMoreNodes()) {
|
||||
filtered.add(processor.process((T) iterator.nextNode()));
|
||||
}
|
||||
return filtered;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to search for a tag value that matches exactly (the entire string)
|
||||
* with the pattern.
|
||||
*
|
||||
* @param list
|
||||
* the list of elements
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @param tagMatcher
|
||||
* the tag matcher (which will be matched against the pattern)
|
||||
* @param realMatcher
|
||||
* the real matcher (which will be returned on the
|
||||
* {@link MatchedElement})
|
||||
* @return an list of {@link MatchedElement}
|
||||
*/
|
||||
private <T extends Tag, E extends PageElement<T>> List<MatchedElement<T>> match(
|
||||
List<E> list, Pattern pattern, TagMatcher<T> tagMatcher,
|
||||
TagMatcher<T> realMatcher) {
|
||||
final List<MatchedElement<T>> matchList = new ArrayList<>();
|
||||
for (final E tag : list) {
|
||||
final MatchedElement<T> matched = tag.match(pattern, tagMatcher);
|
||||
if (matched == null)
|
||||
continue;
|
||||
if (matched.matches()) {
|
||||
if (tagMatcher == realMatcher) {
|
||||
matchList.add(matched);
|
||||
} else {
|
||||
matchList.add(tag.match(realMatcher));
|
||||
}
|
||||
}
|
||||
}
|
||||
return matchList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to search for a tag value that matches exactly (the entire string)
|
||||
* with the pattern.
|
||||
*
|
||||
* @param list
|
||||
* the list of elements
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @param tagMatcher
|
||||
* the tag matcher (which will be matched against the pattern and
|
||||
* used on {@link MatchedElement})
|
||||
* @return an list of {@link MatchedElement}
|
||||
*/
|
||||
private <T extends Tag, E extends PageElement<T>> List<MatchedElement<T>> match(
|
||||
List<E> list, Pattern pattern, TagMatcher<T> tagMatcher) {
|
||||
return match(list, pattern, tagMatcher, tagMatcher);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to search for a tag value that contains the content within the
|
||||
* pattern.
|
||||
*
|
||||
* @param list
|
||||
* the list of elements
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @param tagMatcher
|
||||
* the tag matcher (which will be matched against the pattern and
|
||||
* used on {@link MatchedElement})
|
||||
* @return an list of {@link MatchedElement}
|
||||
*/
|
||||
|
||||
private <T extends Tag, E extends PageElement<T>> List<MatchedElement<T>> find(
|
||||
List<E> list, Pattern pattern, TagMatcher<T> tagMatcher) {
|
||||
final List<MatchedElement<T>> matchList = new ArrayList<>();
|
||||
for (final E tag : list) {
|
||||
final MatchedElement<T> matched = tag.match(pattern, tagMatcher);
|
||||
if (matched.find())
|
||||
matchList.add(matched);
|
||||
}
|
||||
return matchList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a single element from the list
|
||||
*
|
||||
* @param list
|
||||
* the list
|
||||
* @return the first element of the list, or <code>null</code> if the list is empty
|
||||
*/
|
||||
private <O> O single(List<O> list) {
|
||||
if (list.size() == 0)
|
||||
return null;
|
||||
return list.get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the HTML page to a plain string. This is similar to the
|
||||
* "SEO preview" systems
|
||||
*
|
||||
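* @return the text of the page on a single line: tabs are replaced by spaces, blank lines are dropped and the remaining trimmed lines are joined with a space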
|
||||
*/
|
||||
public String asPlainString() {
|
||||
String string = nodes.asString().replaceAll(" ", "");
|
||||
final String[] lines = string.split("\n");
|
||||
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
for (final String line : lines) {
|
||||
String procLine = line.replaceAll("\t", " ").trim();
|
||||
if (procLine.length() == 0)
|
||||
continue;
|
||||
builder.append(line.replaceAll("\t", " ").trim()).append(" ");
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** TEXT SEARCH
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* Searches for the given pattern at the entire page
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the search results
|
||||
*/
|
||||
public SearchResults search(Pattern pattern) {
|
||||
return new SearchResults(pattern, asPlainString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches for the given text at the entire page
|
||||
*
|
||||
* @param text
|
||||
* the text
|
||||
* @return the search results
|
||||
*/
|
||||
public SearchResults searchFirst(String text) {
|
||||
return search(Pattern.compile(Pattern.quote(text)));
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** LINKS
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* An {@link TagMatcher} that returns the link href
|
||||
*/
|
||||
private static final TagMatcher<LinkTag> LINK_TAG_MATCHER = new TagMatcher<LinkTag>() {
|
||||
@Override
|
||||
public String content(LinkTag tag) {
|
||||
return tag.getLink();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @return a list of all links contained at the page
|
||||
*/
|
||||
public List<PageElement<LinkTag>> links() {
|
||||
return filter(new DefaultListProcessor<LinkTag>(), new TypeTagFilter(
|
||||
LinkTag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all links whose URL matches the given pattern
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the list of links matching the pattern
|
||||
*/
|
||||
public List<MatchedElement<LinkTag>> links(Pattern pattern) {
|
||||
return match(links(), pattern, LINK_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the first link whose URL matches the given pattern
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the first link matching the pattern
|
||||
*/
|
||||
public MatchedElement<LinkTag> link(Pattern pattern) {
|
||||
return single(links(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the links whose IDs matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<LinkTag>> linksByID(Pattern pattern) {
|
||||
return match(links(), pattern, new IDTagMatcher<LinkTag>(),
|
||||
LINK_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param id
|
||||
* the link ID
|
||||
* @return the link with the given ID
|
||||
*/
|
||||
public MatchedElement<LinkTag> linkByID(String id) {
|
||||
return single(linksByID(Pattern.compile(Pattern.quote(id))));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the name pattern
|
||||
* @return the links whose name matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<LinkTag>> linksByName(Pattern pattern) {
|
||||
return match(links(), pattern, new NameTagMatcher<LinkTag>(),
|
||||
LINK_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param name
|
||||
* the name
|
||||
* @return the link with the given name
|
||||
*/
|
||||
public MatchedElement<LinkTag> linkByName(String name) {
|
||||
return single(linksByName(Pattern.compile(Pattern.quote(name))));
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** IMAGES
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* An {@link TagMatcher} that returns the image source url
|
||||
*/
|
||||
private static final TagMatcher<ImageTag> IMAGE_TAG_MATCHER = new TagMatcher<ImageTag>() {
|
||||
@Override
|
||||
public String content(ImageTag tag) {
|
||||
return tag.getImageURL();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @return the list of all images at the page
|
||||
*/
|
||||
public List<PageElement<ImageTag>> images() {
|
||||
return filter(new DefaultListProcessor<ImageTag>(), new TypeTagFilter(
|
||||
ImageTag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the image url pattern
|
||||
* @return the list of images matching the url pattern
|
||||
*/
|
||||
public List<MatchedElement<ImageTag>> images(Pattern pattern) {
|
||||
return match(images(), pattern, IMAGE_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the image url pattern
|
||||
* @return the first image whose url matches the pattern
|
||||
*/
|
||||
public MatchedElement<ImageTag> image(Pattern pattern) {
|
||||
return single(images(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the pattern id
|
||||
* @return the list of images that match the given id
|
||||
*/
|
||||
public List<MatchedElement<ImageTag>> imagesByID(Pattern pattern) {
|
||||
return match(images(), pattern, new IDTagMatcher<ImageTag>(),
|
||||
IMAGE_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param id
|
||||
* the image ID
|
||||
* @return the image that matches with the given id
|
||||
*/
|
||||
public MatchedElement<ImageTag> imageByID(String id) {
|
||||
return single(imagesByID(Pattern.compile(Pattern.quote(id))));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the image name pattern
|
||||
* @return the list of images whose names match the pattern
|
||||
*/
|
||||
public List<MatchedElement<ImageTag>> imagesByName(Pattern pattern) {
|
||||
return match(images(), pattern, new NameTagMatcher<ImageTag>(),
|
||||
IMAGE_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param name
|
||||
* the image name
|
||||
* @return the image whose name matches the given
|
||||
*/
|
||||
public MatchedElement<ImageTag> imageByName(String name) {
|
||||
return single(imagesByName(Pattern.compile(Pattern.quote(name))));
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** FORM
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* An {@link TagMatcher} that returns the form action (or submit) url
|
||||
*/
|
||||
private static final TagMatcher<FormTag> FORM_TAG_MATCHER = new TagMatcher<FormTag>() {
|
||||
@Override
|
||||
public String content(FormTag tag) {
|
||||
return tag.getFormLocation();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @return the list of all forms on the page
|
||||
*/
|
||||
public List<PageElement<FormTag>> forms() {
|
||||
return filter(new DefaultListProcessor<FormTag>(), new TypeTagFilter(
|
||||
FormTag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the action url pattern
|
||||
* @return the forms whose urls matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<FormTag>> forms(Pattern pattern) {
|
||||
return match(forms(), pattern, FORM_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the action url pattern
|
||||
* @return the first form whose action url matches the pattern
|
||||
*/
|
||||
public MatchedElement<FormTag> form(Pattern pattern) {
|
||||
return single(forms(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the form id pattern
|
||||
* @return the forms whose ids matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<FormTag>> formsByID(Pattern pattern) {
|
||||
return match(forms(), pattern, new IDTagMatcher<FormTag>(),
|
||||
FORM_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param id
|
||||
* the form id
|
||||
* @return the form whose id matches the given
|
||||
*/
|
||||
public MatchedElement<FormTag> formByID(String id) {
|
||||
return single(formsByID(Pattern.compile(Pattern.quote(id))));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the form name pattern
|
||||
* @return the forms whose names matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<FormTag>> formsByName(Pattern pattern) {
|
||||
return match(forms(), pattern, new NameTagMatcher<FormTag>(),
|
||||
FORM_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param name
|
||||
* the form name
|
||||
* @return the form whose name matches the given
|
||||
*/
|
||||
public MatchedElement<FormTag> formByName(String name) {
|
||||
return single(formsByName(Pattern.compile(Pattern.quote(name))));
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** INPUT
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* An {@link TagMatcher} that returns the input value
|
||||
*/
|
||||
private static final TagMatcher<InputTag> INPUT_TAG_MATCHER = new TagMatcher<InputTag>() {
|
||||
@Override
|
||||
public String content(InputTag tag) {
|
||||
return tag.getAttribute("value");
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @return the list of all inputs on the page
|
||||
*/
|
||||
public List<PageElement<InputTag>> inputs() {
|
||||
return filter(new DefaultListProcessor<InputTag>(), new TypeTagFilter(
|
||||
InputTag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the input value pattern
|
||||
* @return the inputs whose values matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<InputTag>> inputs(Pattern pattern) {
|
||||
return find(inputs(), pattern, INPUT_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
 * @param pattern
 *            the input value pattern
 * @return the first input whose value matches the pattern
 */
|
||||
public MatchedElement<InputTag> input(Pattern pattern) {
|
||||
return single(inputs(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the input id pattern
|
||||
* @return the inputs whose ids matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<InputTag>> inputsByID(Pattern pattern) {
|
||||
return match(inputs(), pattern, new IDTagMatcher<InputTag>(),
|
||||
INPUT_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
 * @param id
 *            the input id
 * @return the input whose id matches the given
 */
|
||||
public MatchedElement<InputTag> inputByID(String id) {
|
||||
return single(inputsByID(Pattern.compile(Pattern.quote(id))));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the input name pattern
|
||||
* @return the inputs whose name matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<InputTag>> inputsByName(Pattern pattern) {
|
||||
return match(inputs(), pattern, new NameTagMatcher<InputTag>(),
|
||||
INPUT_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param name
|
||||
* the input name
|
||||
* @return the input whose name matches the given
|
||||
*/
|
||||
public MatchedElement<InputTag> inputByName(String name) {
|
||||
return single(inputsByName(Pattern.compile(Pattern.quote(name))));
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** TEXTAREA
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* An {@link TagMatcher} that returns the textarea value
|
||||
*/
|
||||
private static final TagMatcher<TextareaTag> TEXTAREA_TAG_MATCHER = new TagMatcher<TextareaTag>() {
|
||||
@Override
|
||||
public String content(TextareaTag tag) {
|
||||
return tag.getStringText();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @return the list of all textareas on the page
|
||||
*/
|
||||
public List<PageElement<TextareaTag>> textareas() {
|
||||
return filter(new DefaultListProcessor<TextareaTag>(),
|
||||
new TypeTagFilter(TextareaTag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the textarea value pattern
|
||||
* @return the textareas whose values matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<TextareaTag>> textareas(Pattern pattern) {
|
||||
return match(textareas(), pattern, TEXTAREA_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the textarea value pattern
|
||||
* @return the first textarea whose value matches the pattern
|
||||
*/
|
||||
public MatchedElement<TextareaTag> textarea(Pattern pattern) {
|
||||
return single(textareas(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the textarea id pattern
|
||||
* @return the textareas whose ids matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<TextareaTag>> textareasByID(Pattern pattern) {
|
||||
return match(textareas(), pattern, new IDTagMatcher<TextareaTag>(),
|
||||
TEXTAREA_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
 * @param id
 *            the textarea id
 * @return the textarea whose id matches the given
 */
|
||||
public MatchedElement<TextareaTag> textareaByID(String id) {
|
||||
return single(textareasByID(Pattern.compile(Pattern.quote(id))));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the textarea name pattern
|
||||
* @return the textareas whose name matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<TextareaTag>> textareasByName(Pattern pattern) {
|
||||
return match(textareas(), pattern, new NameTagMatcher<TextareaTag>(),
|
||||
TEXTAREA_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param name
|
||||
* the textarea name
|
||||
* @return the textarea whose name matches the given
|
||||
*/
|
||||
public MatchedElement<TextareaTag> textareaByName(String name) {
|
||||
return single(textareasByName(Pattern.compile(Pattern.quote(name))));
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** JAVASCRIPT
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* An {@link TagMatcher} that returns the script code
|
||||
*/
|
||||
public List<PageElement<ScriptTag>> scripts() {
|
||||
return filter(new DefaultListProcessor<ScriptTag>(), new TypeTagFilter(
|
||||
ScriptTag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the list of all scripts on the page
|
||||
*/
|
||||
public List<MatchedElement<ScriptTag>> scripts(Pattern pattern) {
|
||||
return find(scripts(), pattern, new TagMatcher<ScriptTag>() {
|
||||
@Override
|
||||
public String content(ScriptTag tag) {
|
||||
return tag.getScriptCode();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the script code pattern
|
||||
* @return the first script whose code matches the pattern
|
||||
*/
|
||||
public MatchedElement<ScriptTag> script(Pattern pattern) {
|
||||
return single(scripts(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the script url pattern
|
||||
* @return the scripts whose urls matches the pattern
|
||||
*/
|
||||
public MatchedElement<ScriptTag> scriptBySource(Pattern pattern) {
|
||||
return single(match(scripts(), pattern, new TagMatcher<ScriptTag>() {
|
||||
@Override
|
||||
public String content(ScriptTag tag) {
|
||||
return tag.getAttribute("src");
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** FRAME
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* An {@link TagMatcher} that returns the frame url
|
||||
*/
|
||||
private static final TagMatcher<FrameTag> FRAME_TAG_MATCHER = new TagMatcher<FrameTag>() {
|
||||
@Override
|
||||
public String content(FrameTag tag) {
|
||||
return tag.getFrameLocation();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @return the list of all frames on the page
|
||||
*/
|
||||
public List<PageElement<FrameTag>> frames() {
|
||||
return filter(new DefaultListProcessor<FrameTag>(), new TypeTagFilter(
|
||||
FrameTag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the frame url pattern
|
||||
* @return the frames whose urls matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<FrameTag>> frames(Pattern pattern) {
|
||||
return match(frames(), pattern, FRAME_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the frame url pattern
|
||||
* @return the first frame whose url matches the pattern
|
||||
*/
|
||||
public MatchedElement<FrameTag> frame(Pattern pattern) {
|
||||
return single(frames(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the frame id pattern
|
||||
* @return the frames whose id matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<FrameTag>> framesByID(Pattern pattern) {
|
||||
return match(frames(), pattern, new IDTagMatcher<FrameTag>(),
|
||||
FRAME_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
 * @param id
 *            the frame id
 * @return the frame whose id matches the given
 */
|
||||
public MatchedElement<FrameTag> frameByID(String id) {
|
||||
return single(framesByID(Pattern.compile(Pattern.quote(id))));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param pattern
|
||||
* the frame name pattern
|
||||
* @return the frames whose name matches the pattern
|
||||
*/
|
||||
public List<MatchedElement<FrameTag>> framesByName(Pattern pattern) {
|
||||
return match(frames(), pattern, new NameTagMatcher<FrameTag>(),
|
||||
FRAME_TAG_MATCHER);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param name
|
||||
* the frame name
|
||||
* @return the frame whose name matches the given
|
||||
*/
|
||||
public MatchedElement<FrameTag> frameByName(String name) {
|
||||
return single(framesByName(Pattern.compile(Pattern.quote(name))));
|
||||
}
|
||||
|
||||
/*
|
||||
* ************************************************************************
|
||||
* ***** INITIALIZERS
|
||||
* ************************************************************************
|
||||
*/
|
||||
/**
|
||||
* Creates a new page parsing the HTML input
|
||||
*
|
||||
* @param html
|
||||
* the html code
|
||||
* @return the newly created {@link Page} object
|
||||
*/
|
||||
public static Page parse(String html) {
|
||||
try {
|
||||
return new Page(Parser.createParser(html, null));
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return nodes.toHtml(false);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.html;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Tag;
|
||||
|
||||
/**
|
||||
* An element that represents an tag on the page
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class PageElement<T extends Tag> {
|
||||
/**
|
||||
* The tag represented by this element
|
||||
*/
|
||||
protected final T tag;
|
||||
|
||||
/**
|
||||
* Creates a new instance
|
||||
*
|
||||
* @param tag
|
||||
* the tag
|
||||
*/
|
||||
public PageElement(T tag) {
|
||||
this.tag = tag;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to match the element with a given pattern
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the matched element
|
||||
*/
|
||||
public MatchedElement<T> match(Pattern pattern) {
|
||||
return match(pattern, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to match the element with a given pattern using an alternative
|
||||
* {@link TagMatcher}
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @param tagMatcher
|
||||
* the tag matcher
|
||||
* @return the matched element
|
||||
*/
|
||||
public MatchedElement<T> match(Pattern pattern, TagMatcher<T> tagMatcher) {
|
||||
if (tagMatcher == null) {
|
||||
tagMatcher = new TagMatcher<T>() {
|
||||
@Override
|
||||
public String content(T tag) {
|
||||
return tag.toHtml();
|
||||
}
|
||||
};
|
||||
}
|
||||
final String content = tagMatcher.content(tag);
|
||||
if (content == null)
|
||||
return null;
|
||||
return new MatchedElement<T>(tag, pattern, tagMatcher.content(tag));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to match the element with itself (return a {@link MatchedElement}
|
||||
* that always matched it self)
|
||||
*
|
||||
* @param tagMatcher
|
||||
* the tag matcher
|
||||
* @return always an {@link MatchedElement} whose group 0 matches it self
|
||||
*/
|
||||
public MatchedElement<T> match(TagMatcher<T> tagMatcher) {
|
||||
if (tagMatcher == null) {
|
||||
tagMatcher = new TagMatcher<T>() {
|
||||
@Override
|
||||
public String content(T tag) {
|
||||
return tag.toHtml();
|
||||
}
|
||||
};
|
||||
}
|
||||
final String content = tagMatcher.content(tag);
|
||||
if (content == null)
|
||||
return null;
|
||||
return new MatchedElement<T>(tag, tagMatcher.content(tag));
|
||||
}
|
||||
|
||||
/**
|
||||
* An tag matcher is an helper class that can return an value that the
|
||||
* matcher should use to test the pattern against it.
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*
|
||||
* @param <T>
|
||||
* the tag type
|
||||
*/
|
||||
public interface TagMatcher<T extends Tag> {
|
||||
String content(T tag);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the tag object
|
||||
*/
|
||||
public T tag() {
|
||||
return tag;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "PageElement [tag=" + tag + "]";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.html;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Represents an search done against an page string
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class SearchResults {
    /**
     * The matcher
     */
    private final Matcher matcher;

    /**
     * Creates a new instance around an existing matcher. The matcher is
     * stored as given; no match attempt is performed here.
     *
     * @param matcher
     *            the matcher
     */
    public SearchResults(Matcher matcher) {
        this.matcher = matcher;
    }

    /**
     * Creates a new instance and immediately searches the content for the
     * first occurrence of the pattern.
     *
     * @param pattern
     *            the pattern
     * @param content
     *            the content
     */
    public SearchResults(Pattern pattern, String content) {
        this.matcher = pattern.matcher(content);
        this.matcher.find();
    }

    /**
     * Resets the matcher and searches again from the start of the content,
     * leaving the matcher positioned on the first match (if any).
     *
     * @return <code>true</code> if the matcher has found any results
     */
    public boolean hasResults() {
        matcher.reset();
        return matcher.find();
    }

    /**
     * @param n
     *            the group number
     * @return <code>true</code> if the pattern declares a group with this
     *         number (group 0 is the entire match); negative numbers never
     *         exist
     */
    public boolean hasGroup(int n) {
        return n >= 0 && n <= matcher.groupCount();
    }

    /**
     * @return the entire matched value as a string
     * @throws IllegalStateException
     *             if no match has been found
     */
    public String asString() {
        return asString(0);
    }

    /**
     * @param n
     *            the group number
     * @return the group value as a string
     * @throws IllegalStateException
     *             if no match has been found
     * @throws IndexOutOfBoundsException
     *             if the pattern has no group with this number
     */
    public String asString(int n) {
        return matcher.group(n);
    }

    /**
     * @return the entire matched value as a integer
     */
    public int asInteger() {
        return asInteger(0);
    }

    /**
     * @param n
     *            the group number
     * @return the group value as a integer
     * @throws NumberFormatException
     *             if the group value is not a parsable integer
     */
    public int asInteger(int n) {
        return Integer.parseInt(asString(n));
    }

    /**
     * @return the entire matched value as a long
     */
    public long asLong() {
        return asLong(0);
    }

    /**
     * @param n
     *            the group number
     * @return the group value as a long
     * @throws NumberFormatException
     *             if the group value is not a parsable long
     */
    public long asLong(int n) {
        return Long.parseLong(asString(n));
    }

    /**
     * @return the entire matched value as a double
     */
    public double asDouble() {
        return asDouble(0);
    }

    /**
     * @param n
     *            the group number
     * @return the group value as a double
     * @throws NumberFormatException
     *             if the group value is not a parsable double
     */
    public double asDouble(int n) {
        return Double.parseDouble(asString(n));
    }

    /**
     * @return the pattern used by the matcher
     */
    public Pattern getPattern() {
        return matcher.pattern();
    }
}
|
||||
@@ -16,29 +16,36 @@
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
package com.rogiel.httpchannel.util.html.filter;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.Tag;
|
||||
|
||||
public class IDFilter implements NodeFilter {
|
||||
/**
|
||||
* An filter that selects all tags matching an given type
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class TypeTagFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final String id;
|
||||
/**
|
||||
* The tag type
|
||||
*/
|
||||
private final Class<? extends Tag> type;
|
||||
|
||||
public IDFilter(String id) {
|
||||
this.id = id;
|
||||
/**
|
||||
* Creates a new instance
|
||||
*
|
||||
* @param type
|
||||
* the tag type
|
||||
*/
|
||||
public TypeTagFilter(Class<? extends Tag> type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof Tag))
|
||||
return false;
|
||||
final Tag tag = (Tag) node;
|
||||
if (tag.getAttribute("id") == null)
|
||||
return false;
|
||||
if (!tag.getAttribute("id").equals(id))
|
||||
return false;
|
||||
return true;
|
||||
return type.isAssignableFrom(node.getClass());
|
||||
}
|
||||
}
|
||||
@@ -16,23 +16,21 @@
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
package com.rogiel.httpchannel.util.html.matcher;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
import org.htmlparser.Tag;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
|
||||
public class ContainsFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern content;
|
||||
|
||||
public ContainsFilter(Pattern content) {
|
||||
this.content = content;
|
||||
}
|
||||
import com.rogiel.httpchannel.util.html.PageElement.TagMatcher;
|
||||
|
||||
/**
|
||||
* An {@link TagMatcher} that always returns the tag ID
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class IDTagMatcher<T extends Tag> implements TagMatcher<T> {
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
return content.matcher(node.getText()).find();
|
||||
public String content(T tag) {
|
||||
return tag.getAttribute("id");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -16,23 +16,21 @@
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
package com.rogiel.httpchannel.util.html.matcher;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
import org.htmlparser.Tag;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
|
||||
public class ContainsInLowerCaseFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern content;
|
||||
|
||||
public ContainsInLowerCaseFilter(Pattern content) {
|
||||
this.content = content;
|
||||
}
|
||||
import com.rogiel.httpchannel.util.html.PageElement.TagMatcher;
|
||||
|
||||
/**
|
||||
* An {@link TagMatcher} that always returns the tag name
|
||||
*
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class NameTagMatcher<T extends Tag> implements TagMatcher<T> {
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
return content.matcher(node.getText().toLowerCase()).find();
|
||||
public String content(T tag) {
|
||||
return tag.getAttribute("name");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.tags.FormTag;
|
||||
|
||||
public class FormActionPatternFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern pattern;
|
||||
|
||||
public FormActionPatternFilter(Pattern pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof FormTag))
|
||||
return false;
|
||||
final FormTag form = (FormTag) node;
|
||||
return pattern.matcher(form.getFormLocation()).matches();
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.nodes.TagNode;
|
||||
|
||||
public class FramePatternFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern pattern;
|
||||
|
||||
public FramePatternFilter(Pattern pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof TagNode))
|
||||
return false;
|
||||
final TagNode frame = (TagNode) node;
|
||||
if (frame.getAttribute("src") == null)
|
||||
return false;
|
||||
return pattern.matcher(frame.getAttribute("src")).matches();
|
||||
}
|
||||
}
|
||||
@@ -1,304 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.Parser;
|
||||
import org.htmlparser.Tag;
|
||||
import org.htmlparser.filters.AndFilter;
|
||||
import org.htmlparser.nodes.TagNode;
|
||||
import org.htmlparser.tags.FormTag;
|
||||
import org.htmlparser.tags.ImageTag;
|
||||
import org.htmlparser.tags.InputTag;
|
||||
import org.htmlparser.tags.LinkTag;
|
||||
import org.htmlparser.tags.ScriptTag;
|
||||
import org.htmlparser.tags.TextareaTag;
|
||||
import org.htmlparser.util.NodeIterator;
|
||||
import org.htmlparser.util.NodeList;
|
||||
import org.htmlparser.util.ParserException;
|
||||
|
||||
/**
|
||||
* @author <a href="http://www.rogiel.com">Rogiel</a>
|
||||
*/
|
||||
public class HTMLPage {
|
||||
private final NodeList nodes;
|
||||
|
||||
private HTMLPage(Parser parser) throws ParserException {
|
||||
this.nodes = parser.parse(null);
|
||||
}
|
||||
|
||||
private <T extends Node> List<T> filter(final Class<T> nodeType,
|
||||
NodeFilter... filters) {
|
||||
final NodeFilter filter;
|
||||
if (filters.length == 1)
|
||||
filter = filters[0];
|
||||
else
|
||||
filter = new AndFilter(filters);
|
||||
try {
|
||||
return list(nodes.extractAllNodesThatMatch(filter, true));
|
||||
} catch (ParserException e) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private <T extends Node> List<T> list(final NodeList list)
|
||||
throws ParserException {
|
||||
final List<T> filtered = new ArrayList<>();
|
||||
final NodeIterator iterator = list.elements();
|
||||
while (iterator.hasMoreNodes()) {
|
||||
filtered.add((T) iterator.nextNode());
|
||||
}
|
||||
return filtered;
|
||||
}
|
||||
|
||||
public boolean containsPlain(Pattern pattern) {
|
||||
return pattern.matcher(asString()).find();
|
||||
}
|
||||
|
||||
public boolean contains(final Pattern pattern) {
|
||||
return !filter(Node.class, new ContainsFilter(pattern)).isEmpty();
|
||||
}
|
||||
|
||||
public boolean contains(final String text) {
|
||||
return contains(Pattern.compile(Pattern.quote(text)));
|
||||
}
|
||||
|
||||
public boolean containsIgnoreCase(final String text) {
|
||||
return !filter(
|
||||
Node.class,
|
||||
new ContainsInLowerCaseFilter(Pattern.compile(Pattern
|
||||
.quote(text.toLowerCase())))).isEmpty();
|
||||
}
|
||||
|
||||
public String findPlain(final Pattern pattern, int n) {
|
||||
final Matcher matcher = pattern.matcher(asString());
|
||||
if (matcher.find())
|
||||
return matcher.group(n);
|
||||
return null;
|
||||
}
|
||||
|
||||
public int findIntPlain(final Pattern pattern, int n) {
|
||||
return Integer.parseInt(findPlain(pattern, n));
|
||||
}
|
||||
|
||||
public double findDoublePlain(final Pattern pattern, int n) {
|
||||
return Double.parseDouble(findPlain(pattern, n));
|
||||
}
|
||||
|
||||
public String find(final Pattern pattern, int n) {
|
||||
for (final Node tag : filter(Tag.class, new ContainsFilter(pattern))) {
|
||||
final Matcher matcher = pattern.matcher(tag.getText());
|
||||
if (matcher.find())
|
||||
return matcher.group(n);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public int findAsInt(final Pattern pattern, int n) {
|
||||
String found = find(pattern, n);
|
||||
if (found == null)
|
||||
return 0;
|
||||
return Integer.parseInt(findScript(pattern, n));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to find a link that has an URI following the given pattern
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the link content, if found. <code>null</code> otherwise
|
||||
*/
|
||||
public String findLink(final Pattern pattern) {
|
||||
for (final LinkTag tag : filter(LinkTag.class, new LinkPatternFilter(
|
||||
pattern))) {
|
||||
return tag.getLink();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to find a frame that has an URI following the given pattern
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the iframe uri, if found. <code>null</code> otherwise
|
||||
*/
|
||||
public String findFrame(final Pattern pattern) {
|
||||
for (final TagNode tag : filter(TagNode.class, new FramePatternFilter(
|
||||
pattern))) {
|
||||
return tag.getAttribute("src");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to find a image that has an URI following the given pattern
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the iframe uri, if found. <code>null</code> otherwise
|
||||
*/
|
||||
public String findImage(final Pattern pattern) {
|
||||
for (final ImageTag tag : filter(ImageTag.class,
|
||||
new ImagePatternFilter(pattern))) {
|
||||
return tag.getImageURL();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to find a form which has an location that respects the given
|
||||
* pattern
|
||||
*
|
||||
* @param pattern
|
||||
* the pattern
|
||||
* @return the URI found, if any. <code>null</code> otherwise
|
||||
*/
|
||||
public String findFormAction(final Pattern pattern) {
|
||||
for (final FormTag tag : filter(FormTag.class,
|
||||
new FormActionPatternFilter(pattern))) {
|
||||
return tag.getFormLocation();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String inputValue(List<InputTag> tags) {
|
||||
for (final InputTag tag : tags) {
|
||||
return tag.getAttribute("value");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getInputValue(final String inputName) {
|
||||
return inputValue(filter(InputTag.class, new InputNameFilter(inputName)));
|
||||
}
|
||||
|
||||
public int getInputValueAsInt(final String inputName) {
|
||||
return Integer.parseInt(getInputValue(inputName));
|
||||
}
|
||||
|
||||
public String getInputValueById(final String id) {
|
||||
return inputValue(filter(InputTag.class, new InputIDFilter(id)));
|
||||
}
|
||||
|
||||
public int getInputValueByIdInt(final String id) {
|
||||
return Integer.parseInt(inputValue(filter(InputTag.class,
|
||||
new InputIDFilter(id))));
|
||||
}
|
||||
|
||||
public String getInputValue(final Pattern pattern) {
|
||||
return inputValue(filter(InputTag.class, new InputValuePatternFilter(
|
||||
pattern)));
|
||||
}
|
||||
|
||||
public String getTextareaValueById(String id) {
|
||||
return ((TextareaTag) getTagByID(id)).getStringText();
|
||||
}
|
||||
|
||||
public String getTextareaValueByName(String name) {
|
||||
return ((TextareaTag) getTagByName(name)).getStringText();
|
||||
}
|
||||
|
||||
public Tag getTagByID(final String id) {
|
||||
for (final Tag tag : filter(Tag.class, new IDFilter(id))) {
|
||||
return tag;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Tag getTagByName(final String name) {
|
||||
for (final Tag tag : filter(Tag.class, new NameFilter(name))) {
|
||||
return tag;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public String findScript(final Pattern pattern, int n) {
|
||||
for (final ScriptTag tag : filter(ScriptTag.class,
|
||||
new ScriptContainsFilter(pattern))) {
|
||||
final Matcher matcher = pattern.matcher(tag.getScriptCode());
|
||||
if (matcher.find())
|
||||
return matcher.group(n);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public String findScriptSrc(final Pattern pattern) {
|
||||
for (final ScriptTag tag : filter(ScriptTag.class, new ScriptSrcFilter(
|
||||
pattern))) {
|
||||
final Matcher matcher = pattern.matcher(tag.getAttribute("src"));
|
||||
if (matcher.matches())
|
||||
return matcher.group();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public int findScriptAsInt(final Pattern pattern, int n) {
|
||||
String found = findScript(pattern, n);
|
||||
if (found == null)
|
||||
return 0;
|
||||
return Integer.parseInt(found);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
// try {
|
||||
// return parser.parse(null).toHtml(false);
|
||||
// } catch (ParserException e1) {
|
||||
// return null;
|
||||
// }
|
||||
return nodes.toHtml(false);
|
||||
}
|
||||
|
||||
public static HTMLPage parse(String html) {
|
||||
try {
|
||||
return new HTMLPage(Parser.createParser(html, null));
|
||||
} catch (ParserException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public String asString() {
|
||||
StringBuffer buff = new StringBuffer();
|
||||
for (int i = 0; i < nodes.size(); i++) {
|
||||
// final String content = nodes.elementAt(i).toPlainTextString()
|
||||
// .replaceAll("\n", "").replaceAll("\\t", "").trim();
|
||||
// if (content.length() > 0) {
|
||||
// buff.append(" ").append(content);
|
||||
// }
|
||||
final String[] lines = nodes.elementAt(i).toPlainTextString()
|
||||
.split("\n");
|
||||
for (final String line : lines) {
|
||||
final String processed = line.trim();
|
||||
if (processed.length() > 0) {
|
||||
buff.append(line.trim()).append(" ");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return buff.toString();
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.tags.ImageTag;
|
||||
|
||||
public class ImagePatternFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern pattern;
|
||||
|
||||
public ImagePatternFilter(Pattern pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof ImageTag))
|
||||
return false;
|
||||
final ImageTag frame = (ImageTag) node;
|
||||
return pattern.matcher(frame.getImageURL()).matches();
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.tags.InputTag;
|
||||
|
||||
public class InputIDFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final String id;
|
||||
|
||||
public InputIDFilter(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof InputTag))
|
||||
return false;
|
||||
final InputTag input = (InputTag) node;
|
||||
if (input.getAttribute("id") == null)
|
||||
return false;
|
||||
if (!input.getAttribute("id").equals(id))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.tags.InputTag;
|
||||
|
||||
public class InputNameFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final String name;
|
||||
|
||||
public InputNameFilter(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof InputTag))
|
||||
return false;
|
||||
final InputTag input = (InputTag) node;
|
||||
if (input.getAttribute("name") == null)
|
||||
return false;
|
||||
if (!input.getAttribute("name").equals(name))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.tags.InputTag;
|
||||
|
||||
public class InputValuePatternFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern pattern;
|
||||
|
||||
public InputValuePatternFilter(Pattern pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof InputTag))
|
||||
return false;
|
||||
final InputTag input = (InputTag) node;
|
||||
if (input.getAttribute("value") == null)
|
||||
return false;
|
||||
if (!pattern.matcher(input.getAttribute("value")).matches())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.tags.LinkTag;
|
||||
|
||||
public class LinkPatternFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern pattern;
|
||||
|
||||
public LinkPatternFilter(Pattern pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof LinkTag))
|
||||
return false;
|
||||
final LinkTag link = (LinkTag) node;
|
||||
return pattern.matcher(link.getLink()).matches();
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.Tag;
|
||||
|
||||
public class NameFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final String name;
|
||||
|
||||
public NameFilter(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof Tag))
|
||||
return false;
|
||||
final Tag tag = (Tag) node;
|
||||
if (tag.getAttribute("name") == null)
|
||||
return false;
|
||||
if (!tag.getAttribute("name").equals(name))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.tags.ScriptTag;
|
||||
|
||||
public class ScriptContainsFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern pattern;
|
||||
|
||||
public ScriptContainsFilter(Pattern pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof ScriptTag))
|
||||
return false;
|
||||
final ScriptTag script = (ScriptTag) node;
|
||||
return pattern.matcher(script.getScriptCode()).find();
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.rogiel.httpchannel.util.htmlparser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.htmlparser.Node;
|
||||
import org.htmlparser.NodeFilter;
|
||||
import org.htmlparser.tags.ScriptTag;
|
||||
|
||||
public class ScriptSrcFilter implements NodeFilter {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private final Pattern pattern;
|
||||
|
||||
public ScriptSrcFilter(Pattern pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accept(Node node) {
|
||||
if (!(node instanceof ScriptTag))
|
||||
return false;
|
||||
final ScriptTag script = (ScriptTag) node;
|
||||
if (script.getAttribute("src") == null)
|
||||
return false;
|
||||
return pattern.matcher(script.getAttribute("src")).matches();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user