From 03869063566f1da1447705ae1485c757c581736d Mon Sep 17 00:00:00 2001 From: Rogiel Sulzbach Date: Sun, 6 May 2012 16:04:56 -0300 Subject: [PATCH] Implements a new, more clean and robust HTML parser --- .../httpchannel/service/AccountDetails.java | 14 + .../service/twoshared/TwoSharedService.java | 22 +- .../service/fourshared/FourSharedService.java | 14 +- .../depositfiles/DepositFilesService.java | 30 +- .../service/hotfile/HotFileService.java | 111 ++- .../service/ifile/IFileService.java | 10 +- .../service/megaupload/MegaUploadService.java | 18 +- .../multiupload/MultiUploadService.java | 23 +- .../service/uploadhere/UploadHereService.java | 20 +- .../service/uploadking/UploadKingService.java | 20 +- .../service/uptobox/UptoboxService.java | 85 +- ...java => UptoboxUploaderConfiguration.java} | 4 +- .../captcha/ReCaptchaExtractor.java | 11 +- .../com/rogiel/httpchannel/http/Request.java | 12 +- .../httpchannel/util/HttpClientUtils.java | 6 +- .../httpchannel/util/html/MatchedElement.java | 174 ++++ .../rogiel/httpchannel/util/html/Page.java | 841 ++++++++++++++++++ .../httpchannel/util/html/PageElement.java | 129 +++ .../httpchannel/util/html/SearchResults.java | 137 +++ .../filter/TypeTagFilter.java} | 33 +- .../matcher/IDTagMatcher.java} | 74 +- .../matcher/NameTagMatcher.java} | 74 +- .../htmlparser/FormActionPatternFilter.java | 42 - .../util/htmlparser/FramePatternFilter.java | 44 - .../httpchannel/util/htmlparser/HTMLPage.java | 304 ------- .../util/htmlparser/ImagePatternFilter.java | 42 - .../util/htmlparser/InputIDFilter.java | 44 - .../util/htmlparser/InputNameFilter.java | 44 - .../htmlparser/InputValuePatternFilter.java | 46 - .../util/htmlparser/LinkPatternFilter.java | 42 - .../util/htmlparser/NameFilter.java | 44 - .../util/htmlparser/ScriptContainsFilter.java | 42 - .../util/htmlparser/ScriptSrcFilter.java | 44 - 33 files changed, 1619 insertions(+), 981 deletions(-) rename 
httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/{UptoboxConfiguration.java => UptoboxUploaderConfiguration.java} (90%) create mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/MatchedElement.java create mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/Page.java create mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/PageElement.java create mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/SearchResults.java rename httpchannel-util/src/main/java/com/rogiel/httpchannel/util/{htmlparser/IDFilter.java => html/filter/TypeTagFilter.java} (64%) rename httpchannel-util/src/main/java/com/rogiel/httpchannel/util/{htmlparser/ContainsFilter.java => html/matcher/IDTagMatcher.java} (63%) rename httpchannel-util/src/main/java/com/rogiel/httpchannel/util/{htmlparser/ContainsInLowerCaseFilter.java => html/matcher/NameTagMatcher.java} (62%) delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/FormActionPatternFilter.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/FramePatternFilter.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/HTMLPage.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ImagePatternFilter.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputIDFilter.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputNameFilter.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputValuePatternFilter.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/LinkPatternFilter.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/NameFilter.java 
delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ScriptContainsFilter.java delete mode 100644 httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ScriptSrcFilter.java diff --git a/httpchannel-api/src/main/java/com/rogiel/httpchannel/service/AccountDetails.java b/httpchannel-api/src/main/java/com/rogiel/httpchannel/service/AccountDetails.java index 31551f6..9b60539 100644 --- a/httpchannel-api/src/main/java/com/rogiel/httpchannel/service/AccountDetails.java +++ b/httpchannel-api/src/main/java/com/rogiel/httpchannel/service/AccountDetails.java @@ -153,6 +153,20 @@ public interface AccountDetails { long getMaximumBandwidth(); } + /** + * Account details of services that offer hotlink traffic should implement + * this interface + * + * @author Rogiel + */ + public interface HotLinkingAccountDetails extends AccountDetails { + /** + * @return the currently free hotlink traffic. -1 means no + * limit + */ + long getHotlinkTraffic(); + } + /** * Service accounts that has accounts with limited bandwidth should * implement this interface diff --git a/httpchannel-service/httpchannel-service-2shared/src/main/java/com/rogiel/httpchannel/service/twoshared/TwoSharedService.java b/httpchannel-service/httpchannel-service-2shared/src/main/java/com/rogiel/httpchannel/service/twoshared/TwoSharedService.java index cfd6843..8be4f15 100644 --- a/httpchannel-service/httpchannel-service-2shared/src/main/java/com/rogiel/httpchannel/service/twoshared/TwoSharedService.java +++ b/httpchannel-service/httpchannel-service-2shared/src/main/java/com/rogiel/httpchannel/service/twoshared/TwoSharedService.java @@ -51,7 +51,7 @@ import com.rogiel.httpchannel.service.exception.DownloadNotAuthorizedException; import com.rogiel.httpchannel.service.exception.DownloadNotResumableException; import com.rogiel.httpchannel.service.exception.NoCaptchaServiceException; import com.rogiel.httpchannel.util.ExceptionUtils; -import 
com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; /** * This service handles uploads to TwoShared. @@ -165,7 +165,7 @@ public class TwoSharedService extends AbstractHttpService implements Service, AbstractUploader implements Uploader, LinkedUploadChannelCloseCallback { - private Future uploadFuture; + private Future uploadFuture; private String uploadID; public UploaderImpl(String filename, long filesize, @@ -176,12 +176,12 @@ public class TwoSharedService extends AbstractHttpService implements Service, @Override public UploadChannel openChannel() throws IOException { logger.debug("Starting upload to TwoShared"); - final HTMLPage page = get("http://www.2shared.com/").asPage(); + final Page page = get("http://www.2shared.com/").asPage(); // locate upload uri - final String uri = page.findFormAction(UPLOAD_URL_PATTERN); - final String mainDC = page.getInputValue("mainDC"); - uploadID = page.find(UPLOAD_ID_PATTERN, 1); + final String uri = page.form(UPLOAD_URL_PATTERN).asString(); + final String mainDC = page.inputByName("mainDC").asString(); + uploadID = page.search(UPLOAD_ID_PATTERN).asString(1); logger.debug("Upload URI: {}, DC: {}", uri, mainDC); @@ -198,10 +198,10 @@ public class TwoSharedService extends AbstractHttpService implements Service, public String finish() throws IOException { try { uploadFuture.get(); - final HTMLPage page = get( + final Page page = get( "http://www.2shared.com/uploadComplete.jsp?sId=" + uploadID).asPage(); - return page.getTextareaValueById("downloadLink"); + return page.textareaByID("downloadLink").asString(); } catch (InterruptedException e) { return null; } catch (ExecutionException e) { @@ -232,9 +232,9 @@ public class TwoSharedService extends AbstractHttpService implements Service, DownloadLinkNotFoundException, DownloadLimitExceededException, DownloadNotAuthorizedException, DownloadNotResumableException, UnsolvableCaptchaServiceException, NoCaptchaServiceException { - final HTMLPage 
page = get(uri).asPage(); - final String downloadUri = page.findScript( - DIRECT_DOWNLOAD_URL_PATTERN, 0); + final Page page = get(uri).asPage(); + final String downloadUri = page.script( + DIRECT_DOWNLOAD_URL_PATTERN).asString(); return download(get(downloadUri)); } } diff --git a/httpchannel-service/httpchannel-service-4shared/src/main/java/com/rogiel/httpchannel/service/fourshared/FourSharedService.java b/httpchannel-service/httpchannel-service-4shared/src/main/java/com/rogiel/httpchannel/service/fourshared/FourSharedService.java index 23106a8..fcec3d1 100644 --- a/httpchannel-service/httpchannel-service-4shared/src/main/java/com/rogiel/httpchannel/service/fourshared/FourSharedService.java +++ b/httpchannel-service/httpchannel-service-4shared/src/main/java/com/rogiel/httpchannel/service/fourshared/FourSharedService.java @@ -30,6 +30,9 @@ import com.rogiel.httpchannel.service.AbstractAuthenticator; import com.rogiel.httpchannel.service.AbstractHttpService; import com.rogiel.httpchannel.service.AbstractUploader; import com.rogiel.httpchannel.service.AccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.DiskQuotaAccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.FilesizeLimitAccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.AuthenticationService; import com.rogiel.httpchannel.service.Authenticator; import com.rogiel.httpchannel.service.AuthenticatorCapability; @@ -42,9 +45,6 @@ import com.rogiel.httpchannel.service.UploadChannel; import com.rogiel.httpchannel.service.UploadService; import com.rogiel.httpchannel.service.Uploader; import com.rogiel.httpchannel.service.UploaderCapability; -import com.rogiel.httpchannel.service.AccountDetails.DiskQuotaAccountDetails; -import com.rogiel.httpchannel.service.AccountDetails.FilesizeLimitAccountDetails; -import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import 
com.rogiel.httpchannel.service.channel.LinkedUploadChannel; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback; import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration; @@ -52,7 +52,7 @@ import com.rogiel.httpchannel.service.config.NullUploaderConfiguration; import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialException; import com.rogiel.httpchannel.service.exception.ChannelServiceException; import com.rogiel.httpchannel.service.exception.DownloadLinkNotFoundException; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; /** * This service handles uploads to 4shared.com. @@ -168,7 +168,7 @@ public class FourSharedService extends AbstractHttpService implements Service, AbstractUploader implements Uploader, LinkedUploadChannelCloseCallback { - private Future uploadFuture; + private Future uploadFuture; public UploaderImpl(String filename, long filesize, NullUploaderConfiguration configuration) { @@ -210,8 +210,8 @@ public class FourSharedService extends AbstractHttpService implements Service, @Override public String finish() throws IOException { try { - final long linkID = Long.parseLong(uploadFuture.get() - .getInputValueById("uploadedFileId")); + final long linkID = uploadFuture.get() + .inputByID("uploadedFileId").asLong(); return api.getFileDownloadLink(account.getUsername(), getPassword(), linkID); } catch (InterruptedException e) { diff --git a/httpchannel-service/httpchannel-service-depositfiles/src/main/java/com/rogiel/httpchannel/service/depositfiles/DepositFilesService.java b/httpchannel-service/httpchannel-service-depositfiles/src/main/java/com/rogiel/httpchannel/service/depositfiles/DepositFilesService.java index 3ba3a33..554dd0b 100644 --- a/httpchannel-service/httpchannel-service-depositfiles/src/main/java/com/rogiel/httpchannel/service/depositfiles/DepositFilesService.java +++ 
b/httpchannel-service/httpchannel-service-depositfiles/src/main/java/com/rogiel/httpchannel/service/depositfiles/DepositFilesService.java @@ -48,7 +48,7 @@ import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadCh import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration; import com.rogiel.httpchannel.service.config.NullUploaderConfiguration; import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialException; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; /** * This service handles uploads to UploadKing.com. @@ -147,9 +147,10 @@ public class DepositFilesService extends AbstractHttpService implements @Override public CapabilityMatrix getAuthenticationCapability() { - return new CapabilityMatrix(AuthenticatorCapability.ACCOUNT_DETAILS); + return new CapabilityMatrix( + AuthenticatorCapability.ACCOUNT_DETAILS); } - + @Override public AccountDetails getAccountDetails() { return account; @@ -159,7 +160,7 @@ public class DepositFilesService extends AbstractHttpService implements AbstractUploader implements Uploader, LinkedUploadChannelCloseCallback { - private Future uploadFuture; + private Future uploadFuture; public UploaderImpl(String filename, long filesize, NullUploaderConfiguration configuration) { @@ -169,11 +170,13 @@ public class DepositFilesService extends AbstractHttpService implements @Override public UploadChannel openChannel() throws IOException { logger.debug("Starting upload to depositfiles.com"); - final HTMLPage page = get("http://www.depositfiles.com/").asPage(); + final Page page = get("http://www.depositfiles.com/").asPage(); - final String uri = page.findFormAction(UPLOAD_URI_PATTERN); - final String uploadID = page.getInputValue("UPLOAD_IDENTIFIER"); - final String maxFileSize = page.getInputValue("MAX_FILE_SIZE"); + final String uri = page.form(UPLOAD_URI_PATTERN).asString(); + final String uploadID = 
page.inputByName("UPLOAD_IDENTIFIER") + .asString(); + final String maxFileSize = page.inputByName("MAX_FILE_SIZE") + .asString(); logger.debug("Upload URI: {}, ID: {}", uri, uploadID); @@ -189,8 +192,8 @@ public class DepositFilesService extends AbstractHttpService implements @Override public String finish() throws IOException { try { - final String link = uploadFuture.get().findScript( - DOWNLOAD_URI_PATTERN, 0); + final String link = uploadFuture.get() + .script(DOWNLOAD_URI_PATTERN).asString(); if (link == null) return null; return link; @@ -213,7 +216,7 @@ public class DepositFilesService extends AbstractHttpService implements @Override public AccountDetails login() throws IOException { logger.debug("Authenticating into depositfiles.com"); - HTMLPage page = post("http://depositfiles.com/login.php?return=%2F") + Page page = post("http://depositfiles.com/login.php?return=%2F") .parameter("go", true) .parameter("login", credential.getUsername()) .parameter("password", credential.getPassword()).asPage(); @@ -239,9 +242,10 @@ public class DepositFilesService extends AbstractHttpService implements throw new UnsolvableCaptchaServiceException(); } else { captchaService.valid(captcha); - if (!page.contains(VALID_LOGIN_REDIRECT)) + if (!page.search(VALID_LOGIN_REDIRECT).hasResults()) throw new AuthenticationInvalidCredentialException(); - return (account = new AccountDetailsImpl(credential.getUsername())); + return (account = new AccountDetailsImpl( + credential.getUsername())); } } diff --git a/httpchannel-service/httpchannel-service-hotfile/src/main/java/com/rogiel/httpchannel/service/hotfile/HotFileService.java b/httpchannel-service/httpchannel-service-hotfile/src/main/java/com/rogiel/httpchannel/service/hotfile/HotFileService.java index ef68ca9..904aea6 100644 --- a/httpchannel-service/httpchannel-service-hotfile/src/main/java/com/rogiel/httpchannel/service/hotfile/HotFileService.java +++ 
b/httpchannel-service/httpchannel-service-hotfile/src/main/java/com/rogiel/httpchannel/service/hotfile/HotFileService.java @@ -25,7 +25,6 @@ import java.util.concurrent.Future; import java.util.regex.Pattern; import org.apache.http.client.ClientProtocolException; -import org.htmlparser.Tag; import com.rogiel.httpchannel.service.AbstractAccountDetails; import com.rogiel.httpchannel.service.AbstractAuthenticator; @@ -33,6 +32,9 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader; import com.rogiel.httpchannel.service.AbstractHttpService; import com.rogiel.httpchannel.service.AbstractUploader; import com.rogiel.httpchannel.service.AccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.HotLinkingAccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.ReferralAccountDetails; import com.rogiel.httpchannel.service.AuthenticationService; import com.rogiel.httpchannel.service.Authenticator; import com.rogiel.httpchannel.service.AuthenticatorCapability; @@ -50,14 +52,15 @@ import com.rogiel.httpchannel.service.UploadChannel; import com.rogiel.httpchannel.service.UploadService; import com.rogiel.httpchannel.service.Uploader; import com.rogiel.httpchannel.service.UploaderCapability; -import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback; import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration; import com.rogiel.httpchannel.service.config.NullDownloaderConfiguration; import com.rogiel.httpchannel.service.config.NullUploaderConfiguration; import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialException; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.Filesizes; +import 
com.rogiel.httpchannel.util.html.Page; +import com.rogiel.httpchannel.util.html.SearchResults; /** * This service handles login, upload and download to HotFile.com. @@ -75,17 +78,31 @@ public class HotFileService extends AbstractHttpService implements Service, public static final ServiceID SERVICE_ID = ServiceID.create("hotfile"); private static final Pattern UPLOAD_URI_PATTERN = Pattern - .compile("http://u[0-9]*\\.hotfile\\.com/upload\\.cgi\\?[0-9]*"); + .compile("http[s]?://u[0-9]+\\.hotfile\\.com/upload\\.cgi\\?[0-9]*"); private static final Pattern DOWNLOAD_DIRECT_LINK_PATTERN = Pattern - .compile("http://hotfile\\.com/get/([0-9]*)/([A-Za-z0-9]*)/([A-Za-z0-9]*)/(.*)"); + .compile("http[s]?://hotfile\\.com/get/([0-9]+)/([A-Za-z0-9]+)/([A-Za-z0-9]+)/(.+)"); // private static final Pattern DOWNLOAD_TIMER = Pattern // .compile("timerend=d\\.getTime\\(\\)\\+([0-9]*);"); // private static final Pattern DOWNLOAD_FILESIZE = Pattern // .compile("[0-9]*(\\.[0-9]*)? (K|M|G)B"); private static final Pattern DOWNLOAD_URI_PATTERN = Pattern - .compile("http://hotfile\\.com/dl/([0-9]*)/([A-Za-z0-9]*)/(.*)"); + .compile("http[s]?://hotfile\\.com/dl/([0-9]+)/([A-Za-z0-9]+)/(.+)"); + + // account + private static final Pattern ACCOUNT_NAME_PATTERN = Pattern + .compile("User: ([^\\|]+)"); + + private static final Pattern ACCOUNT_TYPE_PATTERN = Pattern + .compile("Account: Free"); + + private static final Pattern HOTLINK_TRAFFIC_PATTERN = Pattern.compile( + "Hotlink traffic left: ([0-9]+(\\.[0-9]+)?)(K|M|G)b", + Pattern.CASE_INSENSITIVE); + + private static final Pattern REFERRAL_URL_PATTERN = Pattern + .compile("http[s]?://hotfile\\.com/register\\.html\\?reff=[0-9]+"); @Override public ServiceID getServiceID() { @@ -189,9 +206,10 @@ public class HotFileService extends AbstractHttpService implements Service, @Override public CapabilityMatrix getAuthenticationCapability() { - return new CapabilityMatrix(AuthenticatorCapability.ACCOUNT_DETAILS); + return new CapabilityMatrix( + 
AuthenticatorCapability.ACCOUNT_DETAILS); } - + @Override public AccountDetails getAccountDetails() { return account; @@ -201,7 +219,7 @@ public class HotFileService extends AbstractHttpService implements Service, AbstractUploader implements Uploader, LinkedUploadChannelCloseCallback { - private Future uploadFuture; + private Future uploadFuture; public UploaderImpl(String filename, long filesize, NullUploaderConfiguration configuration) { @@ -211,8 +229,8 @@ public class HotFileService extends AbstractHttpService implements Service, @Override public UploadChannel openChannel() throws IOException { logger.debug("Starting upload to hotfile.com"); - final HTMLPage page = get("http://www.hotfile.com/").asPage(); - final String action = page.findFormAction(UPLOAD_URI_PATTERN); + final Page page = get("http://www.hotfile.com/").asPage(); + final String action = page.form(UPLOAD_URI_PATTERN).asString(); logger.debug("Upload URI is {}", action); @@ -226,7 +244,8 @@ public class HotFileService extends AbstractHttpService implements Service, @Override public String finish() throws IOException { try { - return uploadFuture.get().getInputValue(DOWNLOAD_URI_PATTERN); + return uploadFuture.get().input(DOWNLOAD_URI_PATTERN) + .asString(); } catch (InterruptedException e) { return null; } catch (ExecutionException e) { @@ -245,7 +264,7 @@ public class HotFileService extends AbstractHttpService implements Service, public DownloadChannel openChannel(DownloadListener listener, long position) throws IOException { logger.debug("Downloading {} from hotfile.com", uri); - final HTMLPage page = get(uri).asPage(); + final Page page = get(uri).asPage(); // // try to find timer // final String stringTimer = PatternUtils.find(DOWNLOAD_TIMER, @@ -259,8 +278,8 @@ public class HotFileService extends AbstractHttpService implements Service, // + " milliseconds"); // } - final String downloadUrl = page - .findLink(DOWNLOAD_DIRECT_LINK_PATTERN); + final String downloadUrl = 
page.link(DOWNLOAD_DIRECT_LINK_PATTERN) + .asString(); logger.debug("Download link is {}", downloadUrl); // final String tmHash = PatternUtils.find(DOWNLOAD_TMHASH_PATTERN, // content);F @@ -284,15 +303,32 @@ public class HotFileService extends AbstractHttpService implements Service, public AccountDetails login() throws ClientProtocolException, IOException { logger.debug("Authenticating hotfile.com"); - HTMLPage page = post("http://www.hotfile.com/login.php") + Page page = post("http://www.hotfile.com/login.php") .parameter("returnto", "/index.php") .parameter("user", credential.getUsername()) .parameter("pass", credential.getPassword()).asPage(); - final Tag accountTag = page.getTagByID("account"); - if (accountTag == null) + page = get("http://www.hotfile.com/myreferals.html?lang=en") + .asPage(); + + final SearchResults usernameResults = page + .search(ACCOUNT_NAME_PATTERN); + if (!usernameResults.hasResults()) throw new AuthenticationInvalidCredentialException(); - return (account = new AccountDetailsImpl(credential.getUsername())); + + final String username = usernameResults.asString(1); + final String type = page.search(ACCOUNT_TYPE_PATTERN).asString(); + + final SearchResults trafficResults = page + .search(HOTLINK_TRAFFIC_PATTERN); + final long hotlinkTraffic = Filesizes.auto( + trafficResults.asDouble(1), trafficResults.asString(3)); + + final String referralURL = page.search(REFERRAL_URL_PATTERN) + .asString(); + + return (account = new AccountDetailsImpl(username, type == null, + hotlinkTraffic, referralURL)); } @Override @@ -304,19 +340,48 @@ public class HotFileService extends AbstractHttpService implements Service, } private class AccountDetailsImpl extends AbstractAccountDetails implements - PremiumAccountDetails { + PremiumAccountDetails, ReferralAccountDetails, + HotLinkingAccountDetails { + private final boolean premium; + private final long hotlinkTraffic; + private final String referralURL; + /** * @param username * the username + * @param 
premium + * whether the account is premium + * @param hotlinkTraffic + * the available hotlink traffic + * @param referralURL + * the referral url */ - public AccountDetailsImpl(String username) { + public AccountDetailsImpl(String username, boolean premium, + long hotlinkTraffic, String referralURL) { super(HotFileService.this, username); + this.premium = premium; + this.hotlinkTraffic = hotlinkTraffic; + this.referralURL = referralURL; } @Override public boolean isPremium() { - // TODO implement this - return false; + return premium; + } + + @Override + public long getHotlinkTraffic() { + return hotlinkTraffic; + } + + @Override + public int getMembersReferred() { + return -1; + } + + @Override + public String getReferralURL() { + return referralURL; } } diff --git a/httpchannel-service/httpchannel-service-ifileit/src/main/java/com/rogiel/httpchannel/service/ifile/IFileService.java b/httpchannel-service/httpchannel-service-ifileit/src/main/java/com/rogiel/httpchannel/service/ifile/IFileService.java index a804143..b1e67cf 100644 --- a/httpchannel-service/httpchannel-service-ifileit/src/main/java/com/rogiel/httpchannel/service/ifile/IFileService.java +++ b/httpchannel-service/httpchannel-service-ifileit/src/main/java/com/rogiel/httpchannel/service/ifile/IFileService.java @@ -36,7 +36,7 @@ import com.rogiel.httpchannel.service.UploaderCapability; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback; import com.rogiel.httpchannel.service.config.NullUploaderConfiguration; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; /** * This service handles login, upload and download to HotFile.com. 
@@ -115,7 +115,7 @@ public class IFileService extends AbstractHttpService implements Service, AbstractUploader implements Uploader, LinkedUploadChannelCloseCallback { - private Future uploadFuture; + private Future uploadFuture; public UploaderImpl(String filename, long filesize, NullUploaderConfiguration configuration) { @@ -125,9 +125,9 @@ public class IFileService extends AbstractHttpService implements Service, @Override public UploadChannel openChannel() throws IOException { logger.debug("Starting upload to ifile.it"); - final HTMLPage page = get("http://ifile.it/upload-classic.html") + final Page page = get("http://ifile.it/upload-classic.html") .asPage(); - final String action = page.findFormAction(UPLOAD_URI_PATTERN); + final String action = page.form(UPLOAD_URI_PATTERN).asString(); logger.debug("Upload URI is {}", action); @@ -141,7 +141,7 @@ public class IFileService extends AbstractHttpService implements Service, @Override public String finish() throws IOException { try { - return uploadFuture.get().getInputValue(DOWNLOAD_URI_PATTERN); + return uploadFuture.get().input(DOWNLOAD_URI_PATTERN).asString(); } catch (InterruptedException e) { return null; } catch (ExecutionException e) { diff --git a/httpchannel-service/httpchannel-service-megaupload/src/main/java/com/rogiel/httpchannel/service/megaupload/MegaUploadService.java b/httpchannel-service/httpchannel-service-megaupload/src/main/java/com/rogiel/httpchannel/service/megaupload/MegaUploadService.java index a5b8433..6107aac 100644 --- a/httpchannel-service/httpchannel-service-megaupload/src/main/java/com/rogiel/httpchannel/service/megaupload/MegaUploadService.java +++ b/httpchannel-service/httpchannel-service-megaupload/src/main/java/com/rogiel/httpchannel/service/megaupload/MegaUploadService.java @@ -34,6 +34,7 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader; import com.rogiel.httpchannel.service.AbstractHttpService; import com.rogiel.httpchannel.service.AbstractUploader; import 
com.rogiel.httpchannel.service.AccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.AuthenticationService; import com.rogiel.httpchannel.service.Authenticator; import com.rogiel.httpchannel.service.AuthenticatorCapability; @@ -51,7 +52,6 @@ import com.rogiel.httpchannel.service.UploadChannel; import com.rogiel.httpchannel.service.UploadService; import com.rogiel.httpchannel.service.Uploader; import com.rogiel.httpchannel.service.UploaderCapability; -import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback; import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration; @@ -60,7 +60,7 @@ import com.rogiel.httpchannel.service.exception.DownloadLimitExceededException; import com.rogiel.httpchannel.service.exception.DownloadLinkNotFoundException; import com.rogiel.httpchannel.util.HttpClientUtils; import com.rogiel.httpchannel.util.PatternUtils; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; /** * This service handles login, upload and download to MegaUpload.com. 
@@ -223,9 +223,9 @@ public class MegaUploadService extends AbstractHttpService implements Service, @Override public UploadChannel openChannel() throws IOException { logger.debug("Starting upload to megaupload.com"); - final HTMLPage page = get("http://www.megaupload.com/multiupload/") + final Page page = get("http://www.megaupload.com/multiupload/") .asPage(); - final String uri = page.findFormAction(UPLOAD_URL_PATTERN); + final String uri = page.form(UPLOAD_URL_PATTERN).asString(); logger.debug("Upload URI is {}", uri); final LinkedUploadChannel channel = createLinkedChannel(this); @@ -279,16 +279,16 @@ public class MegaUploadService extends AbstractHttpService implements Service, response = get(uri).request(); } - final HTMLPage page = HttpClientUtils.toPage(response); + final Page page = HttpClientUtils.toPage(response); // try to find timer - int timer = page.findScriptAsInt(DOWNLOAD_TIMER, 1); + int timer = page.script(DOWNLOAD_TIMER).asInteger(1); if (timer > 0 && configuration.getRespectWaitTime()) { logger.debug(""); timer(listener, timer * 1000); } final String downloadUrl = page - .findLink(DOWNLOAD_DIRECT_LINK_PATTERN); + .link(DOWNLOAD_DIRECT_LINK_PATTERN).asString(); if (downloadUrl != null && downloadUrl.length() > 0) { final HttpResponse downloadResponse = get(downloadUrl) .position(position).request(); @@ -322,12 +322,12 @@ public class MegaUploadService extends AbstractHttpService implements Service, @Override public AccountDetails login() throws IOException { logger.debug("Starting login to megaupload.com"); - final HTMLPage page = post("http://www.megaupload.com/?c=login") + final Page page = post("http://www.megaupload.com/?c=login") .parameter("login", true) .parameter("username", credential.getUsername()) .parameter("", credential.getPassword()).asPage(); - String username = page.findScript(LOGIN_USERNAME_PATTERN, 1); + String username = page.script(LOGIN_USERNAME_PATTERN).asString(1); if (username == null) throw new 
AuthenticationInvalidCredentialException(); return (account = new AccountDetailsImpl(credential.getUsername())); diff --git a/httpchannel-service/httpchannel-service-multiupload/src/main/java/com/rogiel/httpchannel/service/multiupload/MultiUploadService.java b/httpchannel-service/httpchannel-service-multiupload/src/main/java/com/rogiel/httpchannel/service/multiupload/MultiUploadService.java index cb709a6..2b85a94 100644 --- a/httpchannel-service/httpchannel-service-multiupload/src/main/java/com/rogiel/httpchannel/service/multiupload/MultiUploadService.java +++ b/httpchannel-service/httpchannel-service-multiupload/src/main/java/com/rogiel/httpchannel/service/multiupload/MultiUploadService.java @@ -31,6 +31,7 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader; import com.rogiel.httpchannel.service.AbstractHttpService; import com.rogiel.httpchannel.service.AbstractUploader; import com.rogiel.httpchannel.service.AccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.AuthenticationService; import com.rogiel.httpchannel.service.Authenticator; import com.rogiel.httpchannel.service.AuthenticatorCapability; @@ -48,7 +49,6 @@ import com.rogiel.httpchannel.service.UploadChannel; import com.rogiel.httpchannel.service.UploadService; import com.rogiel.httpchannel.service.Uploader; import com.rogiel.httpchannel.service.UploaderCapability; -import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback; import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration; @@ -60,8 +60,7 @@ import com.rogiel.httpchannel.service.exception.DownloadNotAuthorizedException; import com.rogiel.httpchannel.service.exception.DownloadNotResumableException; import 
com.rogiel.httpchannel.service.multiupload.MultiUploadUploaderConfiguration.MultiUploadMirrorService; import com.rogiel.httpchannel.util.PatternUtils; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; - +import com.rogiel.httpchannel.util.html.Page; /** * This service handles uploads to MultiUpload.nl. @@ -196,9 +195,10 @@ public class MultiUploadService extends AbstractHttpService implements Service, @Override public CapabilityMatrix getAuthenticationCapability() { - return new CapabilityMatrix(AuthenticatorCapability.ACCOUNT_DETAILS); + return new CapabilityMatrix( + AuthenticatorCapability.ACCOUNT_DETAILS); } - + @Override public AccountDetails getAccountDetails() { return account; @@ -219,7 +219,7 @@ public class MultiUploadService extends AbstractHttpService implements Service, public UploadChannel openChannel() throws IOException { logger.debug("Starting upload to multiupload.nl"); final String uri = get("http://www.multiupload.nl/").asPage() - .findFormAction(UPLOAD_URI_PATTERN); + .form(UPLOAD_URI_PATTERN).asString(); logger.debug("Upload URI is {}", uri); final LinkedUploadChannel channel = createLinkedChannel(this); @@ -273,8 +273,9 @@ public class MultiUploadService extends AbstractHttpService implements Service, long position) throws IOException, DownloadLinkNotFoundException, DownloadLimitExceededException, DownloadNotAuthorizedException, DownloadNotResumableException { - final HTMLPage page = get(uri).asPage(); - final String link = page.findLink(DIRECT_DOWNLOAD_LINK_PATTERN); + final Page page = get(uri).asPage(); + final String link = page.link(DIRECT_DOWNLOAD_LINK_PATTERN) + .asString(); logger.debug("Direct download link is {}", link); if (link == null) throw new DownloadLinkNotFoundException(); @@ -292,11 +293,13 @@ public class MultiUploadService extends AbstractHttpService implements Service, @Override public AccountDetails login() throws IOException { - final HTMLPage page = post("http://www.multiupload.nl/login") + final Page page = 
post("http://www.multiupload.nl/login") .parameter("username", credential.getUsername()) .parameter("password", credential.getPassword()).asPage(); - if (!page.containsIgnoreCase(credential.getUsername())) + if (page.search(Pattern.compile( + Pattern.quote(credential.getUsername()), + Pattern.CASE_INSENSITIVE)) != null) throw new AuthenticationInvalidCredentialException(); return (account = new AccountDetailsImpl(credential.getUsername())); } diff --git a/httpchannel-service/httpchannel-service-uploadhere/src/main/java/com/rogiel/httpchannel/service/uploadhere/UploadHereService.java b/httpchannel-service/httpchannel-service-uploadhere/src/main/java/com/rogiel/httpchannel/service/uploadhere/UploadHereService.java index 7a25a82..fb784ae 100644 --- a/httpchannel-service/httpchannel-service-uploadhere/src/main/java/com/rogiel/httpchannel/service/uploadhere/UploadHereService.java +++ b/httpchannel-service/httpchannel-service-uploadhere/src/main/java/com/rogiel/httpchannel/service/uploadhere/UploadHereService.java @@ -32,6 +32,7 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader; import com.rogiel.httpchannel.service.AbstractHttpService; import com.rogiel.httpchannel.service.AbstractUploader; import com.rogiel.httpchannel.service.AccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.AuthenticationService; import com.rogiel.httpchannel.service.Authenticator; import com.rogiel.httpchannel.service.AuthenticatorCapability; @@ -49,7 +50,6 @@ import com.rogiel.httpchannel.service.UploadChannel; import com.rogiel.httpchannel.service.UploadService; import com.rogiel.httpchannel.service.Uploader; import com.rogiel.httpchannel.service.UploaderCapability; -import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel; import 
com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback; import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration; @@ -59,7 +59,7 @@ import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialE import com.rogiel.httpchannel.service.exception.DownloadLinkNotFoundException; import com.rogiel.httpchannel.service.exception.InvalidCaptchaException; import com.rogiel.httpchannel.util.PatternUtils; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; /** * This service handles uploads to UploadKing.com. @@ -215,11 +215,11 @@ public class UploadHereService extends AbstractHttpService implements Service, @Override public UploadChannel openChannel() throws IOException { - final HTMLPage page = get("http://www.uploadhere.com/").asPage(); + final Page page = get("http://www.uploadhere.com/").asPage(); - final String userCookie = page.getInputValueById("usercookie"); - final String uri = page.findFormAction(UPLOAD_URI_PATTERN); - final String uploadID = page.getInputValue("UPLOAD_IDENTIFIER"); + final String userCookie = page.inputByID("usercookie").asString(); + final String uri = page.form(UPLOAD_URI_PATTERN).asString(); + final String uploadID = page.inputByName("UPLOAD_IDENTIFIER").asString(); logger.debug("Upload URI: {}, UserCookie: {}, UploadID: {}", new Object[] { uri, userCookie, uploadID }); @@ -262,9 +262,9 @@ public class UploadHereService extends AbstractHttpService implements Service, @Override public DownloadChannel openChannel(DownloadListener listener, long position) throws IOException { - HTMLPage page = get(uri).asPage(); + Page page = get(uri).asPage(); - final int waitTime = page.findScriptAsInt(TIMER_PATTERN, 1) * 1000; + final int waitTime = page.script(TIMER_PATTERN).asInteger(1) * 1000; logger.debug("Wait time is {}", waitTime); timer(listener, waitTime); @@ -309,11 +309,11 @@ public class UploadHereService extends 
AbstractHttpService implements Service, @Override public AccountDetails login() throws IOException { - final HTMLPage page = post("http://www.uploadhere.com/login") + final Page page = post("http://www.uploadhere.com/login") .parameter("do", "login") .parameter("username", credential.getUsername()) .parameter("password", credential.getPassword()).asPage(); - if (page.contains(INVALID_LOGIN_STRING)) + if (page.searchFirst(INVALID_LOGIN_STRING).hasResults()) throw new AuthenticationInvalidCredentialException(); return (account = new AccountDetailsImpl(credential.getUsername())); } diff --git a/httpchannel-service/httpchannel-service-uploadking/src/main/java/com/rogiel/httpchannel/service/uploadking/UploadKingService.java b/httpchannel-service/httpchannel-service-uploadking/src/main/java/com/rogiel/httpchannel/service/uploadking/UploadKingService.java index 79aa16d..8d7b03d 100644 --- a/httpchannel-service/httpchannel-service-uploadking/src/main/java/com/rogiel/httpchannel/service/uploadking/UploadKingService.java +++ b/httpchannel-service/httpchannel-service-uploadking/src/main/java/com/rogiel/httpchannel/service/uploadking/UploadKingService.java @@ -32,6 +32,7 @@ import com.rogiel.httpchannel.service.AbstractHttpDownloader; import com.rogiel.httpchannel.service.AbstractHttpService; import com.rogiel.httpchannel.service.AbstractUploader; import com.rogiel.httpchannel.service.AccountDetails; +import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import com.rogiel.httpchannel.service.AuthenticationService; import com.rogiel.httpchannel.service.Authenticator; import com.rogiel.httpchannel.service.AuthenticatorCapability; @@ -49,7 +50,6 @@ import com.rogiel.httpchannel.service.UploadChannel; import com.rogiel.httpchannel.service.UploadService; import com.rogiel.httpchannel.service.Uploader; import com.rogiel.httpchannel.service.UploaderCapability; -import com.rogiel.httpchannel.service.AccountDetails.PremiumAccountDetails; import 
com.rogiel.httpchannel.service.channel.LinkedUploadChannel; import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadChannelCloseCallback; import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration; @@ -59,7 +59,7 @@ import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialE import com.rogiel.httpchannel.service.exception.DownloadLinkNotFoundException; import com.rogiel.httpchannel.service.exception.InvalidCaptchaException; import com.rogiel.httpchannel.util.PatternUtils; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; /** * This service handles uploads to zshare.net. @@ -215,11 +215,11 @@ public class UploadKingService extends AbstractHttpService implements Service, @Override public UploadChannel openChannel() throws IOException { - final HTMLPage page = get("http://www.uploadking.com/").asPage(); + final Page page = get("http://www.uploadking.com/").asPage(); - final String userCookie = page.getInputValueById("usercookie"); - final String uri = page.findFormAction(UPLOAD_URI_PATTERN); - final String uploadID = page.getInputValue("UPLOAD_IDENTIFIER"); + final String userCookie = page.inputByID("usercookie").asString(); + final String uri = page.form(UPLOAD_URI_PATTERN).asString(); + final String uploadID = page.inputByName("UPLOAD_IDENTIFIER").asString(); logger.debug("Upload URI: {}, UserCookie: {}, UploadID: {}", new Object[] { uri, userCookie, uploadID }); @@ -259,9 +259,9 @@ public class UploadKingService extends AbstractHttpService implements Service, @Override public DownloadChannel openChannel(DownloadListener listener, long position) throws IOException { - HTMLPage page = get(uri).asPage(); + Page page = get(uri).asPage(); - final int waitTime = page.findScriptAsInt(TIMER_PATTERN, 1) * 1000; + final int waitTime = page.script(TIMER_PATTERN).asInteger(1) * 1000; logger.debug("Wait time is {}", waitTime); timer(listener, waitTime); @@ -306,11 
+306,11 @@ public class UploadKingService extends AbstractHttpService implements Service, @Override public AccountDetails login() throws IOException { - final HTMLPage page = post("http://www.uploadking.com/login") + final Page page = post("http://www.uploadking.com/login") .parameter("do", "login") .parameter("username", credential.getUsername()) .parameter("password", credential.getPassword()).asPage(); - if (page.contains(INVALID_LOGIN_STRING)) + if (page.searchFirst(INVALID_LOGIN_STRING).hasResults()) throw new AuthenticationInvalidCredentialException(); return (account = new AccountDetailsImpl(credential.getUsername())); } diff --git a/httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxService.java b/httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxService.java index 83027a8..ccbde2d 100644 --- a/httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxService.java +++ b/httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxService.java @@ -50,7 +50,8 @@ import com.rogiel.httpchannel.service.channel.LinkedUploadChannel.LinkedUploadCh import com.rogiel.httpchannel.service.config.NullAuthenticatorConfiguration; import com.rogiel.httpchannel.service.exception.AuthenticationInvalidCredentialException; import com.rogiel.httpchannel.util.Filesizes; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; +import com.rogiel.httpchannel.util.html.SearchResults; /** * This service handles login, upload and download to uptobox.com. 
@@ -59,7 +60,7 @@ import com.rogiel.httpchannel.util.htmlparser.HTMLPage; * @since 1.0 */ public class UptoboxService extends AbstractHttpService implements Service, - UploadService, + UploadService, AuthenticationService { /** * This service ID @@ -97,20 +98,20 @@ public class UptoboxService extends AbstractHttpService implements Service, } @Override - public Uploader getUploader(String filename, - long filesize, UptoboxConfiguration configuration) { + public Uploader getUploader(String filename, + long filesize, UptoboxUploaderConfiguration configuration) { return new UploaderImpl(filename, filesize, configuration); } @Override - public Uploader getUploader(String filename, + public Uploader getUploader(String filename, long filesize) { return getUploader(filename, filesize, newUploaderConfiguration()); } @Override - public UptoboxConfiguration newUploaderConfiguration() { - return new UptoboxConfiguration(); + public UptoboxUploaderConfiguration newUploaderConfiguration() { + return new UptoboxUploaderConfiguration(); } @Override @@ -164,28 +165,29 @@ public class UptoboxService extends AbstractHttpService implements Service, return account; } - protected class UploaderImpl extends AbstractUploader - implements Uploader, + protected class UploaderImpl extends + AbstractUploader implements + Uploader, LinkedUploadChannelCloseCallback { - private Future uploadFuture; + private Future uploadFuture; public UploaderImpl(String filename, long filesize, - UptoboxConfiguration configuration) { + UptoboxUploaderConfiguration configuration) { super(UptoboxService.this, filename, filesize, configuration); } @Override public UploadChannel openChannel() throws IOException { - logger.debug("Starting upload to ifile.it"); - final HTMLPage page = get("http://uptobox.com/").asPage(); - String action = page.findFormAction(UPLOAD_URI_PATTERN); - final String srvTmpUrl = page.getInputValue("srv_tmp_url"); + logger.debug("Starting upload to uptobox.com"); + final Page page = 
get("http://uptobox.com/").asPage(); + String action = page.form(UPLOAD_URI_PATTERN).asString(); + final String srvTmpUrl = page.inputByName("srv_tmp_url").asString(); if (account != null) { action += "&type=reg"; } - final String sessionID = page.getInputValue("sess_id"); + final String sessionID = page.inputByName("sess_id").asString(); logger.debug("Upload URI is {}", action); @@ -202,7 +204,7 @@ public class UptoboxService extends AbstractHttpService implements Service, @Override public String finish() throws IOException { try { - return uploadFuture.get().findLink(DOWNLOAD_URI_PATTERN); + return uploadFuture.get().link(DOWNLOAD_URI_PATTERN).asString(); } catch (InterruptedException e) { return null; } catch (ExecutionException e) { @@ -221,37 +223,42 @@ public class UptoboxService extends AbstractHttpService implements Service, @Override public AccountDetails login() throws IOException { - final HTMLPage page = post("http://uptobox.com/") + final Page page = post("http://uptobox.com/") .parameter("op", "login") .parameter("redirect", "http://uptobox.com/?op=my_account") .parameter("login", credential.getUsername()) .parameter("password", credential.getPassword()).asPage(); - final String username = page.findPlain( - Pattern.compile("Username:(.+) Apply"), 1); + final SearchResults results = page.search(Pattern + .compile("Username:(.+) Apply")); + if (!results.hasResults()) + throw new AuthenticationInvalidCredentialException(); + final String username = results.asString(1); if (username == null) throw new AuthenticationInvalidCredentialException(); - final boolean premium = !page.containsPlain(Pattern.compile( - "Account type Free member", Pattern.MULTILINE)); - final int points = page.findIntPlain( - Pattern.compile("You have collected:([0-9])+"), 1); - final int referrals = page.findIntPlain( - Pattern.compile("My referrals:([0-9])+"), 1); - final String referralURL = page.findLink(Pattern - .compile("http://uptobox\\.com/affiliate/[0-9]+")); + final 
boolean premium = !page.search( + Pattern.compile("Account type Free member", + Pattern.MULTILINE)).hasResults(); + final int points = page.search( + Pattern.compile("You have collected:([0-9])+")) + .asInteger(1); + final int referrals = page.search( + Pattern.compile("My referrals:([0-9])+")).asInteger(1); + final String referralURL = page.link( + Pattern.compile("http://uptobox\\.com/affiliate/[0-9]+")) + .asString(); - final HTMLPage index = get("http://uptobox.com/").asPage(); - final int maximumFileSize = index.findIntPlain( - Pattern.compile("Up to ([0-9]*) Mb"), 1); + final Page index = get("http://uptobox.com/").asPage(); + final int maximumFileSize = index.search( + Pattern.compile("Up to ([0-9]*) Mb")).asInteger(1); - final HTMLPage disk = get("http://uptobox.com/?op=my_files") - .asPage(); - final double usedDiskSpace = disk.findDoublePlain( - DISK_USAGE_PATTERN, 1); - final String usedDiskSpaceUnit = disk.findPlain(DISK_USAGE_PATTERN, - 3); - final double maximumDiskSpace = disk.findDoublePlain( - DISK_USAGE_PATTERN, 4); + final Page disk = get("http://uptobox.com/?op=my_files").asPage(); + final double usedDiskSpace = disk.search(DISK_USAGE_PATTERN) + .asDouble(1); + final String usedDiskSpaceUnit = disk.search(DISK_USAGE_PATTERN) + .asString(3); + final double maximumDiskSpace = disk.search(DISK_USAGE_PATTERN) + .asDouble(4); return (account = new AccountDetailsImpl(username, premium, Filesizes.mb(maximumFileSize), diff --git a/httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxConfiguration.java b/httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxUploaderConfiguration.java similarity index 90% rename from httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxConfiguration.java rename to 
httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxUploaderConfiguration.java index 0b04f7d..66aa6db 100644 --- a/httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxConfiguration.java +++ b/httpchannel-service/httpchannel-service-uptobox/src/main/java/com/rogiel/httpchannel/service/uptobox/UptoboxUploaderConfiguration.java @@ -28,7 +28,7 @@ import com.rogiel.httpchannel.service.uptobox.UptoboxService.UploaderImpl; * * @author Rogiel */ -public class UptoboxConfiguration extends +public class UptoboxUploaderConfiguration extends AbstractUploaderConfiguration implements UploaderConfiguration, DescriptionableUploaderConfiguration { /** @@ -42,7 +42,7 @@ public class UptoboxConfiguration extends } @Override - public UptoboxConfiguration description(String description) { + public UptoboxUploaderConfiguration description(String description) { this.description = description; return this; } diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/captcha/ReCaptchaExtractor.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/captcha/ReCaptchaExtractor.java index fd2e0a1..b39c131 100644 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/captcha/ReCaptchaExtractor.java +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/captcha/ReCaptchaExtractor.java @@ -22,10 +22,9 @@ import java.io.IOException; import java.net.URI; import java.util.regex.Pattern; -import com.rogiel.httpchannel.captcha.ImageCaptcha; import com.rogiel.httpchannel.http.HttpContext; import com.rogiel.httpchannel.util.PatternUtils; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; /** * This class provides utility methods to extract an {@link ImageCaptcha} from @@ -57,8 +56,8 @@ public class ReCaptchaExtractor { * the {@link HttpContext} * @return the {@link ImageCaptcha} embedded at the given page */ - public 
static ImageCaptcha extractCaptcha(HTMLPage page, HttpContext ctx) { - final String uri = page.findScriptSrc(CAPTCHA_URI_PATTERN); + public static ImageCaptcha extractCaptcha(Page page, HttpContext ctx) { + final String uri = page.scriptBySource(CAPTCHA_URI_PATTERN).asString(); if (uri == null) return null; try { @@ -77,8 +76,8 @@ public class ReCaptchaExtractor { * the {@link HttpContext} * @return the {@link ImageCaptcha} contained at the given page */ - public static ImageCaptcha extractAjaxCaptcha(HTMLPage page, HttpContext ctx) { - final String siteID = page.findScript(CAPTCHA_ID_PATTERN, 1); + public static ImageCaptcha extractAjaxCaptcha(Page page, HttpContext ctx) { + final String siteID = page.script(CAPTCHA_ID_PATTERN).asString(1); try { return doExtract(ctx.get(CHALLENGE_BASE_URI + siteID).asString()); } catch (IOException e) { diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/http/Request.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/http/Request.java index 95016bf..2236ada 100644 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/http/Request.java +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/http/Request.java @@ -33,7 +33,7 @@ import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; import com.rogiel.httpchannel.util.HttpClientUtils; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; public abstract class Request { private static final JSONParser jsonParser = new JSONParser(); @@ -90,14 +90,14 @@ public abstract class Request { }); } - public HTMLPage asPage() throws ClientProtocolException, IOException { - return HTMLPage.parse(asString()); + public Page asPage() throws ClientProtocolException, IOException { + return Page.parse(asString()); } - public Future asPageAsync() throws IOException { - return ctx.threadPool.submit(new Callable() { + public Future asPageAsync() throws IOException { + return 
ctx.threadPool.submit(new Callable() { @Override - public HTMLPage call() throws Exception { + public Page call() throws Exception { return asPage(); } }); diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/HttpClientUtils.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/HttpClientUtils.java index b168b9e..4bb2a44 100644 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/HttpClientUtils.java +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/HttpClientUtils.java @@ -31,7 +31,7 @@ import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpUriRequest; -import com.rogiel.httpchannel.util.htmlparser.HTMLPage; +import com.rogiel.httpchannel.util.html.Page; public class HttpClientUtils { private static final ExecutorService threadPool = Executors @@ -82,7 +82,7 @@ public class HttpClientUtils { } } - public static HTMLPage toPage(HttpResponse response) throws IOException { - return HTMLPage.parse(toString(response)); + public static Page toPage(HttpResponse response) throws IOException { + return Page.parse(toString(response)); } } diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/MatchedElement.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/MatchedElement.java new file mode 100644 index 0000000..52dc5ba --- /dev/null +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/MatchedElement.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.rogiel.httpchannel.util.html; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.htmlparser.Tag; + +/** + * A {@link PageElement} that has a matched string attached to it + * + * @author Rogiel + */ +public class MatchedElement extends PageElement { + /** + * The regular expression {@link Matcher} that retains the matched strings + * to it + */ + private final Matcher matcher; + + /** + * @param tag + * the tag + * @param matcher + * the matcher + */ + public MatchedElement(T tag, Matcher matcher) { + super(tag); + this.matcher = matcher; + } + + /** + * @param tag + * the tag + * @param pattern + * the pattern + * @param content + * the content + */ + public MatchedElement(T tag, Pattern pattern, String content) { + super(tag); + this.matcher = pattern.matcher(content); + } + + /** + * @param tag + * the tag + * @param content + * the content + */ + public MatchedElement(T tag, String content) { + this(tag, Pattern.compile(Pattern.quote(content)), content); + this.matcher.matches(); + } + + /** + * @return true if the entire content matches the pattern + */ + public boolean matches() { + matcher.reset(); + return matcher.matches(); + } + + /** + * @return true if the start of the content matches the pattern (uses + * {@link Matcher#lookingAt()}, so the entire value need not match) + */ + public boolean matchesEntirelly() { + return matcher.lookingAt(); + } + + /** + * @return true if the pattern has found something on the + * element that matches it + */ + public boolean find() { + matcher.reset(); + return matcher.find(); + } + + /** + * 
@param n + * the group number + * @return true if n is a valid group index for the pattern, that is + * between 0 and the pattern group count (inclusive) + */ + public boolean hasGroup(int n) { + return n >= 0 && n <= matcher.groupCount(); + } + + /** + * @return the entire matched value (group 0) as a string + */ + public String asString() { + return asString(0); + } + + /** + * Reads a group of the current match. A match must have been attempted + * successfully before (see {@link Matcher#group(int)}). + * + * @param n + * the group number + * @return the group value as a string + */ + public String asString(int n) { + return matcher.group(n); + } + + /** + * @return the entire matched value as an integer + * @throws NumberFormatException + * if the value is not a parsable integer + */ + public int asInteger() { + return asInteger(0); + } + + /** + * @param n + * the group number + * @return the group value as an integer + * @throws NumberFormatException + * if the value is not a parsable integer + */ + public int asInteger(int n) { + return Integer.parseInt(asString(n)); + } + + /** + * @return the entire matched value as a long + * @throws NumberFormatException + * if the value is not a parsable long + */ + public long asLong() { + return asLong(0); + } + + /** + * @param n + * the group number + * @return the group value as a long + * @throws NumberFormatException + * if the value is not a parsable long + */ + public long asLong(int n) { + return Long.parseLong(asString(n)); + } + + /** + * @return the entire matched value as a double + * @throws NumberFormatException + * if the value is not a parsable double + */ + public double asDouble() { + return asDouble(0); + } + + /** + * @param n + * the group number + * @return the group value as a double + * @throws NumberFormatException + * if the value is not a parsable double + */ + public double asDouble(int n) { + return Double.parseDouble(asString(n)); + } + + /** + * @return the pattern matched against the element + */ + public Pattern getPattern() { + return matcher.pattern(); + } + + @Override + public String toString() { + return "MatchedElement [tag=" + tag + ", pattern=" + getPattern() + "]"; + } +} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/Page.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/Page.java new file mode 100644 index 0000000..3dfe711 --- /dev/null +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/Page.java @@ -0,0 +1,841 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.rogiel.httpchannel.util.html; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.regex.Pattern; + +import org.htmlparser.NodeFilter; +import org.htmlparser.Parser; +import org.htmlparser.Tag; +import org.htmlparser.filters.AndFilter; +import org.htmlparser.tags.FormTag; +import org.htmlparser.tags.FrameTag; +import org.htmlparser.tags.ImageTag; +import org.htmlparser.tags.InputTag; +import org.htmlparser.tags.LinkTag; +import org.htmlparser.tags.ScriptTag; +import org.htmlparser.tags.TextareaTag; +import org.htmlparser.util.NodeIterator; +import org.htmlparser.util.NodeList; +import org.htmlparser.util.ParserException; + +import com.rogiel.httpchannel.util.html.PageElement.TagMatcher; +import com.rogiel.httpchannel.util.html.filter.TypeTagFilter; +import com.rogiel.httpchannel.util.html.matcher.IDTagMatcher; +import com.rogiel.httpchannel.util.html.matcher.NameTagMatcher; + +/** + * This class handles all HTML parsing and searching. With this class it is easy to + * search for links matching a {@link Pattern}, for images, frames, forms, + * inputs and many more HTML widgets.
+ * + * @author Rogiel + */ +public class Page { + /** + * The nodes of the parsed HTML document, as returned by the parser + */ + private final NodeList nodes; + + /** + * Converts one object of a node list into the object that is stored in the + * resulting list. Used by the internal filter and list methods. + * + * @author Rogiel + * + * @param + * the input object type + * @param + * the output object type + */ + private interface ListProcessor { + O process(I tag); + } + + /** + * The default {@link ListProcessor}: wraps every tag into a new + * {@link PageElement} + * + * @author Rogiel + * + * @param + * the input type + */ + private class DefaultListProcessor implements + ListProcessor> { + @Override + public PageElement process(I tag) { + return new PageElement(tag); + } + } + + /** + * Creates a new page instance by parsing the document with the given + * parser (null is passed as the argument of the parse call) + * + * @param parser + * the HTML parser + * @throws ParserException + * if the parser fails to parse the document + */ + public Page(Parser parser) throws ParserException { + this.nodes = parser.parse(null); + } + + /* + * ************************************************************************ + * ***** INTERNAL + * ************************************************************************ + */ + /** + * Filters all the tags within this page to those matching the filters and + * converts each of them with the processor. A single filter is used as is; + * any other number of filters is combined with an {@link AndFilter}. If a + * {@link ParserException} is thrown an empty list is returned. + * + * @param processor + * the list processor + * @param filters + * the filters to be applied + * @return a list of the converted matching tags + */ + private List filter(ListProcessor processor, + NodeFilter...
filters) { + final NodeFilter filter; + if (filters.length == 1) + filter = filters[0]; + else + filter = new AndFilter(filters); + try { + return list(nodes.extractAllNodesThatMatch(filter, true), processor); + } catch (ParserException e) { + return Collections.emptyList(); + } + } + + /** + * Creates a list of converted objects by running every node of the list + * through the processor, in iteration order. + * + * @param list + * the input list + * @param processor + * the processor that converts the object types + * @return the processed and converted list + * @throws ParserException + * if any exception occurs while iterating the nodes + */ + @SuppressWarnings("unchecked") + private List list(final NodeList list, + ListProcessor processor) throws ParserException { + final List filtered = new ArrayList<>(); + final NodeIterator iterator = list.elements(); + while (iterator.hasMoreNodes()) { + filtered.add(processor.process((T) iterator.nextNode())); + } + return filtered; + } + + /** + * Tries to search for a tag value that matches exactly (the entire string) + * with the pattern. Elements for which either matcher yields no content + * (null) are skipped, so the returned list never contains null entries. + * + * @param list + * the list of elements + * @param pattern + * the pattern + * @param tagMatcher + * the tag matcher (which will be matched against the pattern) + * @param realMatcher + * the real matcher (which will be returned on the + * {@link MatchedElement}) + * @return a list of {@link MatchedElement}, without null entries + */ + private > List> match( + List list, Pattern pattern, TagMatcher tagMatcher, + TagMatcher realMatcher) { + final List> matchList = new ArrayList<>(); + for (final E tag : list) { + final MatchedElement matched = tag.match(pattern, tagMatcher); + if (matched == null || !matched.matches()) + continue; + if (tagMatcher == realMatcher) { + matchList.add(matched); + continue; + } + final MatchedElement real = tag.match(realMatcher); + if (real != null) + matchList.add(real); + } + return matchList; + } + + /** + * Tries to search for a tag value that matches exactly (the entire string) + * with the pattern.
+ * + * @param list + * the list of elements + * @param pattern + * the pattern + * @param tagMatcher + * the tag matcher (which will be matched against the pattern and + * used on {@link MatchedElement}) + * @return an list of {@link MatchedElement} + */ + private > List> match( + List list, Pattern pattern, TagMatcher tagMatcher) { + return match(list, pattern, tagMatcher, tagMatcher); + } + + /** + * Tries to search for a tag value that contains a match of the pattern + * anywhere in it. Elements for which the tag matcher yields no content + * (null) are skipped instead of failing. + * + * @param list + * the list of elements + * @param pattern + * the pattern + * @param tagMatcher + * the tag matcher (which will be matched against the pattern and + * used on {@link MatchedElement}) + * @return a list of {@link MatchedElement} + */ + + private > List> find( + List list, Pattern pattern, TagMatcher tagMatcher) { + final List> matchList = new ArrayList<>(); + for (final E tag : list) { + final MatchedElement matched = tag.match(pattern, tagMatcher); + if (matched != null && matched.find()) + matchList.add(matched); + } + return matchList; + } + + /** + * Returns a single element from the list + * + * @param list + * the list + * @return the first element of the list, or null if the list is empty + */ + private O single(List list) { + if (list.isEmpty()) + return null; + return list.get(0); + } + + /** + * Parses the HTML page to a plain string.
This is similar to the + * "SEO preview" systems + * + * @return the page text on a single line: every non-blank line, with tabs + * turned into spaces and trimmed, followed by one space + */ + public String asPlainString() { + String string = nodes.asString().replaceAll(" ", ""); + final String[] lines = string.split("\n"); + + final StringBuilder builder = new StringBuilder(); + for (final String line : lines) { + final String procLine = line.replaceAll("\t", " ").trim(); + if (procLine.isEmpty()) + continue; + builder.append(procLine).append(" "); + } + + return builder.toString(); + } + + /* + * ************************************************************************ + * ***** TEXT SEARCH + * ************************************************************************ + */ + /** + * Searches for the given pattern in the plain string form of the entire + * page (see {@link #asPlainString()}) + * + * @param pattern + * the pattern + * @return the search results + */ + public SearchResults search(Pattern pattern) { + return new SearchResults(pattern, asPlainString()); + } + + /** + * Searches for the given text, taken literally, in the entire page + * + * @param text + * the text + * @return the search results + */ + public SearchResults searchFirst(String text) { + return search(Pattern.compile(Pattern.quote(text))); + } + + /* + * ************************************************************************ + * ***** LINKS + * ************************************************************************ + */ + /** + * A {@link TagMatcher} that returns the link href + */ + private static final TagMatcher LINK_TAG_MATCHER = new TagMatcher() { + @Override + public String content(LinkTag tag) { + return tag.getLink(); + } + }; + + /** + * @return a list of all links contained at the page + */ + public List> links() { + return filter(new DefaultListProcessor(), new TypeTagFilter( + LinkTag.class)); + } + + /** + * Return all links whose URL matches the given pattern entirely + * + * @param pattern + * the pattern + * @return the list of links matching the pattern + */ + public List> links(Pattern pattern) { + return match(links(), pattern, LINK_TAG_MATCHER); + }
+ + /** + * Return the first link whose URL matches the given pattern entirely + * + * @param pattern + * the pattern + * @return the first link matching the pattern, or null if there is none + */ + public MatchedElement link(Pattern pattern) { + return single(links(pattern)); + } + + /** + * Selects links by the content chosen by {@link IDTagMatcher}. The match + * carried by each returned element is built from the link URL, not from + * the ID. + * + * @param pattern + * the ID pattern + * @return the links whose IDs match the pattern entirely + */ + public List> linksByID(Pattern pattern) { + return match(links(), pattern, new IDTagMatcher(), + LINK_TAG_MATCHER); + } + + /** + * @param id + * the link ID, taken literally + * @return the first link with the given ID, or null if there is none + */ + public MatchedElement linkByID(String id) { + return single(linksByID(Pattern.compile(Pattern.quote(id)))); + } + + /** + * Selects links by the content chosen by {@link NameTagMatcher}. The match + * carried by each returned element is built from the link URL. + * + * @param pattern + * the name pattern + * @return the links whose names match the pattern entirely + */ + public List> linksByName(Pattern pattern) { + return match(links(), pattern, new NameTagMatcher(), + LINK_TAG_MATCHER); + } + + /** + * @param name + * the name, taken literally + * @return the first link with the given name, or null if there is none + */ + public MatchedElement linkByName(String name) { + return single(linksByName(Pattern.compile(Pattern.quote(name)))); + } + + /* + * ************************************************************************ + * ***** IMAGES + * ************************************************************************ + */ + /** + * A {@link TagMatcher} that returns the image source url + */ + private static final TagMatcher IMAGE_TAG_MATCHER = new TagMatcher() { + @Override + public String content(ImageTag tag) { + return tag.getImageURL(); + } + }; + + /** + * @return the list of all images at the page + */ + public List> images() { + return filter(new DefaultListProcessor(), new TypeTagFilter( + ImageTag.class)); + } + + /** + * @param pattern + * the image url pattern + * @return the list of images whose url matches the pattern entirely + */ + public List> images(Pattern pattern) { + return match(images(), pattern, IMAGE_TAG_MATCHER); + } + + /** + * @param pattern + * the image url pattern + * @return the first image whose url
matches the pattern + */ + public MatchedElement image(Pattern pattern) { + return single(images(pattern)); + } + + /** + * Selects images by the content chosen by {@link IDTagMatcher}. The match + * carried by each returned element is built from the image url, not from + * the id. + * + * @param pattern + * the id pattern + * @return the list of images whose ids match the pattern entirely + */ + public List> imagesByID(Pattern pattern) { + return match(images(), pattern, new IDTagMatcher(), + IMAGE_TAG_MATCHER); + } + + /** + * @param id + * the image ID, taken literally + * @return the first image with the given id, or null if there is none + */ + public MatchedElement imageByID(String id) { + return single(imagesByID(Pattern.compile(Pattern.quote(id)))); + } + + /** + * Selects images by the content chosen by {@link NameTagMatcher}. The + * match carried by each returned element is built from the image url. + * + * @param pattern + * the image name pattern + * @return the list of images whose names match the pattern entirely + */ + public List> imagesByName(Pattern pattern) { + return match(images(), pattern, new NameTagMatcher(), + IMAGE_TAG_MATCHER); + } + + /** + * @param name + * the image name, taken literally + * @return the first image with the given name, or null if there is none + */ + public MatchedElement imageByName(String name) { + return single(imagesByName(Pattern.compile(Pattern.quote(name)))); + } + + /* + * ************************************************************************ + * ***** FORM + * ************************************************************************ + */ + /** + * A {@link TagMatcher} that returns the form action (or submit) url + */ + private static final TagMatcher FORM_TAG_MATCHER = new TagMatcher() { + @Override + public String content(FormTag tag) { + return tag.getFormLocation(); + } + }; + + /** + * @return the list of all forms on the page + */ + public List> forms() { + return filter(new DefaultListProcessor(), new TypeTagFilter( + FormTag.class)); + } + + /** + * @param pattern + * the action url pattern + * @return the forms whose action urls match the pattern entirely + */ + public List> forms(Pattern pattern) { + return match(forms(), pattern, FORM_TAG_MATCHER); + } + + /** + * @param pattern + * the action url pattern + * @return the first form whose action url matches the pattern + */ + public MatchedElement form(Pattern
pattern) { + return single(forms(pattern)); + } + + /** + * Selects forms by the content chosen by {@link IDTagMatcher}. The match + * carried by each returned element is built from the form action url, not + * from the id. + * + * @param pattern + * the form id pattern + * @return the forms whose ids match the pattern entirely + */ + public List> formsByID(Pattern pattern) { + return match(forms(), pattern, new IDTagMatcher(), + FORM_TAG_MATCHER); + } + + /** + * @param id + * the form id, taken literally + * @return the first form with the given id, or null if there is none + */ + public MatchedElement formByID(String id) { + return single(formsByID(Pattern.compile(Pattern.quote(id)))); + } + + /** + * Selects forms by the content chosen by {@link NameTagMatcher}. The match + * carried by each returned element is built from the form action url. + * + * @param pattern + * the form name pattern + * @return the forms whose names match the pattern entirely + */ + public List> formsByName(Pattern pattern) { + return match(forms(), pattern, new NameTagMatcher(), + FORM_TAG_MATCHER); + } + + /** + * @param name + * the form name, taken literally + * @return the first form with the given name, or null if there is none + */ + public MatchedElement formByName(String name) { + return single(formsByName(Pattern.compile(Pattern.quote(name)))); + } + + /* + * ************************************************************************ + * ***** INPUT + * ************************************************************************ + */ + /** + * A {@link TagMatcher} that returns the value attribute of the input + */ + private static final TagMatcher INPUT_TAG_MATCHER = new TagMatcher() { + @Override + public String content(InputTag tag) { + return tag.getAttribute("value"); + } + }; + + /** + * @return the list of all inputs on the page + */ + public List> inputs() { + return filter(new DefaultListProcessor(), new TypeTagFilter( + InputTag.class)); + } + + /** + * Unlike most other lookups of this class, this one searches inside the + * value: the pattern only has to be found somewhere in it. + * + * @param pattern + * the input value pattern + * @return the inputs whose values contain a match of the pattern + */ + public List> inputs(Pattern pattern) { + return find(inputs(), pattern, INPUT_TAG_MATCHER); + } + + /** + * @param pattern + * the input value pattern + * @return the first input whose value contains a match of the pattern, + * or null if there is none + */ + public MatchedElement input(Pattern pattern) { + return single(inputs(pattern)); + } + + /** + * @param pattern + * the input id
pattern + * @return the inputs whose ids matches the pattern + */ + public List> inputsByID(Pattern pattern) { + return match(inputs(), pattern, new IDTagMatcher(), + INPUT_TAG_MATCHER); + } + + /** + * @param id + * the input id, taken literally + * @return the first input with the given id, or null if there is none + */ + public MatchedElement inputByID(String id) { + return single(inputsByID(Pattern.compile(Pattern.quote(id)))); + } + + /** + * Selects inputs by the content chosen by {@link NameTagMatcher}. The + * match carried by each returned element is built from the input value. + * + * @param pattern + * the input name pattern + * @return the inputs whose names match the pattern entirely + */ + public List> inputsByName(Pattern pattern) { + return match(inputs(), pattern, new NameTagMatcher(), + INPUT_TAG_MATCHER); + } + + /** + * @param name + * the input name, taken literally + * @return the first input with the given name, or null if there is none + */ + public MatchedElement inputByName(String name) { + return single(inputsByName(Pattern.compile(Pattern.quote(name)))); + } + + /* + * ************************************************************************ + * ***** TEXTAREA + * ************************************************************************ + */ + /** + * A {@link TagMatcher} that returns the textarea value + */ + private static final TagMatcher TEXTAREA_TAG_MATCHER = new TagMatcher() { + @Override + public String content(TextareaTag tag) { + return tag.getStringText(); + } + }; + + /** + * @return the list of all textareas on the page + */ + public List> textareas() { + return filter(new DefaultListProcessor(), + new TypeTagFilter(TextareaTag.class)); + } + + /** + * @param pattern + * the textarea value pattern + * @return the textareas whose values match the pattern entirely + */ + public List> textareas(Pattern pattern) { + return match(textareas(), pattern, TEXTAREA_TAG_MATCHER); + } + + /** + * @param pattern + * the textarea value pattern + * @return the first textarea whose value matches the pattern, or null if + * there is none + */ + public MatchedElement textarea(Pattern pattern) { + return single(textareas(pattern)); + } + + /** + * @param pattern + * the textarea id pattern + * @return the
textareas whose ids matches the pattern + */ + public List> textareasByID(Pattern pattern) { + return match(textareas(), pattern, new IDTagMatcher(), + TEXTAREA_TAG_MATCHER); + } + + /** + * @param id + * the textarea id, taken literally + * @return the first textarea with the given id, or null if there is none + */ + public MatchedElement textareaByID(String id) { + return single(textareasByID(Pattern.compile(Pattern.quote(id)))); + } + + /** + * Selects textareas by the content chosen by {@link NameTagMatcher}. The + * match carried by each returned element is built from the textarea value. + * + * @param pattern + * the textarea name pattern + * @return the textareas whose names match the pattern entirely + */ + public List> textareasByName(Pattern pattern) { + return match(textareas(), pattern, new NameTagMatcher(), + TEXTAREA_TAG_MATCHER); + } + + /** + * @param name + * the textarea name, taken literally + * @return the first textarea with the given name, or null if there is + * none + */ + public MatchedElement textareaByName(String name) { + return single(textareasByName(Pattern.compile(Pattern.quote(name)))); + } + + /* + * ************************************************************************ + * ***** JAVASCRIPT + * ************************************************************************ + */ + /** + * @return the list of all scripts on the page + */ + public List> scripts() { + return filter(new DefaultListProcessor(), new TypeTagFilter( + ScriptTag.class)); + } + + /** + * Searches inside the script code: the pattern only has to be found + * somewhere in it. + * + * @param pattern + * the script code pattern + * @return the scripts whose code contains a match of the pattern + */ + public List> scripts(Pattern pattern) { + return find(scripts(), pattern, new TagMatcher() { + @Override + public String content(ScriptTag tag) { + return tag.getScriptCode(); + } + }); + } + + /** + * @param pattern + * the script code pattern + * @return the first script whose code contains a match of the pattern, + * or null if there is none + */ + public MatchedElement script(Pattern pattern) { + return single(scripts(pattern)); + } + + /** + * @param pattern + * the script url pattern + * @return the first script whose src attribute matches the pattern + * entirely, or null if there is none + */ + public MatchedElement scriptBySource(Pattern pattern) { + return single(match(scripts(), pattern, new TagMatcher() { + @Override + public String
content(ScriptTag tag) { + return tag.getAttribute("src"); + } + })); + } + + /* + * ************************************************************************ + * ***** FRAME + * ************************************************************************ + */ + /** + * A {@link TagMatcher} that returns the frame url + */ + private static final TagMatcher FRAME_TAG_MATCHER = new TagMatcher() { + @Override + public String content(FrameTag tag) { + return tag.getFrameLocation(); + } + }; + + /** + * @return the list of all frames on the page + */ + public List> frames() { + return filter(new DefaultListProcessor(), new TypeTagFilter( + FrameTag.class)); + } + + /** + * @param pattern + * the frame url pattern + * @return the frames whose urls match the pattern entirely + */ + public List> frames(Pattern pattern) { + return match(frames(), pattern, FRAME_TAG_MATCHER); + } + + /** + * @param pattern + * the frame url pattern + * @return the first frame whose url matches the pattern, or null if there + * is none + */ + public MatchedElement frame(Pattern pattern) { + return single(frames(pattern)); + } + + /** + * Selects frames by the content chosen by {@link IDTagMatcher}. The match + * carried by each returned element is built from the frame url, not from + * the id. + * + * @param pattern + * the frame id pattern + * @return the frames whose ids match the pattern entirely + */ + public List> framesByID(Pattern pattern) { + return match(frames(), pattern, new IDTagMatcher(), + FRAME_TAG_MATCHER); + } + + /** + * @param id + * the frame id, taken literally + * @return the first frame with the given id, or null if there is none + */ + public MatchedElement frameByID(String id) { + return single(framesByID(Pattern.compile(Pattern.quote(id)))); + } + + /** + * Selects frames by the content chosen by {@link NameTagMatcher}. The + * match carried by each returned element is built from the frame url. + * + * @param pattern + * the frame name pattern + * @return the frames whose names match the pattern entirely + */ + public List> framesByName(Pattern pattern) { + return match(frames(), pattern, new NameTagMatcher(), + FRAME_TAG_MATCHER); + } + + /** + * @param name + * the frame name, taken literally + * @return the first frame with the given name, or null if there is none + */ + public MatchedElement frameByName(String name) { + return single(framesByName(Pattern.compile(Pattern.quote(name)))); + } + + /* + *
************************************************************************ + * ***** INITIALIZERS + * ************************************************************************ + */ + /** + * Creates a new page parsing the HTML input. A {@link ParserException} + * raised while parsing is not propagated: null is returned instead, so + * callers have to check the result. + * + * @param html + * the html code + * @return the newly created {@link Page} object, or null if the HTML + * could not be parsed + */ + public static Page parse(String html) { + try { + return new Page(Parser.createParser(html, null)); + } catch (ParserException e) { + return null; + } + } + + @Override + public String toString() { + return nodes.toHtml(false); + } +} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/PageElement.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/PageElement.java new file mode 100644 index 0000000..48a5b31 --- /dev/null +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/PageElement.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package com.rogiel.httpchannel.util.html; + +import java.util.regex.Pattern; + +import org.htmlparser.Tag; + +/** + * An element that represents a tag on the page + * + * @author Rogiel + */ +public class PageElement { + /** + * The tag represented by this element + */ + protected final T tag; + + /** + * Creates a new instance + * + * @param tag + * the tag + */ + public PageElement(T tag) { + this.tag = tag; + } + + /** + * Tries to match the element with a given pattern. The pattern is applied + * to the HTML of the tag. + * + * @param pattern + * the pattern + * @return the matched element + */ + public MatchedElement match(Pattern pattern) { + return match(pattern, null); + } + + /** + * Tries to match the element with a given pattern using an alternative + * {@link TagMatcher}. No match is attempted here; the returned element + * only binds the pattern to the content chosen by the tag matcher. + * + * @param pattern + * the pattern + * @param tagMatcher + * the tag matcher, or null to use the HTML of the tag + * @return the matched element, or null if the tag matcher yields no + * content for this tag + */ + public MatchedElement match(Pattern pattern, TagMatcher tagMatcher) { + if (tagMatcher == null) { + tagMatcher = new TagMatcher() { + @Override + public String content(T tag) { + return tag.toHtml(); + } + }; + } + final String content = tagMatcher.content(tag); + if (content == null) + return null; + return new MatchedElement(tag, pattern, content); + } + + /** + * Tries to match the element with itself (return a {@link MatchedElement} + * that always matches itself) + * + * @param tagMatcher + * the tag matcher, or null to use the HTML of the tag + * @return a {@link MatchedElement} whose group 0 is the content chosen by + * the tag matcher, or null if the tag matcher yields no content + * for this tag + */ + public MatchedElement match(TagMatcher tagMatcher) { + if (tagMatcher == null) { + tagMatcher = new TagMatcher() { + @Override + public String content(T tag) { + return tag.toHtml(); + } + }; + } + final String content = tagMatcher.content(tag); + if (content == null) + return null; + return new MatchedElement(tag, content); + } + + /** + * An tag matcher is an helper class that can return an value that the + * matcher should use to test the pattern against it.
+ * + * @author Rogiel + * + * @param + * the tag type + */ + public interface TagMatcher { + String content(T tag); + } + + /** + * @return the tag object this element was created with + */ + public T tag() { + return tag; + } + + @Override + public String toString() { + return "PageElement [tag=" + tag + "]"; + } +} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/SearchResults.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/SearchResults.java new file mode 100644 index 0000000..b17b353 --- /dev/null +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/SearchResults.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package com.rogiel.httpchannel.util.html; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Represents a search done against a page string + * + * @author Rogiel + */ +public class SearchResults { + /** + * The matcher holding the state of the search + */ + private final Matcher matcher; + + /** + * Creates a new instance around an existing matcher. The matcher is + * stored as given; no search is attempted here. + * + * @param matcher + * the matcher + */ + public SearchResults(Matcher matcher) { + this.matcher = matcher; + } + + /** + * Creates a new instance and immediately searches the content for the + * first occurrence of the pattern + * + * @param pattern + * the pattern + * @param content + * the content + */ + public SearchResults(Pattern pattern, String content) { + this.matcher = pattern.matcher(content); + this.matcher.find(); + } + + /** + * Resets the matcher and searches again from the start of the content + * + * @return true if the matcher has found any results + */ + public boolean hasResults() { + matcher.reset(); + return matcher.find(); + } + + /** + * @param n + * the group number + * @return true if n is a valid group index for the pattern, that is + * between 0 and the pattern group count (inclusive) + */ + public boolean hasGroup(int n) { + return n >= 0 && n <= matcher.groupCount(); + } + + /** + * @return the entire matched value (group 0) as a string + */ + public String asString() { + return asString(0); + } + + /** + * Reads a group of the current match. A search must have succeeded + * before (see {@link Matcher#group(int)}). + * + * @param n + * the group number + * @return the group value as a string + */ + public String asString(int n) { + return matcher.group(n); + } + + /** + * @return the entire matched value as an integer + */ + public int asInteger() { + return asInteger(0); + } + + /** + * @param n + * the group number + * @return the group value as an integer + */ + public int asInteger(int n) { + return Integer.parseInt(asString(n)); + } + + /** + * @return the entire matched value as a long + */ + public long asLong() { + return asLong(0); + } + + /** + * @param n + * the group number + * @return the group value as a long + */ + public long asLong(int n) { + return Long.parseLong(asString(n)); + } + + /** + * @return the entire matched value as a double + */ + public double asDouble() { + return asDouble(0); + } + + /** + * @param n + * the group number + * @return the group value as a double + */ + public double asDouble(int n) { + return Double.parseDouble(asString(n)); + } + + /** + * @return the pattern matched
against the element + */ + public Pattern getPattern() { + return matcher.pattern(); + } +} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/IDFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/filter/TypeTagFilter.java similarity index 64% rename from httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/IDFilter.java rename to httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/filter/TypeTagFilter.java index b284665..fa074ed 100644 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/IDFilter.java +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/filter/TypeTagFilter.java @@ -16,29 +16,36 @@ * specific language governing permissions and limitations * under the License. */ -package com.rogiel.httpchannel.util.htmlparser; +package com.rogiel.httpchannel.util.html.filter; import org.htmlparser.Node; import org.htmlparser.NodeFilter; import org.htmlparser.Tag; -public class IDFilter implements NodeFilter { +/** + * An filter that selects all tags matching an given type + * + * @author Rogiel + */ +public class TypeTagFilter implements NodeFilter { private static final long serialVersionUID = 1L; - private final String id; + /** + * The tag type + */ + private final Class type; - public IDFilter(String id) { - this.id = id; + /** + * Creates a new instance + * + * @param type + * the tag type + */ + public TypeTagFilter(Class type) { + this.type = type; } @Override public boolean accept(Node node) { - if (!(node instanceof Tag)) - return false; - final Tag tag = (Tag) node; - if (tag.getAttribute("id") == null) - return false; - if (!tag.getAttribute("id").equals(id)) - return false; - return true; + return type.isAssignableFrom(node.getClass()); } } diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ContainsFilter.java 
b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/matcher/IDTagMatcher.java similarity index 63% rename from httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ContainsFilter.java rename to httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/matcher/IDTagMatcher.java index a39c037..f86b13a 100644 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ContainsFilter.java +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/matcher/IDTagMatcher.java @@ -1,38 +1,36 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; - -public class ContainsFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern content; - - public ContainsFilter(Pattern content) { - this.content = content; - } - - @Override - public boolean accept(Node node) { - return content.matcher(node.getText()).find(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.rogiel.httpchannel.util.html.matcher; + +import org.htmlparser.Tag; + +import com.rogiel.httpchannel.util.html.PageElement.TagMatcher; + +/** + * A {@link TagMatcher} that always returns the tag's {@code id} attribute + * + * @author Rogiel + */ +public class IDTagMatcher<T extends Tag> implements TagMatcher<T> { + @Override + public String content(T tag) { + return tag.getAttribute("id"); + } + +} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ContainsInLowerCaseFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/matcher/NameTagMatcher.java similarity index 62% rename from httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ContainsInLowerCaseFilter.java rename to httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/matcher/NameTagMatcher.java index 27f0607..1a28db3 100644 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ContainsInLowerCaseFilter.java +++ b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/html/matcher/NameTagMatcher.java @@ -1,38 +1,36 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; - -public class ContainsInLowerCaseFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern content; - - public ContainsInLowerCaseFilter(Pattern content) { - this.content = content; - } - - @Override - public boolean accept(Node node) { - return content.matcher(node.getText().toLowerCase()).find(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.rogiel.httpchannel.util.html.matcher; + +import org.htmlparser.Tag; + +import com.rogiel.httpchannel.util.html.PageElement.TagMatcher; + +/** + * A {@link TagMatcher} that always returns the tag's {@code name} attribute + * + * @author Rogiel + */ +public class NameTagMatcher<T extends Tag> implements TagMatcher<T> { + @Override + public String content(T tag) { + return tag.getAttribute("name"); + } + +} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/FormActionPatternFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/FormActionPatternFilter.java deleted file mode 100644 index 19a43f6..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/FormActionPatternFilter.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.tags.FormTag; - -public class FormActionPatternFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern pattern; - - public FormActionPatternFilter(Pattern pattern) { - this.pattern = pattern; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof FormTag)) - return false; - final FormTag form = (FormTag) node; - return pattern.matcher(form.getFormLocation()).matches(); - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/FramePatternFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/FramePatternFilter.java deleted file mode 100644 index a361a18..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/FramePatternFilter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.nodes.TagNode; - -public class FramePatternFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern pattern; - - public FramePatternFilter(Pattern pattern) { - this.pattern = pattern; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof TagNode)) - return false; - final TagNode frame = (TagNode) node; - if (frame.getAttribute("src") == null) - return false; - return pattern.matcher(frame.getAttribute("src")).matches(); - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/HTMLPage.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/HTMLPage.java deleted file mode 100644 index ab8a4c3..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/HTMLPage.java +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.Parser; -import org.htmlparser.Tag; -import org.htmlparser.filters.AndFilter; -import org.htmlparser.nodes.TagNode; -import org.htmlparser.tags.FormTag; -import org.htmlparser.tags.ImageTag; -import org.htmlparser.tags.InputTag; -import org.htmlparser.tags.LinkTag; -import org.htmlparser.tags.ScriptTag; -import org.htmlparser.tags.TextareaTag; -import org.htmlparser.util.NodeIterator; -import org.htmlparser.util.NodeList; -import org.htmlparser.util.ParserException; - -/** - * @author Rogiel - */ -public class HTMLPage { - private final NodeList nodes; - - private HTMLPage(Parser parser) throws ParserException { - this.nodes = parser.parse(null); - } - - private <T extends Node> List<T> filter(final Class<T> nodeType, - NodeFilter... 
filters) { - final NodeFilter filter; - if (filters.length == 1) - filter = filters[0]; - else - filter = new AndFilter(filters); - try { - return list(nodes.extractAllNodesThatMatch(filter, true)); - } catch (ParserException e) { - return Collections.emptyList(); - } - } - - @SuppressWarnings("unchecked") - private <T extends Node> List<T> list(final NodeList list) - throws ParserException { - final List<T> filtered = new ArrayList<>(); - final NodeIterator iterator = list.elements(); - while (iterator.hasMoreNodes()) { - filtered.add((T) iterator.nextNode()); - } - return filtered; - } - - public boolean containsPlain(Pattern pattern) { - return pattern.matcher(asString()).find(); - } - - public boolean contains(final Pattern pattern) { - return !filter(Node.class, new ContainsFilter(pattern)).isEmpty(); - } - - public boolean contains(final String text) { - return contains(Pattern.compile(Pattern.quote(text))); - } - - public boolean containsIgnoreCase(final String text) { - return !filter( - Node.class, - new ContainsInLowerCaseFilter(Pattern.compile(Pattern - .quote(text.toLowerCase())))).isEmpty(); - } - - public String findPlain(final Pattern pattern, int n) { - final Matcher matcher = pattern.matcher(asString()); - if (matcher.find()) - return matcher.group(n); - return null; - } - - public int findIntPlain(final Pattern pattern, int n) { - return Integer.parseInt(findPlain(pattern, n)); - } - - public double findDoublePlain(final Pattern pattern, int n) { - return Double.parseDouble(findPlain(pattern, n)); - } - - public String find(final Pattern pattern, int n) { - for (final Node tag : filter(Tag.class, new ContainsFilter(pattern))) { - final Matcher matcher = pattern.matcher(tag.getText()); - if (matcher.find()) - return matcher.group(n); - } - return null; - } - - public int findAsInt(final Pattern pattern, int n) { - String found = find(pattern, n); - if (found == null) - return 0; - return Integer.parseInt(findScript(pattern, n)); - } - - /** - * 
Tries to find a link that 
has an URI following the given pattern - * - * @param pattern - * the pattern - * @return the link content, if found. null otherwise - */ - public String findLink(final Pattern pattern) { - for (final LinkTag tag : filter(LinkTag.class, new LinkPatternFilter( - pattern))) { - return tag.getLink(); - } - return null; - } - - /** - * Tries to find a frame that has an URI following the given pattern - * - * @param pattern - * the pattern - * @return the iframe uri, if found. null otherwise - */ - public String findFrame(final Pattern pattern) { - for (final TagNode tag : filter(TagNode.class, new FramePatternFilter( - pattern))) { - return tag.getAttribute("src"); - } - return null; - } - - /** - * Tries to find a image that has an URI following the given pattern - * - * @param pattern - * the pattern - * @return the iframe uri, if found. null otherwise - */ - public String findImage(final Pattern pattern) { - for (final ImageTag tag : filter(ImageTag.class, - new ImagePatternFilter(pattern))) { - return tag.getImageURL(); - } - return null; - } - - /** - * Tries to find a form which has an location that respects the given - * pattern - * - * @param pattern - * the pattern - * @return the URI found, if any. 
null otherwise - */ - public String findFormAction(final Pattern pattern) { - for (final FormTag tag : filter(FormTag.class, - new FormActionPatternFilter(pattern))) { - return tag.getFormLocation(); - } - return null; - } - - private String inputValue(List<InputTag> tags) { - for (final InputTag tag : tags) { - return tag.getAttribute("value"); - } - return null; - } - - public String getInputValue(final String inputName) { - return inputValue(filter(InputTag.class, new InputNameFilter(inputName))); - } - - public int getInputValueAsInt(final String inputName) { - return Integer.parseInt(getInputValue(inputName)); - } - - public String getInputValueById(final String id) { - return inputValue(filter(InputTag.class, new InputIDFilter(id))); - } - - public int getInputValueByIdInt(final String id) { - return Integer.parseInt(inputValue(filter(InputTag.class, - new InputIDFilter(id)))); - } - - public String getInputValue(final Pattern pattern) { - return inputValue(filter(InputTag.class, new InputValuePatternFilter( - pattern))); - } - - public String getTextareaValueById(String id) { - return ((TextareaTag) getTagByID(id)).getStringText(); - } - - public String getTextareaValueByName(String name) { - return ((TextareaTag) getTagByName(name)).getStringText(); - } - - public Tag getTagByID(final String id) { - for (final Tag tag : filter(Tag.class, new IDFilter(id))) { - return tag; - } - return null; - } - - public Tag getTagByName(final String name) { - for (final Tag tag : filter(Tag.class, new NameFilter(name))) { - return tag; - } - return null; - } - - public String findScript(final Pattern pattern, int n) { - for (final ScriptTag tag : filter(ScriptTag.class, - new ScriptContainsFilter(pattern))) { - final Matcher matcher = pattern.matcher(tag.getScriptCode()); - if (matcher.find()) - return matcher.group(n); - } - return null; - } - - public String findScriptSrc(final Pattern pattern) { - for (final ScriptTag tag : filter(ScriptTag.class, new ScriptSrcFilter( 
- 
pattern))) { - final Matcher matcher = pattern.matcher(tag.getAttribute("src")); - if (matcher.matches()) - return matcher.group(); - } - return null; - } - - public int findScriptAsInt(final Pattern pattern, int n) { - String found = findScript(pattern, n); - if (found == null) - return 0; - return Integer.parseInt(found); - } - - public String toString() { - // try { - // return parser.parse(null).toHtml(false); - // } catch (ParserException e1) { - // return null; - // } - return nodes.toHtml(false); - } - - public static HTMLPage parse(String html) { - try { - return new HTMLPage(Parser.createParser(html, null)); - } catch (ParserException e) { - return null; - } - } - - public String asString() { - StringBuffer buff = new StringBuffer(); - for (int i = 0; i < nodes.size(); i++) { - // final String content = nodes.elementAt(i).toPlainTextString() - // .replaceAll("\n", "").replaceAll("\\t", "").trim(); - // if (content.length() > 0) { - // buff.append(" ").append(content); - // } - final String[] lines = nodes.elementAt(i).toPlainTextString() - .split("\n"); - for (final String line : lines) { - final String processed = line.trim(); - if (processed.length() > 0) { - buff.append(line.trim()).append(" "); - } - } - - } - return buff.toString(); - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ImagePatternFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ImagePatternFilter.java deleted file mode 100644 index e26825a..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ImagePatternFilter.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.tags.ImageTag; - -public class ImagePatternFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern pattern; - - public ImagePatternFilter(Pattern pattern) { - this.pattern = pattern; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof ImageTag)) - return false; - final ImageTag frame = (ImageTag) node; - return pattern.matcher(frame.getImageURL()).matches(); - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputIDFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputIDFilter.java deleted file mode 100644 index ec40c9a..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputIDFilter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.tags.InputTag; - -public class InputIDFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final String id; - - public InputIDFilter(String id) { - this.id = id; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof InputTag)) - return false; - final InputTag input = (InputTag) node; - if (input.getAttribute("id") == null) - return false; - if (!input.getAttribute("id").equals(id)) - return false; - return true; - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputNameFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputNameFilter.java deleted file mode 100644 index 76e2a40..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputNameFilter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.tags.InputTag; - -public class InputNameFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final String name; - - public InputNameFilter(String name) { - this.name = name; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof InputTag)) - return false; - final InputTag input = (InputTag) node; - if (input.getAttribute("name") == null) - return false; - if (!input.getAttribute("name").equals(name)) - return false; - return true; - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputValuePatternFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputValuePatternFilter.java deleted file mode 100644 index 2878ae0..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/InputValuePatternFilter.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.tags.InputTag; - -public class InputValuePatternFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern pattern; - - public InputValuePatternFilter(Pattern pattern) { - this.pattern = pattern; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof InputTag)) - return false; - final InputTag input = (InputTag) node; - if (input.getAttribute("value") == null) - return false; - if (!pattern.matcher(input.getAttribute("value")).matches()) - return false; - return true; - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/LinkPatternFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/LinkPatternFilter.java deleted file mode 100644 index db7a15f..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/LinkPatternFilter.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.tags.LinkTag; - -public class LinkPatternFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern pattern; - - public LinkPatternFilter(Pattern pattern) { - this.pattern = pattern; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof LinkTag)) - return false; - final LinkTag link = (LinkTag) node; - return pattern.matcher(link.getLink()).matches(); - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/NameFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/NameFilter.java deleted file mode 100644 index cdbe5a7..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/NameFilter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.Tag; - -public class NameFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final String name; - - public NameFilter(String name) { - this.name = name; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof Tag)) - return false; - final Tag tag = (Tag) node; - if (tag.getAttribute("name") == null) - return false; - if (!tag.getAttribute("name").equals(name)) - return false; - return true; - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ScriptContainsFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ScriptContainsFilter.java deleted file mode 100644 index e987aa6..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ScriptContainsFilter.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.tags.ScriptTag; - -public class ScriptContainsFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern pattern; - - public ScriptContainsFilter(Pattern pattern) { - this.pattern = pattern; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof ScriptTag)) - return false; - final ScriptTag script = (ScriptTag) node; - return pattern.matcher(script.getScriptCode()).find(); - } -} diff --git a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ScriptSrcFilter.java b/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ScriptSrcFilter.java deleted file mode 100644 index 34f5143..0000000 --- a/httpchannel-util/src/main/java/com/rogiel/httpchannel/util/htmlparser/ScriptSrcFilter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package com.rogiel.httpchannel.util.htmlparser; - -import java.util.regex.Pattern; - -import org.htmlparser.Node; -import org.htmlparser.NodeFilter; -import org.htmlparser.tags.ScriptTag; - -public class ScriptSrcFilter implements NodeFilter { - private static final long serialVersionUID = 1L; - private final Pattern pattern; - - public ScriptSrcFilter(Pattern pattern) { - this.pattern = pattern; - } - - @Override - public boolean accept(Node node) { - if (!(node instanceof ScriptTag)) - return false; - final ScriptTag script = (ScriptTag) node; - if (script.getAttribute("src") == null) - return false; - return pattern.matcher(script.getAttribute("src")).matches(); - } -}