|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
ecologylab.generic.Debug
ecologylab.net.ParsedURL
public class ParsedURL
Extends the URL with many features for the convenience and power of network programmers. New class for manipulating and displaying URLs. Uses lazy evaluation to minimize storage allocation.
| Field Summary | |
|---|---|
protected java.net.URL |
directory
Directory that the document referred to by the URL resides in. |
protected java.lang.String |
domain
|
protected java.net.URL |
hashUrl
URL with hash, that is, a reference to an anchor within the document. |
protected java.lang.String |
lc
|
protected java.lang.String |
string
String representation of the URL. |
protected java.lang.String |
suffix
|
static int |
TIMEOUT
|
protected java.net.URL |
url
this is the no hash url, that is, the one with # and anything after it stripped out. |
| Fields inherited from interface ecologylab.net.MimeType |
|---|
GIF, HTML, JPG, NUM_MEDIA_MIMES, PDF, PNG, RSS, TXT, UNKNOWN_MIME |
| Constructor Summary | |
|---|---|
ParsedURL(java.io.File file)
Create a ParsedURL from a file. |
|
ParsedURL(java.net.URL url)
|
|
| Method Summary | |
|---|---|
PURLConnection |
connect()
Create a connection, using the standard timeouts of 23 seconds, and the super-basic ConnectionAdapter, which does *nothing special* when encountering directories, re-directs, ... |
PURLConnection |
connect(ConnectionHelper connectionHelper)
Create a connection, using the standard timeouts of 23 seconds. |
PURLConnection |
connect(ConnectionHelper connectionHelper,
int connectionTimeout,
int readTimeout)
Create a connection. |
boolean |
crawlable()
Use unsupportedMimes and protocolIsSupported to determine if this is content fit for processing. |
static ParsedURL |
createFromHTML(ParsedURL contextPURL,
java.lang.String addressString,
boolean fromSearchPage)
Called while processing (parsing) HTML. |
ParsedURL |
createFromHTML(java.lang.String addressString)
Called while processing (parsing) HTML. |
ParsedURL |
createFromHTML(java.lang.String addressString,
boolean fromSearchPage)
Called while processing (parsing) HTML. |
java.net.URL |
directory()
Get the URL for the directory associated with this. |
ParsedURL |
directoryPURL()
Form a ParsedURL based on this, if this is a directory. |
java.lang.String |
directoryString()
|
java.lang.String |
domain()
Uses lazy evaluation to minimize storage allocation. |
boolean |
equals(java.lang.Object other)
Return true if the other object is either a ParsedURL or a URL that refers to the same location as this. |
java.io.File |
file()
|
protected static ParsedURL |
get(java.net.URL url,
java.lang.String addressString)
|
static ParsedURL |
getAbsolute(java.lang.String webAddr)
Create a PURL from an absolute address. |
static ParsedURL |
getAbsolute(java.lang.String webAddr,
java.lang.String errorDescriptor)
Create a PURL from an absolute address. |
java.lang.String |
getName()
Returns the name of the file or directory denoted by this abstract pathname. |
ParsedURL |
getRelative(java.lang.String relativeURLPath)
Form a ParsedURL, based on a relative path, using this as the base. |
ParsedURL |
getRelative(java.lang.String relativeURLPath,
java.lang.String errorDescriptor)
Form a ParsedURL, based on a relative path, using this as the base. |
static ParsedURL |
getRelative(java.net.URL base,
java.lang.String relativeURLPath,
java.lang.String errorDescriptor)
Form a new ParsedURL, relative from a supplied base URL. |
boolean |
getTimeout()
|
static java.net.URL |
getURL(java.net.URL base,
java.lang.String path,
java.lang.String error)
|
int |
hashCode()
Hash this by its URL. |
java.net.URL |
hashUrl()
|
boolean |
hasSuffix(java.lang.String s)
|
boolean |
isFile()
True if this ParsedURL represents an entity on the local file system. |
boolean |
isHTML()
Test type of document this refers to. |
static boolean |
isImageSuffix(java.lang.String thatSuffix)
|
boolean |
isImg()
|
boolean |
isJpeg()
|
boolean |
isNoAlpha()
|
boolean |
isNotFileOrExists()
|
boolean |
isPDF()
Test type of document this refers to. |
boolean |
isRSS()
Test type of document this refers to. |
boolean |
isUnsupported()
|
java.lang.String |
lc()
Uses lazy evaluation to minimize storage allocation. |
int |
mediaMimeIndex()
Get Media MimeType indexes. |
int |
mimeIndex()
Get MimeType index by seeing suffix(). |
java.lang.String |
noAnchorNoQueryPageString()
|
java.lang.String |
noAnchorPageString()
|
java.lang.String |
pathDirectoryString()
|
boolean |
protocolIsSupported()
Check whether the protocol is supported or not. |
static boolean |
protocolIsSupported(java.lang.String protocol)
Check whether the protocol is supported or not. |
boolean |
protocolIsUnsupported()
Check whether the protocol is supported or not. |
static boolean |
protocolIsUnsupported(java.lang.String protocol)
Check whether the protocol is supported or not. |
void |
recycle()
Free all resources associated with this, rendering it no longer usable. |
java.lang.String |
removePunctuation()
|
void |
resetCaches()
Free some memory resources. |
boolean |
sameDomain(ParsedURL other)
|
boolean |
sameHost(ParsedURL other)
|
java.lang.String |
shortString()
A shorter string for displaying in the modeline for debugging, and in popup messages. |
java.lang.String |
suffix()
Uses lazy evaluation to minimize storage allocation. |
static java.lang.String |
suffix(java.lang.String lc)
|
boolean |
supportedMime()
|
java.lang.String |
toString()
Uses lazy evaluation to minimize storage allocation. |
ElementState |
translateFromXML(TranslationSpace translationSpace)
Use this as the source of stuff to translate from XML |
java.net.URL |
url()
Uses lazy evaluation to minimize storage allocation. |
ParsedURL |
withArgs(java.lang.String args)
Form a new ParsedURL from this, and the args passed in. |
| Methods inherited from class ecologylab.generic.Debug |
|---|
classSimpleName, closeLoggingFile, debug, debug, debug, debug, debugA, debugA, debugA, debugI, debugI, debugI, error, error, getClassName, getClassName, getInteractive, getPackageName, getPackageName, getPackageName, initialize, level, level, level, logToFile, print, print, println, println, println, println, println, println, printlnA, printlnA, printlnA, printlnI, printlnI, printlnI, printlnI, setLoggingFile, show, show, superString, toggleInteractive, toString, warning, warning, weird, weird |
| Methods inherited from class java.lang.Object |
|---|
clone, finalize, getClass, notify, notifyAll, wait, wait, wait |
| Field Detail |
|---|
protected java.net.URL url
protected java.net.URL hashUrl
protected java.net.URL directory
protected java.lang.String string
protected java.lang.String lc
protected java.lang.String suffix
protected java.lang.String domain
public static final int TIMEOUT
| Constructor Detail |
|---|
public ParsedURL(java.net.URL url)
public ParsedURL(java.io.File file)
file -
| Method Detail |
|---|
public boolean isNotFileOrExists()
public static ParsedURL getAbsolute(java.lang.String webAddr)
public static ParsedURL getAbsolute(java.lang.String webAddr,
java.lang.String errorDescriptor)
webAddr - url string
errorDescriptor - which will be printed out in the trace file if something goes wrong while
converting from the url string to URL.
public final ParsedURL getRelative(java.lang.String relativeURLPath,
java.lang.String errorDescriptor)
relativeURLPath - Path relative to this.
errorDescriptor -
public final ParsedURL getRelative(java.lang.String relativeURLPath)
relativeURLPath - Path relative to this.
public static ParsedURL getRelative(java.net.URL base,
java.lang.String relativeURLPath,
java.lang.String errorDescriptor)
relativeURLPath - errorDescriptor -
public ElementState translateFromXML(TranslationSpace translationSpace)
throws XMLTranslationException
translationSpace - Translations that specify package + class names for translating.
XMLTranslationException
public static java.net.URL getURL(java.net.URL base,
java.lang.String path,
java.lang.String error)
public java.lang.String toString()
toString in class Debug
public java.lang.String lc()
public java.lang.String suffix()
public ParsedURL directoryPURL()
public java.net.URL directory()
public java.lang.String domain()
public static java.lang.String suffix(java.lang.String lc)
public final java.net.URL url()
public final java.net.URL hashUrl()
public java.lang.String noAnchorNoQueryPageString()
public java.lang.String noAnchorPageString()
public final boolean hasSuffix(java.lang.String s)
public ParsedURL createFromHTML(java.lang.String addressString)
ParsedURLs from urlStrings in
response to such as the a element's href
attribute, the img element's src attribute,
etc.
Does processing of some fancy stuff, like, in the case of
javascript: URLs, it mines them for embedded absolute
URLs, if possible, and uses only those embedded URLs.
addressString - This may specify a relative or absolute url.
javascript:.
public ParsedURL createFromHTML(java.lang.String addressString,
boolean fromSearchPage)
ParsedURLs from urlStrings in
response to such as the a element's href
attribute, the img element's src attribute,
etc.
Does processing of some fancy stuff, like, in the case of
javascript: URLs, it mines them for embedded absolute
URLs, if possible, and uses only those embedded URLs.
addressString - This may specify a relative or absolute url.
fromSearchPage - If false, then add / to the end
of the URL if it seems to be a directory.
javascript:.
protected static ParsedURL get(java.net.URL url,
java.lang.String addressString)
public static ParsedURL createFromHTML(ParsedURL contextPURL,
java.lang.String addressString,
boolean fromSearchPage)
ParsedURLs from urlStrings in
response to such as the a element's href
attribute, the img element's src attribute,
etc.
Does processing of some fancy stuff, like, in the case of
javascript: URLs, it mines them for embedded absolute
URLs, if possible, and uses only those embedded URLs.
addressString - This may specify a relative or absolute url.
fromSearchPage - If false, then add / to the end
of the URL if it seems to be a directory.
javascript:.
public java.lang.String removePunctuation()
public boolean sameDomain(ParsedURL other)
public boolean sameHost(ParsedURL other)
public boolean crawlable()
public boolean protocolIsSupported()
public static boolean protocolIsSupported(java.lang.String protocol)
public boolean protocolIsUnsupported()
public static boolean protocolIsUnsupported(java.lang.String protocol)
public boolean isImg()
public static boolean isImageSuffix(java.lang.String thatSuffix)
thatSuffix -
public boolean isJpeg()
public boolean isNoAlpha()
public boolean isHTML()
public boolean isPDF()
public boolean isRSS()
public int mimeIndex()
parsedURL -
public int mediaMimeIndex()
parsedURL -
public boolean isUnsupported()
public boolean supportedMime()
public java.lang.String directoryString()
public java.lang.String pathDirectoryString()
public boolean equals(java.lang.Object other)
equals in class java.lang.Object
public int hashCode()
hashCode in class java.lang.Object
public java.lang.String shortString()
public boolean isFile()
public java.io.File file()
public ParsedURL withArgs(java.lang.String args)
args -
public java.lang.String getName()
public PURLConnection connect()
connectionHelper -
public PURLConnection connect(ConnectionHelper connectionHelper)
connectionHelper -
public PURLConnection connect(ConnectionHelper connectionHelper,
int connectionTimeout,
int readTimeout)
connectionHelper - connectionTimeout - readTimeout -
public boolean getTimeout()
public void resetCaches()
public void recycle()
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||