FUNCTION StripUnwantedHTML RETURNS CHARACTER
    (INPUT pcContext AS CHARACTER):
    /*------------------------------------------------------------------------
      Purpose: Strips HTML tags from the passed string, except for tags whose
               element name is listed in the xcAllowedHTMLElements
               preprocessor. Only the tags themselves are removed; the text
               between an opening and a closing tag is kept.
      Input:   pcContext - the text/HTML to clean up.
      Returns: pcContext with every non-whitelisted "<...>" sequence removed.
      Notes:   - A tag is anything from a '<' up to the next '>'. A '<' with
                 no following '>' ends the scan and the remaining text is
                 returned untouched.
               - Only the element NAME is checked. Attributes on a retained
                 tag (href, onclick, ...) are passed through unmodified, so
                 this is not a complete sanitiser for untrusted HTML.
    ------------------------------------------------------------------------*/
    DEFINE VARIABLE cElement       AS CHARACTER NO-UNDO INITIAL ''.
    DEFINE VARIABLE iEndPos        AS INTEGER   NO-UNDO INITIAL 0.
    DEFINE VARIABLE iStartPos      AS INTEGER   NO-UNDO INITIAL 0.
    DEFINE VARIABLE iElementLength AS INTEGER   NO-UNDO INITIAL 0.

    &SCOPED-DEFINE xcAllowedHTMLElements 'a,em,strong,cite,code,ul,ol,li,dl,dt,dd,img'
    &SCOPED-DEFINE xcHTMLPrefix '<'
    &SCOPED-DEFINE xcHTMLSuffix '>'
    &SCOPED-DEFINE xcHTMLClosing '~/'
    &SCOPED-DEFINE xcNullChar ''
    &SCOPED-DEFINE xcSpaceChar ' '

    /* Find the first opening tag marker. */
    iStartPos = INDEX(pcContext, {&xcHTMLPrefix}).

    DO WHILE iStartPos GT 0 AND LENGTH(pcContext) GT 0:

        /* Find the '>' that closes the tag starting at iStartPos. */
        iEndPos = INDEX(pcContext, {&xcHTMLSuffix}, iStartPos).

        /* No closing '>' anywhere after this '<': nothing left to strip. */
        IF iEndPos EQ 0 THEN
            LEAVE.

        ASSIGN
            /* Full length of the tag, including both '<' and '>'. */
            iElementLength = iEndPos - iStartPos + 1
            cElement       = SUBSTRING(pcContext, iStartPos, iElementLength)
            /* The element name may be separated from its attributes by a
               tab or a line break rather than a space; normalise these so
               that e.g. "<a" + newline + "href=...>" is recognised as "a". */
            cElement       = REPLACE(cElement, CHR(9),  {&xcSpaceChar})
            cElement       = REPLACE(cElement, CHR(10), {&xcSpaceChar})
            cElement       = REPLACE(cElement, CHR(13), {&xcSpaceChar})
            /* Keep only the element name, ignoring any attributes. */
            cElement       = ENTRY(1, cElement, {&xcSpaceChar})
            /* Drop the '<', '>' and '/' markers, leaving the bare name. */
            cElement       = REPLACE(cElement, {&xcHTMLPrefix},  {&xcNullChar})
            cElement       = REPLACE(cElement, {&xcHTMLSuffix},  {&xcNullChar})
            cElement       = REPLACE(cElement, {&xcHTMLClosing}, {&xcNullChar}).

        /* LOOKUP is a plain comma-list membership test. CAN-DO is meant for
           user-id/permission matching and applies its own pattern rules. */
        IF cElement NE {&xcNullChar} AND
           LOOKUP(cElement, {&xcAllowedHTMLElements}) GT 0 THEN
            /* Allowed element: keep the tag and continue searching after
               its closing '>'. */
            iStartPos = INDEX(pcContext, {&xcHTMLPrefix}, iEndPos).
        ELSE
        DO:
            /* Unwanted element: cut the whole tag out of the string. The
               text that followed it has now shifted to iStartPos, so the
               search resumes from that same position. */
            SUBSTRING(pcContext, iStartPos, iElementLength) = {&xcNullChar}.
            iStartPos = INDEX(pcContext, {&xcHTMLPrefix}, iStartPos).
        END.

    END. /* DO WHILE iStartPos GT 0 ... */

    RETURN pcContext.

END FUNCTION.
/* Smoke test for the StripUnwantedHTML function: runs it on a small sample
   HTML document and shows the returned string in an information alert box. */
MESSAGE StripUnwantedHTML('<!DOCTYPE html><html><head><title>Hello HTML </title></head><body><p>Hello World! Click <a href="http://www.ekkoguardian.com">here</a> for a great service.</p> </body></html>')
    VIEW-AS ALERT-BOX INFORMATION.