001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.util.CharPool;
018 import com.liferay.portal.kernel.util.Html;
019 import com.liferay.portal.kernel.util.HttpUtil;
020 import com.liferay.portal.kernel.util.StringBundler;
021 import com.liferay.portal.kernel.util.StringPool;
022 import com.liferay.portal.kernel.util.StringUtil;
023
024 import java.util.regex.Matcher;
025 import java.util.regex.Pattern;
026
027 import net.htmlparser.jericho.Source;
028 import net.htmlparser.jericho.TextExtractor;
029
030
038 public class HtmlImpl implements Html {
039
040 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
041
042 public static final int ESCAPE_MODE_CSS = 2;
043
044 public static final int ESCAPE_MODE_JS = 3;
045
046 public static final int ESCAPE_MODE_TEXT = 4;
047
048 public static final int ESCAPE_MODE_URL = 5;
049
050 public String escape(String text) {
051 if (text == null) {
052 return null;
053 }
054
055 if (text.length() == 0) {
056 return StringPool.BLANK;
057 }
058
059
060
061
062
063 StringBundler sb = null;
064
065 int lastReplacementIndex = 0;
066
067 for (int i = 0; i < text.length(); i++) {
068 char c = text.charAt(i);
069
070 String replacement = null;
071
072 switch (c) {
073 case '<':
074 replacement = "<";
075
076 break;
077
078 case '>':
079 replacement = ">";
080
081 break;
082
083 case '&':
084 replacement = "&";
085
086 break;
087
088 case '"':
089 replacement = """;
090
091 break;
092
093 case '\'':
094 replacement = "'";
095
096 break;
097
098 case '\u00bb':
099 replacement = "»";
100
101 break;
102
103 case '\u2013':
104 replacement = "–";
105
106 break;
107
108 case '\u2014':
109 replacement = "—";
110
111 break;
112 }
113
114 if (replacement != null) {
115 if (sb == null) {
116 sb = new StringBundler();
117 }
118
119 if (i > lastReplacementIndex) {
120 sb.append(text.substring(lastReplacementIndex, i));
121 }
122
123 sb.append(replacement);
124
125 lastReplacementIndex = i + 1;
126 }
127 }
128
129 if (sb == null) {
130 return text;
131 }
132 else {
133 if (lastReplacementIndex < text.length()) {
134 sb.append(text.substring(lastReplacementIndex));
135 }
136
137 return sb.toString();
138 }
139 }
140
141 public String escape(String text, int type) {
142 if (text == null) {
143 return null;
144 }
145
146 if (text.length() == 0) {
147 return StringPool.BLANK;
148 }
149
150 String prefix = StringPool.BLANK;
151 String postfix = StringPool.BLANK;
152
153 if (type == ESCAPE_MODE_ATTRIBUTE) {
154 prefix = "&#x";
155 postfix = StringPool.SEMICOLON;
156 }
157 else if (type == ESCAPE_MODE_CSS) {
158 prefix = StringPool.BACK_SLASH;
159 }
160 else if (type == ESCAPE_MODE_JS) {
161 prefix = "\\x";
162 }
163 else if (type == ESCAPE_MODE_URL) {
164 return HttpUtil.encodeURL(text, true);
165 }
166 else {
167 return escape(text);
168 }
169
170 StringBuilder sb = new StringBuilder();
171
172 for (int i = 0; i < text.length(); i++) {
173 char c = text.charAt(i);
174
175 if ((Character.isLetterOrDigit(c)) ||
176 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
177
178 sb.append(c);
179 }
180 else {
181 sb.append(prefix);
182
183 String hexString = StringUtil.toHexString(c);
184
185 if (hexString.length() == 1) {
186 sb.append(StringPool.ASCII_TABLE[48]);
187 }
188
189 sb.append(hexString);
190 sb.append(postfix);
191 }
192 }
193
194 if (sb.length() == text.length()) {
195 return text;
196 }
197 else {
198 return sb.toString();
199 }
200 }
201
202 public String escapeAttribute(String attribute) {
203 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
204 }
205
206 public String escapeCSS(String css) {
207 return escape(css, ESCAPE_MODE_CSS);
208 }
209
210 public String escapeHREF(String href) {
211 if (href == null) {
212 return null;
213 }
214
215 if (href.length() == 0) {
216 return StringPool.BLANK;
217 }
218
219 if (href.indexOf(StringPool.COLON) == 10) {
220 String protocol = href.substring(0, 10).toLowerCase();
221
222 if (protocol.equals("javascript")) {
223 return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
224 }
225 }
226
227 return href;
228 }
229
230 public String escapeJS(String js) {
231 return escape(js, ESCAPE_MODE_JS);
232 }
233
234 public String escapeURL(String url) {
235 return escape(url, ESCAPE_MODE_URL);
236 }
237
238 public String extractText(String html) {
239 if (html == null) {
240 return null;
241 }
242
243 Source source = new Source(html);
244
245 TextExtractor textExtractor = source.getTextExtractor();
246
247 return textExtractor.toString();
248 }
249
250 public String fromInputSafe(String text) {
251 return StringUtil.replace(text, "&", "&");
252 }
253
254 public String replaceMsWordCharacters(String text) {
255 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
256 }
257
258 public String stripBetween(String text, String tag) {
259 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
260 }
261
262 public String stripComments(String text) {
263 return StringUtil.stripBetween(text, "<!--", "-->");
264 }
265
266 public String stripHtml(String text) {
267 if (text == null) {
268 return null;
269 }
270
271 text = stripComments(text);
272
273 StringBuilder sb = new StringBuilder(text.length());
274
275 int x = 0;
276 int y = text.indexOf("<");
277
278 while (y != -1) {
279 sb.append(text.substring(x, y));
280 sb.append(StringPool.SPACE);
281
282
283
284 boolean scriptFound = isScriptTag(text, y + 1);
285
286 if (scriptFound) {
287 int pos = y + _TAG_SCRIPT.length;
288
289
290
291 pos = text.indexOf(">", pos);
292
293 if (pos >= 0) {
294
295
296
297
298 if (text.charAt(pos-1) != '/') {
299
300
301
302 for (;;) {
303 pos = text.indexOf("</", pos);
304
305 if (pos >= 0) {
306 if (isScriptTag(text, pos + 2)) {
307 y = pos;
308
309 break;
310 }
311 else {
312
313
314
315 pos += 2;
316 }
317 }
318 else {
319 break;
320 }
321 }
322 }
323 }
324 }
325
326 x = text.indexOf(">", y);
327
328 if (x == -1) {
329 break;
330 }
331
332 x++;
333
334 if (x < y) {
335
336
337
338 break;
339 }
340
341 y = text.indexOf("<", x);
342 }
343
344 if (y == -1) {
345 sb.append(text.substring(x, text.length()));
346 }
347
348 return sb.toString();
349 }
350
351 public String toInputSafe(String text) {
352 return StringUtil.replace(
353 text,
354 new String[] {"&", "\""},
355 new String[] {"&", """});
356 }
357
358 public String unescape(String text) {
359 if (text == null) {
360 return null;
361 }
362
363 if (text.length() == 0) {
364 return StringPool.BLANK;
365 }
366
367
368
369 text = StringUtil.replace(text, "<", "<");
370 text = StringUtil.replace(text, ">", ">");
371 text = StringUtil.replace(text, "&", "&");
372 text = StringUtil.replace(text, """, "\"");
373 text = StringUtil.replace(text, "'", "'");
374 text = StringUtil.replace(text, "(", "(");
375 text = StringUtil.replace(text, ")", ")");
376 text = StringUtil.replace(text, ",", ",");
377 text = StringUtil.replace(text, "#", "#");
378 text = StringUtil.replace(text, "%", "%");
379 text = StringUtil.replace(text, ";", ";");
380 text = StringUtil.replace(text, "=", "=");
381 text = StringUtil.replace(text, "+", "+");
382 text = StringUtil.replace(text, "-", "-");
383
384 return text;
385 }
386
387 public String unescapeCDATA(String text) {
388 if (text == null) {
389 return null;
390 }
391
392 if (text.length() == 0) {
393 return StringPool.BLANK;
394 }
395
396 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
397 text = StringUtil.replace(text, "]]>", "]]>");
398
399 return text;
400 }
401
402 public String wordBreak(String text, int columns) {
403 StringBundler sb = new StringBundler();
404
405 int length = 0;
406 int lastWrite = 0;
407 int pos = 0;
408
409 Pattern pattern = Pattern.compile("([\\s<&]|$)");
410
411 Matcher matcher = pattern.matcher(text);
412
413 while (matcher.find()) {
414 if (matcher.start() < pos) {
415 continue;
416 }
417
418 while ((length + matcher.start() - pos) >= columns) {
419 pos += columns - length;
420
421 sb.append(text.substring(lastWrite, pos));
422 sb.append("<wbr/>­");
423
424 length = 0;
425 lastWrite = pos;
426 }
427
428 length += matcher.start() - pos;
429
430 String group = matcher.group();
431
432 if (group.equals(StringPool.AMPERSAND)) {
433 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
434
435 if (x != -1) {
436 length++;
437 pos = x + 1;
438 }
439
440 continue;
441 }
442
443 if (group.equals(StringPool.LESS_THAN)) {
444 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
445
446 if (x != -1) {
447 pos = x + 1;
448 }
449
450 continue;
451 }
452
453 if (group.equals(StringPool.SPACE) ||
454 group.equals(StringPool.NEW_LINE)) {
455
456 length = 0;
457 pos = matcher.start() + 1;
458 }
459 }
460
461 sb.append(text.substring(lastWrite));
462
463 return sb.toString();
464 }
465
466 protected boolean isScriptTag(String text, int pos) {
467 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
468 char item;
469
470 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
471 item = text.charAt(pos++);
472
473 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
474 return false;
475 }
476 }
477
478 item = text.charAt(pos);
479
480
481
482 return !Character.isLetter(item);
483 }
484 else {
485 return false;
486 }
487 }
488
489 private static final String[] _MS_WORD_HTML = new String[] {
490 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
491 };
492
493 private static final String[] _MS_WORD_UNICODE = new String[] {
494 "\u00ae", "\u2019", "\u201c", "\u201d"
495 };
496
497 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
498
499 }