X-Git-Url: https://git.argeo.org/?a=blobdiff_plain;f=org.argeo.cms%2Fsrc%2Forg%2Fargeo%2Fcms%2Finternal%2Fkernel%2FNodeHttp.java;h=4a6ad233799246036f73afd3748c538f81dde16c;hb=972528f4de2d00690362c01d3ce843ca9cd10250;hp=1d0f0be446f7591e55f94cb0ba11c2d7d5ffc1cc;hpb=e5c68bdc434baea51c45f16948a615099aaa6c85;p=lgpl%2Fargeo-commons.git diff --git a/org.argeo.cms/src/org/argeo/cms/internal/kernel/NodeHttp.java b/org.argeo.cms/src/org/argeo/cms/internal/kernel/NodeHttp.java index 1d0f0be44..4a6ad2337 100644 --- a/org.argeo.cms/src/org/argeo/cms/internal/kernel/NodeHttp.java +++ b/org.argeo.cms/src/org/argeo/cms/internal/kernel/NodeHttp.java @@ -1,9 +1,24 @@ package org.argeo.cms.internal.kernel; +import static javax.jcr.Property.JCR_DESCRIPTION; +import static javax.jcr.Property.JCR_LAST_MODIFIED; +import static javax.jcr.Property.JCR_TITLE; +import static org.argeo.cms.CmsTypes.CMS_IMAGE; + import java.io.IOException; +import java.io.PrintWriter; +import java.security.PrivilegedExceptionAction; import java.security.cert.X509Certificate; +import java.util.Calendar; +import java.util.Collection; import java.util.Enumeration; +import javax.jcr.Node; +import javax.jcr.NodeIterator; +import javax.jcr.Repository; +import javax.jcr.RepositoryException; +import javax.jcr.Session; +import javax.security.auth.Subject; import javax.servlet.FilterChain; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; @@ -15,7 +30,10 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.argeo.cms.CmsException; import org.argeo.jcr.ArgeoJcrConstants; -import org.eclipse.equinox.http.servlet.ExtendedHttpService; +import org.argeo.jcr.JcrUtils; +import org.osgi.framework.BundleContext; +import org.osgi.framework.ServiceReference; +import org.osgi.service.http.HttpService; /** * Intercepts and enriches http access, mainly focusing on security and @@ -30,14 +48,17 @@ class NodeHttp implements KernelConstants, ArgeoJcrConstants { // private final DoSFilter dosFilter; // private final QoSFilter qosFilter; - NodeHttp(ExtendedHttpService httpService) { + private BundleContext bc; + + NodeHttp(HttpService httpService, BundleContext bc) { + this.bc = bc; // rootFilter = new RootFilter(); // dosFilter = new CustomDosFilter(); // qosFilter = new QoSFilter(); try { httpService.registerServlet("/!", new LinkServlet(), null, null); - // httpService.registerFilter("/", rootFilter, null, null); + httpService.registerServlet("/robots.txt", new RobotServlet(), null, null); } catch (Exception e) { throw new CmsException("Cannot register filters", e); } @@ -50,49 +71,168 @@ class NodeHttp implements KernelConstants, ArgeoJcrConstants { private static final long serialVersionUID = 3749990143146845708L; @Override - protected void service(HttpServletRequest request, - HttpServletResponse response) throws ServletException, - IOException { + protected void service(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { String path = request.getPathInfo(); String userAgent = request.getHeader("User-Agent").toLowerCase(); boolean isBot = false; boolean isCompatibleBrowser = false; - if (userAgent.contains("bot")) { + if (userAgent.contains("bot") || userAgent.contains("facebook") || userAgent.contains("twitter")) { isBot = true; - } else if (userAgent.contains("webkit") - || userAgent.contains("gecko") - || userAgent.contains("firefox") - || userAgent.contains("msie") - || userAgent.contains("chrome") - || userAgent.contains("chromium") - || userAgent.contains("opera") - || userAgent.contains("browser")) { + } else if (userAgent.contains("webkit") || userAgent.contains("gecko") || userAgent.contains("firefox") + || userAgent.contains("msie") || userAgent.contains("chrome") || userAgent.contains("chromium") + || userAgent.contains("opera") || userAgent.contains("browser")) { isCompatibleBrowser = true; } - if (isCompatibleBrowser) {// redirect - response.setHeader("Location", "/#" + path); - response.setStatus(HttpServletResponse.SC_FOUND); - } else { - if (isBot && log.isDebugEnabled()) - log.debug(request.getHeader("User-Agent") + " is a bot"); - // TODO pure html - throw new UnsupportedOperationException(); + if (isBot) { + log.warn("# BOT " + request.getHeader("User-Agent")); + canonicalAnswer(request, response, path); + return; + } + + if (isCompatibleBrowser && log.isTraceEnabled()) + log.trace("# BWS " + request.getHeader("User-Agent")); + redirectTo(response, "/#" + path); + } + + private void redirectTo(HttpServletResponse response, String location) { + response.setHeader("Location", location); + response.setStatus(HttpServletResponse.SC_FOUND); + } + + // private boolean canonicalAnswerNeededBy(HttpServletRequest request) { + // String userAgent = request.getHeader("User-Agent").toLowerCase(); + // return userAgent.startsWith("facebookexternalhit/"); + // } + + /** For bots which don't understand RWT. */ + private void canonicalAnswer(HttpServletRequest request, HttpServletResponse response, String path) { + Session session = null; + try { + PrintWriter writer = response.getWriter(); + session = Subject.doAs(KernelUtils.anonymousLogin(), new PrivilegedExceptionAction() { + + @Override + public Session run() throws Exception { + Collection> srs = bc.getServiceReferences(Repository.class, "(" + + ArgeoJcrConstants.JCR_REPOSITORY_ALIAS + "=" + ArgeoJcrConstants.ALIAS_NODE + ")"); + Repository repository = bc.getService(srs.iterator().next()); + return repository.login(); + } + + }); + Node node = session.getNode(path); + String title = node.hasProperty(JCR_TITLE) ? node.getProperty(JCR_TITLE).getString() : node.getName(); + String desc = node.hasProperty(JCR_DESCRIPTION) ? node.getProperty(JCR_DESCRIPTION).getString() : null; + Calendar lastUpdate = node.hasProperty(JCR_LAST_MODIFIED) + ? node.getProperty(JCR_LAST_MODIFIED).getDate() : null; + String url = KernelUtils.getCanonicalUrl(node, request); + String imgUrl = null; + loop: for (NodeIterator it = node.getNodes(); it.hasNext();) { + // Takes the first found cms:image + Node child = it.nextNode(); + if (child.isNodeType(CMS_IMAGE)) { + imgUrl = KernelUtils.getDataUrl(child, request); + break loop; + } + } + StringBuilder buf = new StringBuilder(); + buf.append(""); + buf.append(""); + writeMeta(buf, "og:title", escapeHTML(title)); + writeMeta(buf, "og:type", "website"); + buf.append(""); + buf.append(""); + writeMeta(buf, "og:url", url); + if (desc != null) + writeMeta(buf, "og:description", escapeHTML(desc)); + if (imgUrl != null) + writeMeta(buf, "og:image", imgUrl); + if (lastUpdate != null) + writeMeta(buf, "og:updated_time", Long.toString(lastUpdate.getTime().getTime())); + buf.append(""); + buf.append(""); + buf.append( + "

!! This page is meant for indexing robots, not for real people," + " visit ").append(escapeHTML(title)).append(" instead.

"); + writeCanonical(buf, node); + buf.append(""); + buf.append(""); + writer.print(buf.toString()); + + response.setHeader("Content-Type", "text/html"); + writer.flush(); + } catch (Exception e) { + throw new CmsException("Cannot write canonical answer", e); + } finally { + JcrUtils.logoutQuietly(session); } } + + /** + * From + * http://stackoverflow.com/questions/1265282/recommended-method-for- + * escaping-html-in-java (+ escaping '). TODO Use + * org.apache.commons.lang.StringEscapeUtils + */ + private String escapeHTML(String s) { + StringBuilder out = new StringBuilder(Math.max(16, s.length())); + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + if (c > 127 || c == '\'' || c == '"' || c == '<' || c == '>' || c == '&') { + out.append("&#"); + out.append((int) c); + out.append(';'); + } else { + out.append(c); + } + } + return out.toString(); + } + + private void writeMeta(StringBuilder buf, String tag, String value) { + buf.append(""); + } + + private void writeCanonical(StringBuilder buf, Node node) throws RepositoryException { + buf.append("
"); + if (node.hasProperty(JCR_TITLE)) + buf.append("

").append(node.getProperty(JCR_TITLE).getString()).append("

"); + if (node.hasProperty(JCR_DESCRIPTION)) + buf.append("

").append(node.getProperty(JCR_DESCRIPTION).getString()).append("

"); + NodeIterator children = node.getNodes(); + while (children.hasNext()) { + writeCanonical(buf, children.nextNode()); + } + buf.append("
"); + } + } + + class RobotServlet extends HttpServlet { + private static final long serialVersionUID = 7935661175336419089L; + + @Override + protected void service(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + PrintWriter writer = response.getWriter(); + writer.append("User-agent: *\n"); + writer.append("Disallow:\n"); + response.setHeader("Content-Type", "text/plain"); + writer.flush(); + } + } /** Intercepts all requests. Authenticates. */ class RootFilter extends HttpFilter { @Override - public void doFilter(HttpSession httpSession, - HttpServletRequest request, HttpServletResponse response, + public void doFilter(HttpSession httpSession, HttpServletRequest request, HttpServletResponse response, FilterChain filterChain) throws IOException, ServletException { if (log.isTraceEnabled()) { - log.trace(request.getRequestURL().append( - request.getQueryString() != null ? "?" - + request.getQueryString() : "")); + log.trace(request.getRequestURL() + .append(request.getQueryString() != null ? "?" + request.getQueryString() : "")); logRequest(request); } @@ -119,13 +259,10 @@ class NodeHttp implements KernelConstants, ArgeoJcrConstants { } // redirect long RWT paths to anchor - String path = request.getRequestURI().substring( - servletPath.length()); + String path = request.getRequestURI().substring(servletPath.length()); int pathLength = path.length(); - if (pathLength != 0 && (path.charAt(0) == '/') - && !servletPath.endsWith("rwt-resources") - && !path.startsWith(KernelConstants.PATH_WORKBENCH) - && path.lastIndexOf('/') != 0) { + if (pathLength != 0 && (path.charAt(0) == '/') && !servletPath.endsWith("rwt-resources") + && !path.startsWith(KernelConstants.PATH_WORKBENCH) && path.lastIndexOf('/') != 0) { String newLocation = request.getServletPath() + "#" + path; response.setHeader("Location", newLocation); response.setStatus(HttpServletResponse.SC_FOUND); @@ -165,8 +302,7 @@ class NodeHttp implements KernelConstants, ArgeoJcrConstants { } private X509Certificate extractCertificate(HttpServletRequest req) { - X509Certificate[] certs = (X509Certificate[]) req - .getAttribute("javax.servlet.request.X509Certificate"); + X509Certificate[] certs = (X509Certificate[]) req.getAttribute("javax.servlet.request.X509Certificate"); if (null != certs && certs.length > 0) { return certs[0]; }