DefaultDownloader.java

  1. /*
  2.  * *********************************************************************************************************************
  3.  *
  4.  * blueMarine II: Semantic Media Centre
  5.  * http://tidalwave.it/projects/bluemarine2
  6.  *
  7.  * Copyright (C) 2015 - 2021 by Tidalwave s.a.s. (http://tidalwave.it)
  8.  *
  9.  * *********************************************************************************************************************
  10.  *
  11.  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  12.  * the License. You may obtain a copy of the License at
  13.  *
  14.  *     http://www.apache.org/licenses/LICENSE-2.0
  15.  *
  16.  * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  17.  * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the
  18.  * specific language governing permissions and limitations under the License.
  19.  *
  20.  * *********************************************************************************************************************
  21.  *
  22.  * git clone https://bitbucket.org/tidalwave/bluemarine2-src
  23.  * git clone https://github.com/tidalwave-it/bluemarine2-src
  24.  *
  25.  * *********************************************************************************************************************
  26.  */
  27. package it.tidalwave.bluemarine2.downloader.impl;

  28. import javax.annotation.Nonnull;
  29. import javax.annotation.PostConstruct;
  30. import javax.inject.Inject;
  31. import java.util.List;
  32. import java.util.Date;
  33. import java.io.ByteArrayOutputStream;
  34. import java.io.IOException;
  35. import java.net.URISyntaxException;
  36. import java.net.URL;
  37. import it.tidalwave.util.annotation.VisibleForTesting;
  38. import org.apache.http.HttpRequest;
  39. import org.apache.http.HttpResponse;
  40. import org.apache.http.ProtocolException;
  41. import org.apache.http.client.RedirectStrategy;
  42. import org.apache.http.client.cache.CacheResponseStatus;
  43. import org.apache.http.client.cache.HttpCacheContext;
  44. import org.apache.http.client.cache.HttpCacheEntry;
  45. import org.apache.http.client.cache.Resource;
  46. import org.apache.http.client.methods.CloseableHttpResponse;
  47. import org.apache.http.client.methods.HttpGet;
  48. import org.apache.http.client.methods.HttpUriRequest;
  49. import org.apache.http.message.BasicHeader;
  50. import org.apache.http.protocol.HttpContext;
  51. import org.apache.http.impl.client.CloseableHttpClient;
  52. import org.apache.http.impl.client.cache.CacheConfig;
  53. import org.apache.http.impl.client.cache.CachingHttpClients;
  54. import org.apache.http.impl.client.cache.HeapResource;
  55. import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
  56. import it.tidalwave.util.NotFoundException;
  57. import it.tidalwave.messagebus.MessageBus;
  58. import it.tidalwave.messagebus.annotation.ListensTo;
  59. import it.tidalwave.messagebus.annotation.SimpleMessageSubscriber;
  60. import it.tidalwave.bluemarine2.message.PowerOnNotification;
  61. import it.tidalwave.bluemarine2.downloader.DownloadComplete;
  62. import it.tidalwave.bluemarine2.downloader.DownloadComplete.Origin;
  63. import it.tidalwave.bluemarine2.downloader.DownloadRequest;
  64. import lombok.Cleanup;
  65. import lombok.extern.slf4j.Slf4j;
  66. import org.apache.http.HttpResponseInterceptor;
  67. import static it.tidalwave.bluemarine2.downloader.DownloaderPropertyNames.CACHE_FOLDER_PATH;

  68. /***********************************************************************************************************************
  69.  *
  70.  * @author  Fabrizio Giudici
  71.  *
  72.  **********************************************************************************************************************/
  73. @SimpleMessageSubscriber @Slf4j
  74. public class DefaultDownloader
  75.   {
  76.     @Inject
  77.     private MessageBus messageBus;

  78.     @Inject
  79.     private SimpleHttpCacheStorage cacheStorage;

  80.     private PoolingHttpClientConnectionManager connectionManager;

  81.     private CacheConfig cacheConfig;

  82.     private CloseableHttpClient httpClient;

  83. private final HttpResponseInterceptor killCacheHeaders = (HttpResponse
  84.  response, HttpContext context) ->
  85.  {
  86.  response.removeHeaders("Expires");
  87.  response.removeHeaders("Pragma");
  88.  response.removeHeaders("Cache-Control");
  89.  response.addHeader("Expires", "Mon, 31 Dec 2099 00:00:00 GMT");
  90.  };

  91.     /*******************************************************************************************************************
  92.      *
  93.      *
  94.      *
  95.      ******************************************************************************************************************/
  96.     // FIXME: this is because there's a fix, and we explicitly save stuff in the cache - see below
  97.     private final RedirectStrategy dontFollowRedirect = new RedirectStrategy()
  98.       {
  99.         @Override
  100.         public boolean isRedirected (HttpRequest request, HttpResponse response, HttpContext context)
  101.           throws ProtocolException
  102.           {
  103.             return false;
  104.           }

  105.         @Override
  106.         public HttpUriRequest getRedirect (HttpRequest request, HttpResponse response, HttpContext context)
  107.           throws ProtocolException
  108.           {
  109.             return null;
  110.           }
  111.       };

  112.     /*******************************************************************************************************************
  113.      *
  114.      *
  115.      *
  116.      ******************************************************************************************************************/
  117.     @PostConstruct
  118.     @VisibleForTesting void initialize()
  119.       {
  120.         connectionManager = new PoolingHttpClientConnectionManager();
  121.         connectionManager.setMaxTotal(200);
  122.         connectionManager.setDefaultMaxPerRoute(20);

  123.         cacheConfig = CacheConfig.custom()
  124.                 .setAllow303Caching(true)
  125.                 .setMaxCacheEntries(Integer.MAX_VALUE)
  126.                 .setMaxObjectSize(Integer.MAX_VALUE)
  127.                 .setSharedCache(false)
  128.                 .setHeuristicCachingEnabled(true)
  129.                 .build();
  130.         httpClient = CachingHttpClients.custom()
  131.                 .setHttpCacheStorage(cacheStorage)
  132.                 .setCacheConfig(cacheConfig)
  133.                 .setRedirectStrategy(dontFollowRedirect)
  134.                 .setUserAgent("blueMarine (fabrizio.giudici@tidalwave.it)")
  135.                 .setDefaultHeaders(List.of(new BasicHeader("Accept", "application/n3")))
  136.                 .setConnectionManager(connectionManager)
  137.                 .addInterceptorFirst(killCacheHeaders) // FIXME: only if  explicitly configured
  138.          .build();
  139.       }

  140.     /*******************************************************************************************************************
  141.      *
  142.      *
  143.      *
  144.      ******************************************************************************************************************/
  145.     @VisibleForTesting void onPowerOnNotification (@ListensTo @Nonnull final PowerOnNotification notification)
  146.       throws NotFoundException
  147.       {
  148.         log.info("onPowerOnNotification({})", notification);
  149.         cacheStorage.setFolderPath(notification.getProperties().get(CACHE_FOLDER_PATH));
  150.       }

  151.     /*******************************************************************************************************************
  152.      *
  153.      *
  154.      *
  155.      ******************************************************************************************************************/
  156.     @VisibleForTesting void onDownloadRequest (@ListensTo @Nonnull final DownloadRequest request)
  157.       throws URISyntaxException
  158.       {
  159.         try
  160.           {
  161.             log.info("onDownloadRequest({})", request);

  162.             URL url = request.getUrl();

  163.             for (;;)
  164.               {
  165.                 final HttpCacheContext context = HttpCacheContext.create();
  166.                 @Cleanup final CloseableHttpResponse response = httpClient.execute(new HttpGet(url.toURI()), context);
  167.                 final byte[] bytes = bytesFrom(response);
  168.                 final CacheResponseStatus cacheResponseStatus = context.getCacheResponseStatus();
  169.                 log.debug(">>>> cacheResponseStatus: {}", cacheResponseStatus);

  170.                 final Origin origin = cacheResponseStatus.equals(CacheResponseStatus.CACHE_HIT) ? Origin.CACHE
  171.                                                                                                 : Origin.NETWORK;

  172.                 // FIXME: shouldn't do this by myself
  173.                 // FIXME: upon configuration, everything should be cached (needed for supporting integration tests)
  174.                 if (!origin.equals(Origin.CACHE) && List.of(200, 303).contains(response.getStatusLine().getStatusCode()))
  175.                   {
  176.                     final Date date = new Date();
  177.                     final Resource resource = new HeapResource(bytes);
  178.                     cacheStorage.putEntry(url.toExternalForm(),
  179.                             new HttpCacheEntry(date, date, response.getStatusLine(), response.getAllHeaders(), resource));
  180.                   }

  181.                 // FIXME: if the redirect were enabled, we could drop this check
  182.                 if (request.isOptionPresent(DownloadRequest.Option.FOLLOW_REDIRECT)
  183.                     && response.getStatusLine().getStatusCode() == 303) // SEE_OTHER FIXME
  184.                   {
  185.                     url = new URL(response.getFirstHeader("Location").getValue());
  186.                     log.info(">>>> following 'see also' to {} ...", url);
  187.                   }
  188.                 else
  189.                   {
  190.                     messageBus.publish(new DownloadComplete(request.getUrl(),
  191.                                                             response.getStatusLine().getStatusCode(),
  192.                                                             bytes,
  193.                                                             origin));
  194.                     return;
  195.                   }
  196.               }
  197.           }
  198.         catch (IOException e)
  199.           {
  200.             log.error("{}: {}", request.getUrl(), e.toString());
  201.             messageBus.publish(new DownloadComplete(request.getUrl(), -1, new byte[0], Origin.NETWORK));
  202.           }
  203.       }

  204.     /*******************************************************************************************************************
  205.      *
  206.      *
  207.      *
  208.      ******************************************************************************************************************/
  209.     @Nonnull
  210.     private byte[] bytesFrom (@Nonnull final HttpResponse response)
  211.       throws IOException
  212.       {
  213.         final ByteArrayOutputStream baos = new ByteArrayOutputStream();

  214.         if (response.getEntity() != null)
  215.           {
  216.             response.getEntity().writeTo(baos);
  217.           }

  218.         return baos.toByteArray();
  219.       }
  220.   }