Skip to content

Commit

Permalink
Fail gracefully on SPLASH_URL without protocol
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Apr 5, 2019
1 parent e40ca4f commit 9caf83d
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
6 changes: 6 additions & 0 deletions scrapy_splash/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import copy
import json
import logging
import re
import warnings
from collections import defaultdict

Expand Down Expand Up @@ -233,6 +234,11 @@ def __init__(self, crawler, splash_base_url, slot_policy, log_400):
def from_crawler(cls, crawler):
splash_base_url = crawler.settings.get('SPLASH_URL',
cls.default_splash_url)
if not re.match('^https?://', splash_base_url):
raise NotConfigured(
'The SPLASH_URL setting does not start with http:// or '
'https://: {}'.format(splash_base_url)
)
log_400 = crawler.settings.getbool('SPLASH_LOG_400', True)
slot_policy = crawler.settings.get('SPLASH_SLOT_POLICY',
cls.default_policy)
Expand Down
14 changes: 14 additions & 0 deletions tests/test_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import json
import base64

from pytest import raises
import scrapy
from scrapy.core.engine import ExecutionEngine
from scrapy.exceptions import NotConfigured
from scrapy.utils.test import get_crawler
from scrapy.http import Response, TextResponse
from scrapy.downloadermiddlewares.httpcache import HttpCacheMiddleware
Expand Down Expand Up @@ -765,3 +767,15 @@ def test_adjust_timeout():
})
req2 = mw.process_request(req2, None)
assert req2.meta['download_timeout'] == 30


def test_bad_splash_url():
    """A SPLASH_URL without an http:// or https:// prefix is rejected.

    ``from_crawler`` must raise ``NotConfigured`` instead of building a
    middleware around a base URL that lacks a protocol.
    """
    crawler = _get_crawler({'SPLASH_URL': 'localhost:1234'})
    with raises(NotConfigured):
        # Only the raised exception matters; the return value is not needed.
        SplashMiddleware.from_crawler(crawler)


def test_bad_slot_policy():
    """An unrecognised SPLASH_SLOT_POLICY value is rejected.

    ``from_crawler`` is expected to raise ``NotConfigured`` when the
    setting holds a value such as ``'asdf'``.
    """
    crawler = _get_crawler({'SPLASH_SLOT_POLICY': 'asdf'})
    with raises(NotConfigured):
        # Only the raised exception matters; the return value is not needed.
        SplashMiddleware.from_crawler(crawler)

0 comments on commit 9caf83d

Please sign in to comment.