We are having a problem with one of our websites: sometimes we see a long URL that is not related to any real page on our site. For example, the URL should be
https://example.com/browse
But about one time in a hundred we get this instead:
https://example.com/index.php/module/action/param1/static/PFBC/js/jquery/rss/signup/static/js/jquery/templates/themes/love/img/icon/asset/css/legal/user/album/tipocorneo/me/browse
It's not always the same URL; it changes from time to time. My question is this: is there a rewrite rule we could use that simply removes the extra part of the URL if it's present? Here is our current .htaccess file:
# Turn the rewrite engine on, then permanently (301) redirect every request
# that did not arrive over TLS to the same host and path on https://.
RewriteEngine On
RewriteCond %{HTTPS} !=on
RewriteRule ^ https://%{HTTP_HOST}%{REQUEST_URI} [R=301,L]
# Directory options, applied only when mod_rewrite is loaded.
<IfModule mod_rewrite.c>
# Content negotiation (MultiViews) and automatic directory listings (Indexes)
# are switched off, but only when mod_negotiation is also loaded.
# NOTE(review): -Indexes does not depend on mod_negotiation; as written,
# directory listings stay at the inherited setting when that module is
# absent. Confirm whether that is intended.
<IfModule mod_negotiation.c>
Options -MultiViews -Indexes
</IfModule>
# Allow the server to follow symbolic links in this directory.
Options +FollowSymLinks
</IfModule>
# PHP hardening when PHP runs as an Apache module: disallow include/require
# of remote URLs and stop PHP from advertising itself in response headers.
# The module source identifier differs by PHP major version (mod_php5.c for
# PHP 5, mod_php7.c for PHP 7, mod_php.c for PHP 8+), so the same flags are
# repeated for each; only the block matching the loaded module takes effect.
# NOTE(review): current PHP releases treat both settings as system-level ini
# options, so they may have to be set in php.ini instead - confirm.
<IfModule mod_php5.c>
    php_flag allow_url_include Off
    php_flag expose_php Off
</IfModule>
<IfModule mod_php7.c>
    php_flag allow_url_include Off
    php_flag expose_php Off
</IfModule>
<IfModule mod_php.c>
    php_flag allow_url_include Off
    php_flag expose_php Off
</IfModule>
### Security and Spam ###
# Protect the repository directory: answer 403 Forbidden for any request whose
# path starts with ".git" or contains "/.git". The dot is escaped so that only
# a literal "." matches; unescaped, it matched any character and therefore
# also blocked unrelated paths such as "/agitate".
<IfModule mod_rewrite.c>
    RewriteRule "(^|/)\.git" - [F,L]
</IfModule>
# Do not append the server signature line to server-generated documents.
ServerSignature Off
# Per-file access rules (Apache 2.4 mod_authz_core syntax). Each section is
# matched against the file name; later matching sections override earlier
# ones, so the humans.txt/robots.txt exception must stay after the general
# ".txt" denial. Dots in the patterns are escaped so they match a literal "."
# rather than any character.

# Deny access to all CGI, Perl, Python, Bash, SQL, Template, INI configuration, cache, log, temporary and text files
<FilesMatch "\.(cgi|pl|py|sh|bash|sql|tpl|ini|cache|log|tmp|txt)$">
    <IfModule mod_authz_core.c>
        Require all denied
    </IfModule>
</FilesMatch>
# Leave open the humans.txt and robots.txt file (anchored so that only these
# two exact file names are re-opened)
<FilesMatch "^(humans|robots)\.txt$">
    <IfModule mod_authz_core.c>
        Require all granted
    </IfModule>
</FilesMatch>
# Deny access for "composer.json" and "sample.htaccess" (left unanchored, as
# before, so backup copies such as "composer.json.bak" stay denied too)
<FilesMatch "composer\.json|sample\.htaccess">
    <IfModule mod_authz_core.c>
        Require all denied
    </IfModule>
</FilesMatch>
# Prevent .htaccess/.htpasswd from being downloaded
<Files ~ "^\.ht">
    <IfModule mod_authz_core.c>
        Require all denied
    </IfModule>
</Files>
# Access rule for the listed HTTP methods only: allow everyone except
# requests for which the environment variable "bad_bot" is set.
# NOTE(review): nothing visible in this file sets "bad_bot"; confirm it is
# set elsewhere (e.g. via SetEnvIf), otherwise the exclusion never triggers.
<Limit GET POST PUT DELETE HEAD>
    <IfModule mod_authz_core.c>
        <RequireAll>
            Require all granted
            Require not env bad_bot
        </RequireAll>
    </IfModule>
</Limit>
# Custom error pages. Each listed HTTP status is answered with a local URL
# path: 404 goes to /error, every other status goes to /error/http/index with
# the status code passed in the "code" query parameter.
ErrorDocument 400 /error/http/index?code=400
ErrorDocument 401 /error/http/index?code=401
ErrorDocument 402 /error/http/index?code=402
ErrorDocument 403 /error/http/index?code=403
ErrorDocument 404 /error
ErrorDocument 405 /error/http/index?code=405
ErrorDocument 500 /error/http/index?code=500
ErrorDocument 501 /error/http/index?code=501
ErrorDocument 502 /error/http/index?code=502
ErrorDocument 504 /error/http/index?code=504
ErrorDocument 505 /error/http/index?code=505
# URL Rewrite
<IfModule mod_rewrite.c>
<IfModule mod_env.c>
    # Export HTTP_MOD_REWRITE=On into the request environment so that PHP
    # can tell the mod_rewrite module is ENABLED.
    SetEnv HTTP_MOD_REWRITE On
</IfModule>
# Uncomment the following only if HTTPS is enabled. HSTS header increases security of your website & SEO
# <IfModule mod_headers.c>
# Header set Strict-Transport-Security "max-age=31536000; preload" env=HTTPS
# </IfModule>
# Remove www subdomain in the URL
# RewriteCond %{HTTP_HOST} ^www\.(.+)$ [NC]
# RewriteRule ^(.*)$ http://%1/$1 [R=301,L]
# Force the URL to be https (only if you have an SSL certificate). May not be necessary if HSTS is enabled
# RewriteCond %{SERVER_PORT} 80
# RewriteRule ^(.*)$ https://%{HTTP_HOST}%{REQUEST_URI} [R=301,L]
# Front controller: a request that maps to neither an existing directory nor
# an existing file is handed to index.php. The requested path becomes the
# start of the query string and the original query string is appended (QSA).
RewriteCond %{REQUEST_FILENAME} !-d
RewriteCond %{REQUEST_FILENAME} !-f
RewriteRule ^(.*)$ index.php?$1 [QSA,L]
# Start Bad Bot Prevention
# Answer 403 Forbidden when the User-Agent header matches any pattern below.
# The conditions are chained with [OR]; the last one carries no flag and is
# followed directly by the rule. Patterns starting with "^" must match at the
# beginning of the header, the others match anywhere in it.
# Spaces inside a pattern are escaped with a backslash: RewriteCond arguments
# are whitespace-separated, so an unescaped space would make the next word be
# parsed as the flags argument and the configuration would fail to load.
RewriteCond %{HTTP_USER_AGENT} ^BackWeb [OR]
RewriteCond %{HTTP_USER_AGENT} ^Bandit [OR]
RewriteCond %{HTTP_USER_AGENT} ^BatchFTP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Buddy [OR]
RewriteCond %{HTTP_USER_AGENT} ^Collector [OR]
RewriteCond %{HTTP_USER_AGENT} ^Copier [OR]
RewriteCond %{HTTP_USER_AGENT} ^DownloadWonder [OR]
RewriteCond %{HTTP_USER_AGENT} ^Downloader [OR]
RewriteCond %{HTTP_USER_AGENT} ^LinkWalker [OR]
RewriteCond %{HTTP_USER_AGENT} ^BlackWidow [OR]
RewriteCond %{HTTP_USER_AGENT} ^Bot\ mailto:craftbot@yahoo.com [OR]
RewriteCond %{HTTP_USER_AGENT} ^ChinaClaw [OR]
RewriteCond %{HTTP_USER_AGENT} ^Custo [OR]
RewriteCond %{HTTP_USER_AGENT} ^DISCo [OR]
RewriteCond %{HTTP_USER_AGENT} ^Download\ Demon [OR]
RewriteCond %{HTTP_USER_AGENT} ^eCatch [OR]
RewriteCond %{HTTP_USER_AGENT} ^EirGrabber [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailSiphon [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailWolf [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailCollector [OR]
RewriteCond %{HTTP_USER_AGENT} ^Crescent [OR]
RewriteCond %{HTTP_USER_AGENT} ^CherryPicker [OR]
RewriteCond %{HTTP_USER_AGENT} ^Express\ WebPictures [OR]
RewriteCond %{HTTP_USER_AGENT} ^ExtractorPro [OR]
RewriteCond %{HTTP_USER_AGENT} ^EyeNetIE [OR]
RewriteCond %{HTTP_USER_AGENT} ^FlashGet [OR]
RewriteCond %{HTTP_USER_AGENT} ^GetRight [OR]
RewriteCond %{HTTP_USER_AGENT} ^GetWeb! [OR]
RewriteCond %{HTTP_USER_AGENT} ^Go!Zilla [OR]
RewriteCond %{HTTP_USER_AGENT} ^gotit [OR]
RewriteCond %{HTTP_USER_AGENT} ^Go-Ahead-Got-It [OR]
RewriteCond %{HTTP_USER_AGENT} ^GrabNet [OR]
RewriteCond %{HTTP_USER_AGENT} ^Grafula [OR]
RewriteCond %{HTTP_USER_AGENT} ^HMView [OR]
RewriteCond %{HTTP_USER_AGENT} HTTrack [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Image\ Stripper [OR]
RewriteCond %{HTTP_USER_AGENT} ^Image\ Sucker [OR]
RewriteCond %{HTTP_USER_AGENT} Indy\ Library [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^InterGET [OR]
RewriteCond %{HTTP_USER_AGENT} ^Internet\ Ninja [OR]
RewriteCond %{HTTP_USER_AGENT} ^JetCar [OR]
RewriteCond %{HTTP_USER_AGENT} ^JOC\ Web\ Spider [OR]
RewriteCond %{HTTP_USER_AGENT} ^larbin [OR]
RewriteCond %{HTTP_USER_AGENT} ^libghttp [OR]
RewriteCond %{HTTP_USER_AGENT} ^LeechFTP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mass\ Downloader [OR]
RewriteCond %{HTTP_USER_AGENT} ^MIDown\ tool [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mister\ PiX [OR]
RewriteCond %{HTTP_USER_AGENT} ^Navroad [OR]
RewriteCond %{HTTP_USER_AGENT} ^NearSite [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetAnts [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetSpider [OR]
RewriteCond %{HTTP_USER_AGENT} ^Net\ Vampire [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetZIP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Octopus [OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline\ Explorer [OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline\ Navigator [OR]
RewriteCond %{HTTP_USER_AGENT} ^PageGrabber [OR]
RewriteCond %{HTTP_USER_AGENT} ^Papa\ Foto [OR]
RewriteCond %{HTTP_USER_AGENT} ^pavuk [OR]
RewriteCond %{HTTP_USER_AGENT} ^pcBrowser [OR]
RewriteCond %{HTTP_USER_AGENT} libwww-perl.* [OR]
RewriteCond %{HTTP_USER_AGENT} ^Pockey [OR]
RewriteCond %{HTTP_USER_AGENT} ^Pump [OR]
RewriteCond %{HTTP_USER_AGENT} ^RealDownload [OR]
RewriteCond %{HTTP_USER_AGENT} ^ReGet [OR]
RewriteCond %{HTTP_USER_AGENT} ^SiteSnagger [OR]
RewriteCond %{HTTP_USER_AGENT} ^SmartDownload [OR]
RewriteCond %{HTTP_USER_AGENT} ^SuperBot [OR]
RewriteCond %{HTTP_USER_AGENT} ^SuperHTTP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Surfbot [OR]
RewriteCond %{HTTP_USER_AGENT} ^tAkeOut [OR]
RewriteCond %{HTTP_USER_AGENT} ^Teleport\ Pro [OR]
RewriteCond %{HTTP_USER_AGENT} ^VoidEYE [OR]
RewriteCond %{HTTP_USER_AGENT} ^Web\ Image\ Collector [OR]
RewriteCond %{HTTP_USER_AGENT} ^Web\ Sucker [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebAuto [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebCopier [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebFetch [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebGo\ IS [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebLeacher [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebReaper [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebSauger [OR]
RewriteCond %{HTTP_USER_AGENT} ^Website\ eXtractor [OR]
RewriteCond %{HTTP_USER_AGENT} ^Website\ Quester [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebStripper [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebWhacker [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebZIP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Wget [OR]
RewriteCond %{HTTP_USER_AGENT} ^Widow [OR]
RewriteCond %{HTTP_USER_AGENT} ^WWWOFFLE [OR]
RewriteCond %{HTTP_USER_AGENT} ^Xaldon\ WebSpider [OR]
RewriteCond %{HTTP_USER_AGENT} ^Zeus
RewriteRule ^.* - [F,L]
# End Bad Bot Prevention
</IfModule>
# Modify Headers
# For static assets, mark responses as publicly cacheable, send a fixed
# far-future Expires date and drop Last-Modified. The leading dot of each
# extension pattern is escaped so that only a real ".ext" suffix matches
# (unescaped, "." matched any character before the extension letters).
<IfModule mod_headers.c>
    # Cache files
    <FilesMatch "\.(jpe?g|png|gif|ico|webp|swf|mp3|mp4|flv|webm|pdf)$">
        Header set Cache-Control "public"
        Header set Expires "Mon, 20 Apr 2060 20:00:00 GMT"
        Header unset Last-Modified
    </FilesMatch>
    # Cache JavaScript & CSS
    <FilesMatch "\.(js|css)$">
        Header set Cache-Control "public"
        Header set Expires "Mon, 20 Apr 2060 20:00:00 GMT"
        Header unset Last-Modified
    </FilesMatch>
</IfModule>
# Compress files
# Run every response through the DEFLATE output filter, with per-browser and
# per-extension exceptions expressed as environment variables that
# mod_deflate honours (no-gzip, gzip-only-text/html).
<IfModule mod_deflate.c>
    # Insert filter
    SetOutputFilter DEFLATE
    <IfModule mod_setenvif.c>
        # Netscape 4.x has some problems...
        BrowserMatch ^Mozilla/4 gzip-only-text/html
        # Netscape 4.06-4.08 have some more problems
        BrowserMatch ^Mozilla/4\.0[678] no-gzip
        # MSIE masquerades as Netscape, but it is fine.
        # "\b" is a word boundary; without the backslash the pattern looked
        # for the literal text "bMSIE" and never matched.
        BrowserMatch \bMSIE !no-gzip !gzip-only-text/html
        # Don't compress images/archives/music/video/etc.
        # The leading "\." anchors each alternative to a real file extension.
        SetEnvIfNoCase Request_URI \.(?:gif|jpe?g|png)$ no-gzip dont-vary
        SetEnvIfNoCase Request_URI \.(?:exe|t?gz|zip|bz2|sit|rar)$ no-gzip dont-vary
        # "mpe?g" covers .mpg and .mpeg (the previous "mp?g" matched
        # .mg/.mpg and missed .mpeg)
        SetEnvIfNoCase Request_URI \.(?:avi|mov|mp3|mp4|rm|flv|swf|mpe?g)$ no-gzip dont-vary
    </IfModule>
    <IfModule mod_headers.c>
        # Make sure proxies don't deliver the wrong content
        Header append Vary User-Agent env=!dont-vary
    </IfModule>
</IfModule>
# Enable Expirations
# mod_expires generates Expires / Cache-Control max-age headers relative to
# the time of the client's access.
<IfModule mod_expires.c>
    ExpiresActive On

    # Fallback for any content type without a specific rule below
    ExpiresDefault "access plus 1 month"

    # Stylesheets and scripts: 31 days in the client's cache
    ExpiresByType text/css "access plus 31 days"
    ExpiresByType text/javascript "access plus 31 days"
    ExpiresByType application/javascript "access plus 31 days"
    ExpiresByType application/x-javascript "access plus 31 days"

    # Images: 31 days
    ExpiresByType image/gif "access plus 31 days"
    ExpiresByType image/jpeg "access plus 31 days"
    ExpiresByType image/png "access plus 31 days"
    ExpiresByType image/vnd.microsoft.icon "access plus 31 days"

    # Gzip archives and Flash: 31 days
    ExpiresByType application/x-gzip "access plus 31 days"
    ExpiresByType application/x-shockwave-flash "access plus 31 days"

    # Icons served as image/x-icon: one year
    ExpiresByType image/x-icon "access plus 1 year"
</IfModule>
# MIME type mappings by file extension.
# For the video extensions (the .ogg mapping is disabled)
#AddType video/ogg .ogg
AddType video/webm .webm
AddType video/mp4 .mp4
# Every .xml file is served as an RSS feed.
# NOTE(review): this also applies to non-feed XML files (e.g. sitemaps) -
# confirm that is intended.
AddType application/rss+xml .xml
2
Answers
This is the correct answer; it was written by donatJ.
To redirect `/index.php/foo/bar/baz/something` to `/something`, you can do something like the following at the very top of your `.htaccess` file:

    RewriteRule ^index\.php/(?:\w+/)+(\w+)$ https://example.com/$1 [R=301,L]

The shorthand character class `\w` matches word characters (`a-z`, `A-Z`, `0-9` and `_`), so it matches the example URL given. This redirect also canonicalises the scheme, so place this directive before the HTTP to HTTPS redirect to avoid a second redirect when requesting HTTP.
However, a URL of this nature does suggest a misconfiguration in your site/application. It could also be a malicious request, although the non-spam-like content in the URL does not really suggest that.
You should check the `Referer` in your server's access log for these requests, which should give you a clue as to their origin.