Add filetype extraction by regex

Adds the ability to set filetype on open by extracting it from the
file using a regex on the first few lines (default 2).
This commit is contained in:
Lex Trotman 2011-10-22 22:55:57 +11:00
parent b1b88286cf
commit cdb748c2c4
5 changed files with 158 additions and 44 deletions

View File

@ -3,10 +3,10 @@
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Docutils 0.8.1: http://docutils.sourceforge.net/" />
<meta name="generator" content="Docutils 0.6: http://docutils.sourceforge.net/" />
<title>Geany</title>
<meta name="authors" content="Enrico Tröger Nick Treleaven Frank Lanitz Colomban Wendling" />
<meta name="date" content="2011-10-01" />
<meta name="date" content="2011-10-02" />
<style type="text/css">
/*
@ -140,9 +140,9 @@ Stylesheet for Geany's documentation based on a version of John Gabriele.
<br />Frank Lanitz
<br />Colomban Wendling</td></tr>
<tr><th class="docinfo-name">Date:</th>
<td>2011-10-01</td></tr>
<td>2011-10-02</td></tr>
<tr><th class="docinfo-name">Version:</th>
<td>0.21</td></tr>
<td>1.22</td></tr>
</tbody>
</table>
<p>Copyright © 2005-2011</p>
@ -507,13 +507,14 @@ of this program, and also in the chapter <a class="reference internal" href="#gn
<li><a class="reference internal" href="#compile-time-options" id="id231">Compile-time options</a><ul>
<li><a class="reference internal" href="#src-geany-h" id="id232">src/geany.h</a></li>
<li><a class="reference internal" href="#project-h" id="id233">project.h</a></li>
<li><a class="reference internal" href="#editor-h" id="id234">editor.h</a></li>
<li><a class="reference internal" href="#keyfile-c" id="id235">keyfile.c</a></li>
<li><a class="reference internal" href="#build-c" id="id236">build.c</a></li>
<li><a class="reference internal" href="#filetypes-c" id="id234">filetypes.c</a></li>
<li><a class="reference internal" href="#editor-h" id="id235">editor.h</a></li>
<li><a class="reference internal" href="#keyfile-c" id="id236">keyfile.c</a></li>
<li><a class="reference internal" href="#build-c" id="id237">build.c</a></li>
</ul>
</li>
<li><a class="reference internal" href="#gnu-general-public-license" id="id237">GNU General Public License</a></li>
<li><a class="reference internal" href="#license-for-scintilla-and-scite" id="id238">License for Scintilla and SciTE</a></li>
<li><a class="reference internal" href="#gnu-general-public-license" id="id238">GNU General Public License</a></li>
<li><a class="reference internal" href="#license-for-scintilla-and-scite" id="id239">License for Scintilla and SciTE</a></li>
</ul>
</div>
<div class="section" id="introduction">
@ -2866,10 +2867,10 @@ internal default, which is currently:</p>
<p>Note that <tt class="docutils literal">\t</tt> = tab.</p>
<table border="1" class="docutils">
<colgroup>
<col width="34%" />
<col width="46%" />
<col width="9%" />
<col width="12%" />
<col width="33%" />
<col width="45%" />
<col width="10%" />
<col width="11%" />
</colgroup>
<thead valign="bottom">
<tr><th class="head">Key</th>
@ -2943,6 +2944,17 @@ saving. Backup is named <cite>filename~</cite>.</td>
<td>false</td>
<td>immediately</td>
</tr>
<tr><td><strong>Filetype related</strong></td>
<td>&nbsp;</td>
<td>&nbsp;</td>
<td>&nbsp;</td>
</tr>
<tr><td>extract_filetype_regex</td>
<td>Regex to extract filetype name from file
via capture group one.</td>
<td>See below.</td>
<td>immediately</td>
</tr>
<tr><td><strong>Search related</strong></td>
<td>&nbsp;</td>
<td>&nbsp;</td>
@ -2978,6 +2990,7 @@ execute section of the Build menu.</td>
</tr>
</tbody>
</table>
<p>The extract_filetype_regex has the default value GEANY_DEFAULT_FILETYPE_REGEX.</p>
</div>
<div class="section" id="terminal-vte-preferences">
<h3><a class="toc-backref" href="#id126">Terminal (VTE) preferences</a></h3>
@ -5558,7 +5571,7 @@ bsd, gpl, snippets.</td>
</tr>
<tr><td>geanyversion</td>
<td>The actual Geany version, e.g.
&quot;Geany 0.21&quot;.</td>
&quot;Geany 1.22&quot;.</td>
<td>file templates, file header,
function description, ChangeLog entry,
bsd, gpl, snippets.</td>
@ -5890,7 +5903,7 @@ editing the file, to build the HTML document to see how your changes
look, run &quot;<tt class="docutils literal">make doc</tt>&quot; in the subdirectory <tt class="docutils literal">doc</tt> of Geany's source
directory. This regenerates the <tt class="docutils literal">geany.html</tt> file. To generate a PDF
file, use the command &quot;<tt class="docutils literal">make pdf</tt>&quot; which should generate a file called
geany-0.21.pdf.</p>
geany-1.22.pdf.</p>
<p>After you are happy with your changes, create a patch:</p>
<pre class="literal-block">
% svn diff geany.txt &gt; foo.patch
@ -6131,8 +6144,31 @@ open dialog.</td>
</tbody>
</table>
</div>
<div class="section" id="filetypes-c">
<h2><a class="toc-backref" href="#id234">filetypes.c</a></h2>
<table border="1" class="docutils">
<colgroup>
<col width="33%" />
<col width="48%" />
<col width="20%" />
</colgroup>
<thead valign="bottom">
<tr><th class="head">Option</th>
<th class="head">Description</th>
<th class="head">Default</th>
</tr>
</thead>
<tbody valign="top">
<tr><td>GEANY_FILETYPE_SEARCH_LINES</td>
<td>The number of lines to search for the
filetype with the extract filetype regex.</td>
<td>2</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="editor-h">
<h2><a class="toc-backref" href="#id234">editor.h</a></h2>
<h2><a class="toc-backref" href="#id235">editor.h</a></h2>
<table border="1" class="docutils">
<colgroup>
<col width="33%" />
@ -6158,7 +6194,7 @@ underscore.</td>
</table>
</div>
<div class="section" id="keyfile-c">
<h2><a class="toc-backref" href="#id235">keyfile.c</a></h2>
<h2><a class="toc-backref" href="#id236">keyfile.c</a></h2>
<p>These are default settings that can be overridden in the <a class="reference internal" href="#preferences">Preferences</a> dialog.</p>
<table border="1" class="docutils">
<colgroup>
@ -6235,11 +6271,17 @@ comment.</td>
Geany provide.</td>
<td>30</td>
</tr>
<tr><td>GEANY_DEFAULT_FILETYPE_REGEX</td>
<td>The default regex to extract filetypes from
files.</td>
<td>See below.</td>
</tr>
</tbody>
</table>
<p>The GEANY_DEFAULT_FILETYPE_REGEX default value is -\*-\s*([^\s]+)\s*-\*- which finds Emacs filetypes.</p>
</div>
<div class="section" id="build-c">
<h2><a class="toc-backref" href="#id236">build.c</a></h2>
<h2><a class="toc-backref" href="#id237">build.c</a></h2>
<table border="1" class="docutils">
<colgroup>
<col width="33%" />
@ -6284,7 +6326,7 @@ overriding the compile setting.</td>
</div>
</div>
<div class="section" id="gnu-general-public-license">
<h1><a class="toc-backref" href="#id237">GNU General Public License</a></h1>
<h1><a class="toc-backref" href="#id238">GNU General Public License</a></h1>
<pre class="literal-block">
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
@ -6629,7 +6671,7 @@ Public License instead of this License.
</pre>
</div>
<div class="section" id="license-for-scintilla-and-scite">
<h1><a class="toc-backref" href="#id238">License for Scintilla and SciTE</a></h1>
<h1><a class="toc-backref" href="#id239">License for Scintilla and SciTE</a></h1>
<p>Copyright 1998-2003 by Neil Hodgson &lt;neilh(at)scintilla(dot)org&gt;</p>
<p>All Rights Reserved</p>
<p>Permission to use, copy, modify, and distribute this software and
@ -6649,7 +6691,7 @@ USE OR PERFORMANCE OF THIS SOFTWARE.</p>
<div class="footer">
<hr class="footer" />
<a class="reference external" href="geany.txt">View document source</a>.
Generated on: 2011-10-02 13:04 UTC.
Generated on: 2011-10-22 11:53 UTC.
Generated by <a class="reference external" href="http://docutils.sourceforge.net/">Docutils</a> from <a class="reference external" href="http://docutils.sourceforge.net/rst.html">reStructuredText</a> source.
</div>

View File

@ -2485,14 +2485,14 @@ internal default, which is currently:
Note that ``\t`` = tab.
================================ =========================================== ======== ===========
Key Description Default Applies
================================ =========================================== ======== ===========
================================ =========================================== ========== ===========
Key Description Default Applies
================================ =========================================== ========== ===========
**VTE related**
emulation Terminal emulation mode. Only change this xterm immediately
emulation Terminal emulation mode. Only change this xterm immediately
if you have VTE termcap files other than
``vte/termcap/xterm``.
send_selection_unsafe By default, Geany strips any trailing false immediately
send_selection_unsafe By default, Geany strips any trailing false immediately
newline characters from the current
selection before sending it to the terminal
to not execute arbitrary code. This is
@ -2501,7 +2501,7 @@ send_selection_unsafe By default, Geany strips any trailing f
it to be executed directly, set this option
to true.
**File related**
use_atomic_file_saving Defines the mode how Geany saves files to false immediately
use_atomic_file_saving Defines the mode how Geany saves files to false immediately
disk. If disabled, Geany directly writes
the content of the document to disk. This
might cause loss of data when there is
@ -2518,23 +2518,27 @@ use_atomic_file_saving Defines the mode how Geany saves files to f
break things seriously.
The better approach would be to ensure your
disk won't run out of free space.
use_gio_unsafe_file_saving Whether to use GIO as the unsafe file true immediately
use_gio_unsafe_file_saving Whether to use GIO as the unsafe file true immediately
saving backend. It is better on most
situations but is known not to work
correctly on some complex setups.
gio_unsafe_save_backup Make a backup when using GIO unsafe file false immediately
gio_unsafe_save_backup Make a backup when using GIO unsafe file false immediately
saving. Backup is named `filename~`.
**Filetype related**
extract_filetype_regex Regex to extract filetype name from file See below. immediately
via capture group one.
**Search related**
find_selection_type See `Find selection`_. 0 immediately
find_selection_type See `Find selection`_. 0 immediately
**Build Menu related**
number_ft_menu_items The maximum number of menu items in the 2 on restart
number_ft_menu_items The maximum number of menu items in the 2 on restart
filetype section of the Build menu.
number_non_ft_menu_items The maximum number of menu items in the 3 on restart
number_non_ft_menu_items The maximum number of menu items in the 3 on restart
independent section of the Build menu.
number_exec_menu_items The maximum number of menu items in the 2 on restart
number_exec_menu_items The maximum number of menu items in the 2 on restart
execute section of the Build menu.
================================ =========================================== ======== ===========
================================ =========================================== ========== ===========
The default value of extract_filetype_regex is GEANY_DEFAULT_FILETYPE_REGEX, which is documented in the `keyfile.c`_ section of the compile-time options.
Terminal (VTE) preferences
^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -5100,6 +5104,16 @@ GEANY_PROJECT_EXT The default filename extension for Geany ge
open dialog.
============================== ============================================ ==================
filetypes.c
-----------
============================== ============================================ ==================
Option Description Default
============================== ============================================ ==================
GEANY_FILETYPE_SEARCH_LINES The number of lines to search for the 2
filetype with the extract filetype regex.
============================== ============================================ ==================
editor.h
--------
@ -5142,8 +5156,12 @@ GEANY_TOGGLE_MARK A string which is used to mark a toggled "~
comment.
GEANY_MAX_AUTOCOMPLETE_WORDS How many autocompletion suggestions should 30
Geany provide.
GEANY_DEFAULT_FILETYPE_REGEX The default regex to extract filetypes from See below.
files.
============================== ============================================ ==================
The GEANY_DEFAULT_FILETYPE_REGEX default value is ``-\*-\s*([^\s]+)\s*-\*-``, which matches Emacs-style mode lines such as ``-*- Python -*-`` and captures the filetype name (here ``Python``) in group one.
build.c
-------

View File

@ -61,6 +61,7 @@ typedef struct GeanyFilePrefs
gboolean ensure_convert_new_lines;
gboolean gio_unsafe_save_backup;
gboolean use_gio_unsafe_file_saving; /* whether to use GIO as the unsafe backend */
gchar *extract_filetype_regex; /* regex to extract filetype on opening */
}
GeanyFilePrefs;

View File

@ -46,6 +46,7 @@
#include <stdlib.h>
#define GEANY_FILETYPE_SEARCH_LINES 2 /* lines of file to search for filetype */
GPtrArray *filetypes_array = NULL; /* Dynamic array of filetype pointers */
@ -955,13 +956,48 @@ static GeanyFiletype *find_shebang(const gchar *utf8_filename, const gchar *line
/* Detect the filetype checking for a shebang, then the extract-filetype regex, then filename extension. */
static GeanyFiletype *filetypes_detect_from_file_internal(const gchar *utf8_filename,
const gchar *line)
gchar **lines)
{
GeanyFiletype *ft;
GeanyFiletype *ft;
gint i;
GRegex *ft_regex;
GMatchInfo *match;
GError *regerr = NULL;
/* try to find a shebang and if found use it prior to the filename extension
* also checks for <?xml */
ft = find_shebang(utf8_filename, line);
ft = find_shebang(utf8_filename, lines[0]);
if (ft != NULL)
return ft;
/* try to extract the filetype using a regex capture */
ft_regex = g_regex_new( file_prefs.extract_filetype_regex,
G_REGEX_RAW | G_REGEX_MULTILINE,
0,
&regerr);
if (regerr == NULL)
{
for (i = 0; i < GEANY_FILETYPE_SEARCH_LINES; i++)
{
if (g_regex_match(ft_regex, lines[i], 0, &match))
{
gchar *capture = g_match_info_fetch(match, 1);
if (capture != NULL)
{
ft = filetypes_lookup_by_name(capture);
g_free(capture);
}
}
g_match_info_free(match);
if (ft != NULL)
break;
}
}
else
{
g_error_free(regerr);
}
g_regex_unref(ft_regex);
if (ft != NULL)
return ft;
@ -975,15 +1011,22 @@ static GeanyFiletype *filetypes_detect_from_file_internal(const gchar *utf8_file
/* Detect the filetype for the document, checking for a shebang, then the
 * extract-filetype regex, then filename extension.
 *
 * Returns filetypes[GEANY_FILETYPES_NONE] when doc is NULL; otherwise returns whatever
 * filetypes_detect_from_file_internal() yields for doc->file_name and the leading
 * line(s) read from the document's editor widget. */
GeanyFiletype *filetypes_detect_from_document(GeanyDocument *doc)
{
/* NOTE(review): the declarations and the detection call each appear twice below, once in
 * a single-line form (`line`) and once in a multi-line form (`lines`); presumably the
 * single-line form is the superseded one - confirm before relying on either. */
GeanyFiletype *ft;
gchar *line;
GeanyFiletype *ft;
gchar *lines[GEANY_FILETYPE_SEARCH_LINES];
gint i;
/* no document to inspect: report the "None" filetype */
if (doc == NULL)
return filetypes[GEANY_FILETYPES_NONE];
/* single-line form: only line 0 of the editor buffer is handed to the detector,
 * and the fetched string is released with g_free() afterwards */
line = sci_get_line(doc->editor->sci, 0);
ft = filetypes_detect_from_file_internal(doc->file_name, line);
g_free(line);
/* multi-line form: fetch lines 0 .. GEANY_FILETYPE_SEARCH_LINES-1 of the editor buffer.
 * NOTE(review): what sci_get_line() returns for an index past the last line of a short
 * document is not visible here - confirm it cannot leave an entry unusable. */
for (i = 0; i < GEANY_FILETYPE_SEARCH_LINES; ++i)
{
lines[i] = sci_get_line(doc->editor->sci, i);
}
ft = filetypes_detect_from_file_internal(doc->file_name, lines);
/* release every line fetched above; the detected filetype pointer is returned as-is */
for (i = 0; i < GEANY_FILETYPE_SEARCH_LINES; ++i)
{
g_free(lines[i]);
}
return ft;
}
@ -1001,8 +1044,10 @@ GeanyFiletype *filetypes_detect_from_document(GeanyDocument *doc)
GeanyFiletype *filetypes_detect_from_file(const gchar *utf8_filename)
{
gchar line[1024];
FILE *f;
gchar *lines[GEANY_FILETYPE_SEARCH_LINES];
FILE *f;
gchar *locale_name = utils_get_locale_from_utf8(utf8_filename);
gint i;
f = g_fopen(locale_name, "r");
g_free(locale_name);
@ -1011,7 +1056,12 @@ GeanyFiletype *filetypes_detect_from_file(const gchar *utf8_filename)
if (fgets(line, sizeof(line), f) != NULL)
{
fclose(f);
return filetypes_detect_from_file_internal(utf8_filename, line);
for (i = 0; i < GEANY_FILETYPE_SEARCH_LINES; ++i)
{
lines[i] = NULL;
}
lines[0] = line;
return filetypes_detect_from_file_internal(utf8_filename, lines);
}
fclose(f);
}

View File

@ -82,6 +82,7 @@
#define GEANY_TOGGLE_MARK "~ "
#define GEANY_MAX_AUTOCOMPLETE_WORDS 30
#define GEANY_MAX_SYMBOLS_UPDATE_FREQ 250
#define GEANY_DEFAULT_FILETYPE_REGEX "-\\*-\\s*([^\\s]+)\\s*-\\*-"
static gchar *scribble_text = NULL;
@ -218,6 +219,8 @@ static void init_pref_groups(void)
"indent_hard_tab_width", 8);
stash_group_add_integer(group, (gint*)&search_prefs.find_selection_type,
"find_selection_type", GEANY_FIND_SEL_CURRENT_WORD);
stash_group_add_string(group, &file_prefs.extract_filetype_regex,
"extract_filetype_regex", GEANY_DEFAULT_FILETYPE_REGEX);
/* Note: Interface-related various prefs are in ui_init_prefs() */