summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVishesh Handa <me@vhanda.in>2012-07-25 14:52:20 (GMT)
committerVishesh Handa <me@vhanda.in>2012-07-25 19:10:10 (GMT)
commit414fd4c1c3c358aab70e1e10dd726ea2c1432e1f (patch)
treecfea217114f72d63ce9e297d842332b358f4e0c2
parent75723eb183cbe2681e80453f4891e9ee207bcc00 (diff)
Introduce fall-back indexing
If the strigi indexer fails to index the file, we index it with this new "simple indexer", which just sets the basic properties like url, filename and the mimetype. This way, files which weren't indexed will at least still appear in some of the search results. The nie:lastModified for these files is not set, so the indexscheduler will still find these files and try to index them again. REVIEW: 105730
-rw-r--r--services/fileindexer/indexer/CMakeLists.txt1
-rw-r--r--services/fileindexer/indexer/main.cpp15
-rw-r--r--services/fileindexer/indexer/simpleindexer.cpp138
-rw-r--r--services/fileindexer/indexer/simpleindexer.h43
4 files changed, 192 insertions, 5 deletions
diff --git a/services/fileindexer/indexer/CMakeLists.txt b/services/fileindexer/indexer/CMakeLists.txt
index 1a5a506..38b46c8 100644
--- a/services/fileindexer/indexer/CMakeLists.txt
+++ b/services/fileindexer/indexer/CMakeLists.txt
@@ -28,6 +28,7 @@ include_directories(
set(indexer_SRCS
main.cpp
indexer.cpp
+ simpleindexer.cpp
nepomukindexwriter.cpp
../util.cpp
../../../servicestub/priority.cpp
diff --git a/services/fileindexer/indexer/main.cpp b/services/fileindexer/indexer/main.cpp
index e3c7288..a46a651 100644
--- a/services/fileindexer/indexer/main.cpp
+++ b/services/fileindexer/indexer/main.cpp
@@ -21,6 +21,7 @@
*/
#include "indexer.h"
+#include "simpleindexer.h"
#include "../util.h"
#include "../../../servicestub/priority.h"
#include "nepomukversion.h"
@@ -50,22 +51,22 @@ int main(int argc, char *argv[])
ki18n("(C) 2011, Vishesh Handa, Sebastian Trueg"));
aboutData.addAuthor(ki18n("Vishesh Handa"), ki18n("Current maintainer"), "handa.vish@gmail.com");
aboutData.addCredit(ki18n("Sebastian TrĂ¼g"), ki18n("Developer"), "trueg@kde.org");
-
+
KCmdLineArgs::init(argc, argv, &aboutData);
-
+
KCmdLineOptions options;
options.add("uri <uri>", ki18n("The URI provided will be forced on the resource"));
options.add("mtime <time>", ki18n("The modification time of the resource in time_t format"));
options.add("+[url]", ki18n("The URL of the file to be indexed"));
options.add("clear", ki18n("Remove all indexed data of the URL provided"));
-
- KCmdLineArgs::addCmdLineOptions(options);
+
+ KCmdLineArgs::addCmdLineOptions(options);
const KCmdLineArgs *args = KCmdLineArgs::parsedArgs();
// Application
QCoreApplication app( argc, argv );
KComponentData data( aboutData, KComponentData::RegisterAsMainComponent );
-
+
const KUrl uri = args->getOption("uri");
const uint mtime = args->getOption("mtime").toUInt();
@@ -90,6 +91,10 @@ int main(int argc, char *argv[])
if( !indexer.indexFile( args->url(0), uri, mtime ) ) {
QTextStream s(stdout);
s << indexer.lastError();
+
+ Nepomuk2::SimpleIndexer simpleIndexer( args->url(0) );
+ simpleIndexer.save();
+
return 1;
}
else {
diff --git a/services/fileindexer/indexer/simpleindexer.cpp b/services/fileindexer/indexer/simpleindexer.cpp
new file mode 100644
index 0000000..abc8aae
--- /dev/null
+++ b/services/fileindexer/indexer/simpleindexer.cpp
@@ -0,0 +1,138 @@
+/*
+ This file is part of the Nepomuk KDE project.
+ Copyright (C) 2012 Vishesh Handa <me@vhanda.in>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) version 3, or any
+ later version accepted by the membership of KDE e.V. (or its
+ successor approved by the membership of KDE e.V.), which shall
+ act as a proxy defined in Section 6 of version 3 of the license.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "simpleindexer.h"
+#include "datamanagement.h"
+#include "storeresourcesjob.h"
+
+#include "nfo.h"
+#include "nie.h"
+#include "kext.h"
+
+#include <Soprano/Vocabulary/NRL>
+#include <Soprano/Vocabulary/RDF>
+
+#include <QtCore/QFileInfo>
+#include <QtCore/QDateTime>
+
+#include <KMimeType>
+#include <KDebug>
+#include <KJob>
+#include <kde_file.h>
+
+using namespace Nepomuk2::Vocabulary;
+using namespace Soprano::Vocabulary;
+
+Nepomuk2::SimpleIndexer::SimpleIndexer(const QUrl& fileUrl)
+{ // Describes fileUrl as one SimpleResource and appends it to m_graph; nothing is stored until save() is called.
+ SimpleResource res;
+ // Identify the file by its URL and by the file-name part of that URL.
+ res.addProperty(NIE::url(), fileUrl);
+ res.addProperty(NFO::fileName(), KUrl(fileUrl).fileName());
+ // These two types are added unconditionally, whatever kind of file it is.
+ res.addType(NFO::FileDataObject());
+ res.addType(NIE::InformationElement());
+ // fileUrl is read as a local path here -- presumably callers only pass local files; TODO confirm.
+ QFileInfo fileInfo(fileUrl.toLocalFile());
+ if( fileInfo.isDir() )
+ res.addType(NFO::Folder());
+
+ //
+ // Types by mime type: the mime type name is looked up from the URL and mapped to NFO types
+ //
+ QString mimeType = KMimeType::findByUrl( fileUrl )->name();
+ QList<QUrl> types = typesForMimeType( mimeType );
+ foreach(const QUrl& type, types)
+ res.addType( type );
+ // The mime type name itself is stored too, even when it mapped to no extra type.
+ res.addProperty(NIE::mimeType(), mimeType);
+
+ // Do not set NIE::lastModified
+ // We only set that for files which are properly indexed
+ res.setProperty(NIE::created(), fileInfo.created()); // creation time as reported by QFileInfo
+
+ m_graph << res;
+}
+
+// static -- Returns the NFO types suggested by the name of mimeType; the list is empty when no test matches.
+QList<QUrl> Nepomuk2::SimpleIndexer::typesForMimeType(const QString& mimeType)
+{
+ QList<QUrl> types;
+ // Every test below is an independent substring match, so one mime type can add several types.
+ // Basic types
+ if( mimeType.contains(QLatin1String("audio")) )
+ types << NFO::Audio();
+ if( mimeType.contains(QLatin1String("video")) )
+ types << NFO::Video();
+ if( mimeType.contains(QLatin1String("image")) )
+ types << NFO::Image();
+ if( mimeType.contains(QLatin1String("text")) ) // NOTE(review): also matches "application/vnd.oasis.opendocument.text" -- confirm that is intended
+ types << NFO::PlainTextDocument();
+
+ // Documents
+ if( mimeType.contains(QLatin1String("application/msword")) )
+ types << NFO::Document();
+ if( mimeType.contains(QLatin1String("application/vnd.oasis.opendocument.text")) )
+ types << NFO::Document();
+ if( mimeType.contains(QLatin1String("application/epub")) )
+ types << NFO::Document();
+ if( mimeType.contains(QLatin1String("application/pdf")) )
+ types << NFO::Document();
+
+ // Presentation
+ if( mimeType.contains(QLatin1String("application/vnd.oasis.opendocument.presentation")) )
+ types << NFO::Presentation();
+ if( mimeType.contains(QLatin1String("powerpoint") ) )
+ types << NFO::Presentation();
+
+ // Spreadsheet
+ if( mimeType.contains(QLatin1String("excel")) )
+ types << NFO::Spreadsheet();
+ if( mimeType.contains(QLatin1String("application/vnd.oasis.opendocument.spreadsheet") ) )
+ types << NFO::Spreadsheet();
+
+ // Html ("text/html" also matches the "text" test above, so it gets both types)
+ if( mimeType.contains(QLatin1String("text/html") ) )
+ types << NFO::HtmlDocument();
+
+ // TODO: Add some basic NMM types?
+
+ return types;
+}
+
+bool Nepomuk2::SimpleIndexer::save()
+{ // Stores m_graph through a storeResources job run with exec(); returns true unless the job reports an error.
+ QHash<QUrl, QVariant> additionalMetadata;
+ additionalMetadata.insert( RDF::type(), NRL::DiscardableInstanceBase() );
+ // The extra metadata above gives the stored data the rdf:type NRL::DiscardableInstanceBase.
+ // we do not have an event loop - thus, we need to delete the job ourselves
+ QScopedPointer<KJob> job( Nepomuk2::storeResources( m_graph, IdentifyNone,
+ NoStoreResourcesFlags, additionalMetadata ) );
+ job->setAutoDelete(false); // the QScopedPointer owns the job, so it must not delete itself
+ job->exec();
+ if( job->error() ) { // failure is logged and reported to the caller as false
+ kError() << "SimpleIndexerError: " << job->errorString();
+ return false;
+ }
+
+ return true;
+}
+
diff --git a/services/fileindexer/indexer/simpleindexer.h b/services/fileindexer/indexer/simpleindexer.h
new file mode 100644
index 0000000..df062ac
--- /dev/null
+++ b/services/fileindexer/indexer/simpleindexer.h
@@ -0,0 +1,43 @@
+/*
+ This file is part of the Nepomuk KDE project.
+ Copyright (C) 2012 Vishesh Handa <me@vhanda.in>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) version 3, or any
+ later version accepted by the membership of KDE e.V. (or its
+ successor approved by the membership of KDE e.V.), which shall
+ act as a proxy defined in Section 6 of version 3 of the license.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef SIMPLEINDEXER_H
+#define SIMPLEINDEXER_H
+
+#include <QtCore/QUrl>
+
+#include "simpleresource.h"
+#include "simpleresourcegraph.h"
+
+namespace Nepomuk2 {
+ class SimpleIndexer // Fallback indexer: records only basic file properties (url, file name, mime type, types, creation date).
+ {
+ public:
+ SimpleIndexer(const QUrl& fileUrl); // Builds the resource description for fileUrl into m_graph; nothing is stored yet.
+ bool save(); // Stores m_graph; returns false if the store job reports an error.
+ // Maps a mime type name to the NFO types whose keyword it contains; may return an empty list.
+ static QList<QUrl> typesForMimeType(const QString& mimeType);
+ private:
+ SimpleResourceGraph m_graph; // Holds the single resource built by the constructor.
+ };
+}
+
+#endif // SIMPLEINDEXER_H