On this page
In order to facilitate the evaluation and verification of the search engine's capabilities, KandaSearch distributes several configurations and corresponding data, including the livedoor news corpus, through its extension library.
Please follow the steps below to use the service:
Now, let's explain the characteristics of each configuration and data.
Please note that, because of Apache Solr version updates, the actual contents of the configurations and data may differ from what is described here.
EC is a configuration and dataset that models the schema of typical product information handled on e-commerce websites.
The managed-schema.xml defines fields such as the following:
<uniqueKey>id</uniqueKey>
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="productCode" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="productName" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="productName_2g" type="text_2g" indexed="true" stored="true" required="false" multiValued="false" />
<field name="productNameKana" type="text_2g" indexed="true" stored="true" required="false" multiValued="false" />
<field name="promptForImage" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="image" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="price" type="plong" indexed="true" stored="true" required="false" multiValued="false" />
<field name="taxKbn" type="boolean" indexed="true" stored="true" required="false" multiValued="false" />
<field name="makerName" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="makerName_2g" type="text_2g" indexed="true" stored="true" required="false" multiValued="false" />
<field name="makerNameKana" type="text_2g" indexed="true" stored="true" required="false" multiValued="false" />
<field name="makerProduCode" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="categoryL" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="categoryL_2g" type="text_2g" indexed="true" stored="true" required="false" multiValued="false" />
<field name="categoryM" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="categoryM_2g" type="text_2g" indexed="true" stored="true" required="false" multiValued="false" />
<field name="categoryS" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="categoryS_2g" type="text_2g" indexed="true" stored="true" required="false" multiValued="false" />
<field name="categoryFacetInfo" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="stock" type="plong" indexed="true" stored="true" required="false" multiValued="false" />
<field name="size" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="weight" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="unitName" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="salesUnit" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="productInfo" type="text_ja" indexed="true" stored="true" required="false" multiValued="true" />
<field name="productInfo_2g" type="text_2g" indexed="true" stored="true" required="false" multiValued="true" />
<field name="standardDeliveryDate" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="campaignFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="campaignStart" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="campaignEnd" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="campaignPrice" type="plong" indexed="true" stored="true" required="false" multiValued="false" />
<field name="dateSince" type="pdate" indexed="true" stored="true"/>
The main features of this configuration and data are as follows:
The child elements of <analyzer /> are configured for 'character normalization', 'word tokenization', 'token filtering', and 'synonym expansion'.
The image field contains links to images generated by generative AI.

This is a configuration and dataset whose schema includes the names, addresses, latitudes, and longitudes of public facilities in Saitama City. It is ideal for testing facets and geospatial search.
Location information of public facilities CONFIG(Solr 9)
Location information of public facilities DATA
The managed-schema.xml defines fields such as the following:
<uniqueKey>id</uniqueKey>
<field name="_version_" type="plong" indexed="false" stored="false"/>
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
<field name="category" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="name" type="text_ja" indexed="true" stored="true"/>
<field name="name_2g" type="text_2g" indexed="true" stored="true"/>
<field name="name_forward" type="text_forward" indexed="true" stored="true"/>
<field name="name_backward" type="text_backward" indexed="true" stored="true"/>
<field name="hierarchy" type="pathhie" indexed="true" stored="true" omitNorms="true" multiValued="false"/>
<field name="address" type="text_ja" indexed="true" stored="true"/>
<field name="address_2g" type="text_2g" indexed="true" stored="true"/>
<field name="location" type="location" indexed="true" stored="true" multiValued="false"/>
The main features of this configuration and data are as follows:
Let's take a closer look at geospatial search.
Geospatial search is a type of search that involves indexing latitude and longitude information when dealing with buildings or facilities as searchable documents. This enables filtering facilities based on their location on a map, displaying search results in order of proximity from a specific location, and other similar functionalities.
Here, let's try performing a geospatial search using the example of 'searching for public facilities within a 1km radius of Saitama City Hall in order of proximity'.
First, navigate to 'Search' in the instance view. Specify the following parameters to obtain the latitude and longitude of さいたま市役所 (Saitama City Hall).
市役所
/select
name
location
name
The latitude and longitude of Saitama City Hall are displayed in the results. Next, run a search with the following parameters, using those coordinates.
/spatial
name
location
pt=35.861648,139.645496
The public facilities near Saitama City Hall are then displayed in order of proximity.
Open a new tab in your browser and access Google Maps. Then, use the latitude and longitude displayed on the search screen to search and display the location of the facility.
EXAMPLE
For the case of 消防局庁舎・浦和消防署 (the Fire Department Headquarters and Urawa Fire Station): 35.862001,139.646234
The solrconfig.xml config file used here specifies, as defaults, the following parameters required for geospatial search.
Let's check the contents of the solrconfig.xml by specifying the collection (geospatial) in the 'Config' of the instance view.
The above parameters are defined in solrconfig.xml as follows:
<requestHandler name="/spatial" class="solr.SearchHandler">
<lst name="defaults">
:
<str name="spatial">true</str>
<str name="fq">{!geofilt}</str>
<str name="sort">geodist() asc</str>
<str name="sfield">location</str>
<int name="d">1</int>
:
</lst>
:
</requestHandler>
That concludes the explanation of geospatial search.
Additionally, the managed-schema.xml of this configuration defines fields as follows:
<uniqueKey>id</uniqueKey>
<field name="_version_" type="plong" indexed="false" stored="false"/>
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
<field name="category" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="name" type="text_ja" indexed="true" stored="true"/>
<field name="name_2g" type="text_2g" indexed="true" stored="true"/>
<field name="name_forward" type="text_forward" indexed="true" stored="true"/>
<field name="name_backward" type="text_backward" indexed="true" stored="true"/>
<field name="hierarchy" type="pathhie" indexed="true" stored="true" omitNorms="true" multiValued="false"/>
<field name="address" type="text_ja" indexed="true" stored="true"/>
<field name="address_2g" type="text_2g" indexed="true" stored="true"/>
<field name="location" type="location" indexed="true" stored="true" multiValued="false"/>
This configuration and dataset are based on open data about childcare facilities (licensed daycare centers) in Chiba Prefecture. The schema includes the municipality name, facility name, address, and so on. It is ideal for testing facets that filter by municipality or by day of use.
Childcare facilities CONFIG(Solr 9)
The managed-schema.xml defines fields such as the following (these may differ depending on the version of the config):
<uniqueKey>id</uniqueKey>
<field name="prefecturesCode" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="prefecturesName" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="cityName" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="childcareCenterName" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="childcareCenterNameKana" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="childcareCenterKbn" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="address1" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="address2" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="latitude" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="longitude" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="access" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="parkingInfo" type="text_ja" indexed="true" stored="true" required="false" multiValued="true" />
<field name="parkingFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="telNo" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="telExNo" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="faxNo" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="corpNo" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="groupName" type="text_ja" indexed="true" stored="true" required="false" multiValued="false" />
<field name="authDate" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="capacity" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="targetAge" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="availDate" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="monFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="tueFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="wedFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="thuFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="friFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="satFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="sunFlg" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="stTime" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="endTime" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="availDaytimeInfo" type="text_ja" indexed="true" stored="true" required="false" multiValued="true" />
<field name="tempAccept" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="url" type="string" indexed="true" stored="true" required="false" multiValued="false" />
<field name="remarks" type="text_ja" indexed="true" stored="true" required="false" multiValued="true" />
The main features of this configuration and data are as follows:
This concludes the introduction to the configurations and data distributed through the extension library.