Skip to content

Commit

Permalink
#7 Solved the SearchGenerator result ordering problem.
Browse files Browse the repository at this point in the history
Sort the pages generated by SearchGenerator.
  • Loading branch information
CXuesong committed Mar 7, 2017
1 parent 242f086 commit 6ed35c9
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 26 deletions.
14 changes: 8 additions & 6 deletions UnitTestProject1/GeneratorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -258,24 +258,26 @@ public void WpGetQueryPageNamesTest()
public void WpTestGetSearchTest()
{
var site = WpTestSite;
var generator = new SearchGenerator(site, "test");
var generator = new SearchGenerator(site, "test") {PagingSize = 20};
var pages = generator.EnumPages().Take(100).ToList();
TracePages(pages);
AssertTitlesDistinct(pages);
}

[TestMethod]
public void WpLzhGetSearchTest()
public void WpLzhSearchTest()
{
var site = WpLzhSite;
var generator = new SearchGenerator(site, "維基");
var pages = generator.EnumPages().Take(100).ToList();
var pages = generator.EnumPages().Take(50).ToList();
TracePages(pages);
AssertTitlesDistinct(pages);
// Note: as of 2017-03-07, [[維基]] actually exists on lzh wiki, but it's a redirect to [[維基媒體基金會]].
// Maybe that's why it's not included in the search results.
//Assert.IsTrue(pages.Any(p => p.Title == "維基"));
//Assert.IsTrue(pages.Any(p => p.Title == "維基媒體基金會"));
//Assert.IsTrue(pages.Any(p => p.Title == "維基大典"));
//Assert.IsTrue(pages.Any(p => p.Title == "文言維基大典"));
Assert.IsTrue(pages.Any(p => p.Title == "維基媒體基金會"));
Assert.IsTrue(pages.Any(p => p.Title == "維基大典"));
Assert.IsTrue(pages.Any(p => p.Title == "文言維基大典"));
}

[TestMethod]
Expand Down
4 changes: 3 additions & 1 deletion WikiClientLibrary/Generators/PageGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ public int GetActualPagingSize()
/// If <see cref="PagingSize"/> is <c>null</c>, and <see cref="PageQueryOptions.FetchContent"/> is specified,
/// the default limit will be 1/10 of the original default limit (500 for bots and 50 for users).
/// (See https://www.mediawiki.org/wiki/API:Revisions .)
/// This will not affect the manually set <see cref="PagingSize"/>.
/// If you have manually set <see cref="PagingSize"/>, this function will directly return the value you have set,
/// but any value exceeding the server limit will cause problems, such as empty content being retrieved (even if
/// you have set <see cref="PageQueryOptions.FetchContent"/>), or a <see cref="WikiClientException"/>.
/// </remarks>
public int GetActualPagingSize(PageQueryOptions options)
{
Expand Down
8 changes: 5 additions & 3 deletions WikiClientLibrary/Generators/SearchGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ public SearchGenerator(Site site, string keyword) : base(site)
/// <summary>
/// Search inside the text or titles.
/// </summary>
/// <remarks>Default: <see cref="SearchableField.Text"/>. This is slightly different from
/// the MediaWiki default behavior.</remarks>
public SearchableField MatchingField
{
get { return _MatchingField; }
Expand Down Expand Up @@ -102,10 +100,14 @@ protected override IEnumerable<KeyValuePair<string, object>> GetGeneratorParams(
/// </summary>
public enum SearchableField
{
/// <summary>
/// Use the MediaWiki site's default behavior.
/// </summary>
Default,
/// <summary>
/// Search in page titles. Note that Wikipedia does not support this flag.
/// </summary>
Title = 0,
Title,
/// <summary>
/// Search in page text.
/// </summary>
Expand Down
37 changes: 21 additions & 16 deletions WikiClientLibrary/PageFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,25 +75,30 @@ internal static IList<Page> FromJsonQueryResult(Site site, JObject queryNode, Pa
{
if (site == null) throw new ArgumentNullException(nameof(site));
if (queryNode == null) throw new ArgumentNullException(nameof(queryNode));
var pages = (JObject)queryNode["pages"];
var pages = (JObject) queryNode["pages"];
if (pages == null) return EmptyPages;
return pages.Properties().Select(page =>
{
Page newInst;
if (page.Value["categoryinfo"] != null)
newInst = new Category(site);
else if ((string)page.Value["contentmodel"] == ContentModels.FlowBoard)
// If query.xxx.index exists, sort the pages by the given index.
// This is specifically used with SearchGenerator, to keep the search result in order.
// For other generators, this property simply does not exist.
// See https://www.mediawiki.org/wiki/API_talk:Query#On_the_order_of_titles_taken_out_of_generator .
return pages.Properties().OrderBy(page => (int?) page.Value["index"])
.Select(page =>
{
if ((int)page.Value["ns"] == FlowNamespaces.Topic)
newInst = new Topic(site);
Page newInst;
if (page.Value["categoryinfo"] != null)
newInst = new Category(site);
else if ((string) page.Value["contentmodel"] == ContentModels.FlowBoard)
{
if ((int) page.Value["ns"] == FlowNamespaces.Topic)
newInst = new Topic(site);
else
newInst = new Board(site);
}
else
newInst = new Board(site);
}
else
newInst = new Page(site);
newInst.LoadFromJson(page, options);
return newInst;
}).ToList();
newInst = new Page(site);
newInst.LoadFromJson(page, options);
return newInst;
}).ToList();
}

}
Expand Down

0 comments on commit 6ed35c9

Please sign in to comment.