github4s
github4s copied to clipboard
Support for auto-pagination
like in octokit:
- https://github.com/octokit/octokit.rb#pagination
- https://github.com/octokit/octokit.rb#hypermedia-agent
I have something like that at the moment:
/**
 * Fetches every page of a paginated call and concatenates the results.
 *
 * The first page is requested with 100 items per page; the number of pages is
 * then read from that response's headers via `utils.getNrPages`, and pages
 * 2..n are requested in order.
 *
 * Pages are traversed inside `EitherT`, so the first page that comes back as a
 * `Left` stops the traversal: no further requests are issued once the overall
 * result is already known to be a failure.
 *
 * @param call performs the request for a given [[Pagination]]
 * @return the items of all pages in page order, or the first error encountered
 */
def autoPaginate[T](
    call: Pagination => IO[Either[GHException, GHResult[List[T]]]]
): IO[Either[GHException, List[T]]] = {
  val perPage = 100

  val allItems = for {
    firstPage <- EitherT(call(Pagination(1, perPage)))
    // When the page count is unknown (or < 2) the range `2 to lastPage` is empty.
    lastPage = utils.getNrPages(firstPage.headers).getOrElse(1)
    remaining = (2 to lastPage).toList.map(Pagination(_, perPage))
    restPages <- remaining.traverse(page => EitherT(call(page)))
  } yield firstPage.result ++ restPages.flatMap(_.result)

  allItems.value
}
/** A single link relation: the `rel` name (e.g. "last") and the URL it points to. */
final case class Relation(name: String, url: String)
// Matches one `<url>; rel="name"` entry; group 1 is the URL, group 2 the relation name.
private val relPattern = new scala.util.matching.Regex("""<(.*?)>; rel="(\w+)"""")
/**
 * Extracts the number of the last page from the response headers.
 *
 * Looks up the "link" header (header names are compared in lower case), takes
 * its first value, splits it on ", " into entries, and reads the `page` query
 * parameter of the entry whose relation is "last".
 *
 * @param headers response headers, each name mapped to its values
 * @return the last page number, or `None` if any step of the lookup fails
 */
def getNrPages(headers: Map[String, Seq[String]]): Option[Int] = {
  val byLowerCaseName = headers.map { case (name, values) => name.toLowerCase -> values }

  byLowerCaseName
    .get("link")
    .flatMap(_.headOption)
    .flatMap { headerValue =>
      headerValue
        .split(", ")
        .collect { case relPattern(url, name) => Relation(name, url) }
        .find(_.name == "last")
    }
    .flatMap(last => Uri.fromString(last.url).toOption)
    .flatMap(_.params.get("page"))
    .flatMap(raw => Try(raw.toInt).toOption)
}
While still limited by not being able to follow the URLs directly, as GitHub recommends, I came up with an alternative solution that does not rely on the number of pages staying the same between requests:
/**
 * Streams the items of every page, starting at `first` and following the
 * "next" relation found in each response's headers.
 *
 * The unfold state is the next page to request, if any. Keeping it as an
 * `Option` lets the page that has no "next" relation still be emitted; the
 * stream only ends on the following step, when the state is `None`.
 * (Ending the unfold as soon as "next" is missing would drop that final page.)
 *
 * @param first the page to start from
 * @param call  performs the request for a given [[Pagination]]; a `Left` is
 *              rethrown into `F`
 */
def autoPage[F[_]: Sync, T](first: Pagination)
                           (call: Pagination => F[Either[GHException, GHResult[List[T]]]]): Stream[F, T] = {
  val chunker: Option[Pagination] => F[Option[(Chunk[T], Option[Pagination])]] = {
    case Some(pagination) =>
      call(pagination).rethrow.map { res =>
        Option(Chunk.seq(res.result) -> nextPage(getRelations(res.headers)))
      }
    case None =>
      Sync[F].pure(None)
  }
  Stream.unfoldChunkEval(Option(first))(chunker)
}
/**
 * Builds the [[Pagination]] for the "next" relation, if there is one.
 *
 * @param relations relation name mapped to the pair of values passed to `Pagination`
 * @return the pagination for the next page, or `None` when there is no "next" entry
 */
def nextPage(relations: Map[String, (Int, Int)]): Option[Pagination] =
  relations.get("next").map { case (page, perPage) => Pagination(page, perPage) }
/**
 * Parses the "link" header into a map from relation name to the
 * `(page, per_page)` query parameters of that relation's URL.
 *
 * Header names are compared in lower case. Entries whose URL cannot be parsed,
 * or whose `page` / `per_page` parameter is missing or not an integer, are
 * skipped.
 *
 * @param headers response headers
 * @return relation name mapped to `(page, per_page)`
 */
def getRelations(headers: Map[String, String]): Map[String, (Int, Int)] = {
  // Reads an integer query parameter, yielding None when absent or malformed.
  def intParam(uri: Uri, key: String): Option[Int] =
    uri.params.get(key).flatMap(raw => Try(raw.toInt).toOption)

  val linkHeaders = headers
    .map { case (name, value) => name.toLowerCase -> value }
    .collect { case ("link", value) => value }

  val entries = linkHeaders.flatMap { value =>
    link.findAllMatchIn(value).flatMap { m =>
      val entry = for {
        uri <- Uri.fromString(m.group(1)).toOption
        pageNum <- intParam(uri, "page")
        perPageNum <- intParam(uri, "per_page")
      } yield (m.group(2), (pageNum, perPageNum))
      entry.toList
    }
  }

  entries.toMap
}
// Matches one `<url>; rel="name"` entry; group 1 is the URL, group 2 the relation name.
val link: Regex = new Regex("""<(.*?)>; rel="(\w+)"""")
It can be easily adapted to "move" in either direction, and relies on fs2 instead of loading it all to memory.
As a side note, v4 API (graphql) does not support page numbers. Navigation is done by getting N items before the first one or after the last one in the current window. These "positions" are returned as "cursors", which are opaque types.
The code above can be adapted to v4's system.
My `autoPage` function had a bug: it missed the last page if there was more than one. Here is a small change to the implementation:
/**
 * Streams the items of every page, starting at `first` and following the
 * "next" relation found in each response's headers.
 *
 * The unfold state is the next page to request; once it is `None` the stream ends.
 *
 * @param first the page to start from
 * @param call  performs the request for a given [[Pagination]]; a `Left` is
 *              rethrown into `F`
 */
def autoPage[F[_]: Sync, T](
    first: Pagination)(
    call: Pagination => F[Either[GHException, GHResult[List[T]]]])
    : Stream[F, T] = {
  // Requests one page and pairs its items with the page to request afterwards.
  def fetch(pagination: Pagination): F[Option[(Chunk[T], Option[Pagination])]] =
    call(pagination).rethrow.map { response =>
      val following = nextPage(getRelations(response.headers))
      Option((Chunk.seq(response.result), following))
    }

  val finished: F[Option[(Chunk[T], Option[Pagination])]] = Sync[F].pure(None)

  val step: Option[Pagination] => F[Option[(Chunk[T], Option[Pagination])]] =
    _.fold(finished)(fetch)

  Stream.unfoldChunkEval(Option(first))(step)
}
I see many solutions. @BenFradet @dcsobral Is it possible to make a PR to integrate one into the project?
@zhenleibb yes feel free to open one :+1:
Updated the code @dcsobral wrote to match current code in this repo - I am not that good with cats, so I had to replace F with IO:
/**
 * Streams the items of every page, starting at `first` and following the
 * "next" relation found in each response's headers.
 *
 * The unfold state is the next page to request; once it is `None` the stream ends.
 *
 * @param first the page to start from
 * @param call  performs the request for a given [[Pagination]]; a failed
 *              `result` is raised in `IO`
 */
def autoPage[T](
    first: Pagination)(
    call: Pagination => IO[GHResponse[List[T]]])
    : Stream[IO, T] = {
  // Requests one page and pairs its items with the page to request afterwards.
  def fetch(pagination: Pagination): IO[Option[(Chunk[T], Option[Pagination])]] =
    for {
      response <- call(pagination)
      items <- IO.fromEither(response.result)
    } yield Option(Chunk.seq(items) -> nextPage(getRelations(response.headers)))

  val step: Option[Pagination] => IO[Option[(Chunk[T], Option[Pagination])]] = {
    case None             => IO.pure(None)
    case Some(pagination) => fetch(pagination)
  }

  Stream.unfoldChunkEval(Option(first))(step)
}
Did it migrate to Cats 3? Hardcoding `IO` is the wrong thing for Cats 2.
A simple solution which depends on only Monad and MonadError
/**
 * Lists all repositories of an organisation by requesting page after page
 * (100 repositories per page) and merging them with `mergeGhPages`.
 *
 * @param repos   the repositories API to query
 * @param orgName the organisation whose repositories are listed
 */
def listAllOrgRepos[F[_]](repos: Repositories[F], orgName: String)(implicit
    monadError: MonadError[F, Throwable]
): F[List[Repository]] = {
  val reposPerPage = 100
  val fetchPage = (pageNumber: Int) =>
    repos.listOrgRepos(orgName, pagination = Some(Pagination(pageNumber, reposPerPage)))

  mergeGhPages(fetchPage)
}
/**
 * Merges all pages of a GitHub call into a single list.
 *
 * Each page's `result` is unwrapped with `monadError.fromEither`, so a failed
 * response is raised as an error in `F`; the unwrapped pages are then combined
 * by `mergePages`.
 *
 * Uses the `MonadError` instance directly, so it does not depend on cats
 * syntax imports being in scope.
 *
 * @param getPage performs the request for a given page number
 * @return the merged items of all pages
 */
def mergeGhPages[F[_], T](getPage: Int => F[GHResponse[List[T]]])(implicit
    monadError: MonadError[F, Throwable]
): F[List[T]] =
  mergePages[F, T] { page =>
    monadError.flatMap(getPage(page))(response => monadError.fromEither(response.result))
  }
/**
 * Requests pages 1, 2, 3, ... until an empty page is returned, and merges
 * their items into one list in page order (page 1's items first).
 *
 * Runs through `tailRecM`. The accumulator is kept in reverse so that adding a
 * page costs only the size of that page; it is reversed once at the end.
 *
 * @param getPage returns the items of the given page number; an empty list
 *                signals that there are no more pages
 * @return the items of all pages, in the order they were returned
 */
def mergePages[F[_]: Monad, T](getPage: Int => F[List[T]]): F[List[T]] =
  Monad[F].tailRecM[(Int, List[T]), List[T]]((1, Nil: List[T]))({
    case (page: Int, reversedAcc: List[T]) =>
      getPage(page).map({
        case Nil => Right(reversedAcc.reverse)
        // `ts reverse_::: reversedAcc` is `ts.reverse ::: reversedAcc`.
        case ts => Left((page + 1, ts reverse_::: reversedAcc))
      })
  })