- const u8 *byte, *start, *end;
- int bit, startbit;
- u32 g1, g2, misaligned;
- unsigned long *plong;
- unsigned long lskipval;
-
- lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
- g1 = (goal / GFS2_NBBY);
- start = buffer + g1;
- byte = start;
- end = buffer + buflen;
- g2 = ALIGN(g1, sizeof(unsigned long));
- plong = (unsigned long *)(buffer + g2);
- startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
- misaligned = g2 - g1;
- if (!misaligned)
- goto ulong_aligned;
-/* parse the bitmap a byte at a time */
-misaligned:
- while (byte < end) {
- if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
- return goal +
- (((byte - start) * GFS2_NBBY) +
- ((bit - startbit) >> 1));
- }
- bit += GFS2_BIT_SIZE;
- if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
- bit = 0;
- byte++;
- misaligned--;
- if (!misaligned) {
- plong = (unsigned long *)byte;
- goto ulong_aligned;
- }
- }
- }
- return BFITNOENT;
-
-/* parse the bitmap a unsigned long at a time */
-ulong_aligned:
- /* Stop at "end - 1" or else prefetch can go past the end and segfault.
- We could "if" it but we'd lose some of the performance gained.
- This way will only slow down searching the very last 4/8 bytes
- depending on architecture. I've experimented with several ways
- of writing this section such as using an else before the goto
- but this one seems to be the fastest. */
- while ((unsigned char *)plong < end - sizeof(unsigned long)) {
- prefetch(plong + 1);
- if (((*plong) & LBITMASK) != lskipval)
- break;
- plong++;
- }
- if ((unsigned char *)plong < end) {
- byte = (const u8 *)plong;
- misaligned += sizeof(unsigned long) - 1;
- goto misaligned;
+ u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
+ const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
+ const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
+ u64 tmp;
+ u64 mask = 0x5555555555555555ULL;
+ u32 bit;
+
+ BUG_ON(state > 3);
+
+ /* Mask off bits we don't care about at the start of the search */
+ mask <<= spoint;
+ tmp = gfs2_bit_search(ptr, mask, state);
+ ptr++;
+ while(tmp == 0 && ptr < end) {
+ tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
+ ptr++;